New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pattern Match fails for specific length string #8494
Comments
From erik@cloudshield.comCreated by erik@cloudshield.com*NOTE*NOTE* in spite of using the perlbug installed from an RPM, The following script produces an unexpected result. Changing the - -cut-cut-cut-cut-cut-cut-cut-cut- # The printing characters # \376 also works, I haven't tried other values # add a +1 or -1 (or change to any other value) to make this succeed. my $test = ''; # create some random junk. Inefficient, but it works. $test .= ($delim x 4); print "Should be empty: $test\n\n"; print "Should be 0: ", length($test), "\n"; - -cut-cut-cut-cut-cut-cut-cut-cut- Perl Info
|
From @demerphqOn 6/23/06, via RT Erik R. Ogan <perlbug-followup@perl.org> wrote:
Attached patch fixes the problem. We should probably convert the sample code to a test. Attached is a Sorry i dont have time to do the test part right now. cheers, -- |
From @demerphq |
From @demerphqsmoke_re_fail.patch--- 28413\regexec.c 2006-06-18 12:01:21.000000000 +0200
+++ 28413_\regexec.c 2006-06-23 11:28:51.951743600 +0200
@@ -4396,9 +4396,10 @@
sayNO;
}
/* PL_reginput == locinput now */
+ PL_reginput = locinput; /* Could be reset... */
TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
/*** all unsaved local vars undefined at this point */
- PL_reginput = locinput; /* Could be reset... */
+
REGCP_UNWIND(st->u.plus.lastcp);
/* Couldn't or didn't -- move forward. */
st->u.plus.old = locinput;
|
The RT System itself - Status changed from 'new' to 'open' |
From @rgsdemerphq wrote:
Thanks, applied as change #28417.
|
@rgs - Status changed from 'open' to 'resolved' |
From @demerphqOn 6/23/06, Rafael Garcia-Suarez <rgarciasuarez@mandriva.com> wrote:
Patch to add a test for this bug is attached. Cheers, -- |
From @demerphqpat.t.28460.patchdiff -wurd 28460/t\op\pat.t 28460_/t\op\pat.t
--- 28460/t\op\pat.t 2006-06-13 21:29:32.000000000 +0200
+++ 28460_/t\op\pat.t 2006-07-02 15:00:53.593750000 +0200
@@ -6,7 +6,8 @@
$| = 1;
-print "1..1208\n";
+# please update note at bottom of file when you change this
+print "1..1211\n";
BEGIN {
chdir 't' if -d 't';
@@ -3514,10 +3515,35 @@
ok($s eq "\x{ffff}", "U+FFFF, NBOUND");
} # non-characters end
+{
+ # https://rt.perl.org/rt3/Ticket/Display.html?id=39583
+
+ # The printing characters
+ my @chars = ("A".."Z");
+ my $delim = ",";
+ my $size = 32771 - 4;
+ my $test = '';
+
+ # create some random junk. Inefficient, but it works.
+ for ($i = 0 ; $i < $size ; $i++) {
+ $test .= $chars[int(rand(@chars))];
+ }
+
+ $test .= ($delim x 4);
+ my $res;
+ my $matched;
+ if ($test =~ s/^(.*?)${delim}{4}//s) {
+ $res = $1;
+ $matched=1;
+ }
+ ok($matched,'pattern matches');
+ ok(length($test)==0,"Empty string");
+ ok(defined($res) && length($res)==$size,"\$1 is correct size");
+}
# Keep the following test last -- it may crash perl
ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
or print "# Unexpected outcome: should pass or crash perl\n";
-# last test 1200
+# last test 1211
|
From @iabynOn Sun, Jul 02, 2006 at 03:13:20PM +0200, demerphq wrote:
thanks, applied as change #28462. -- |
From @demerphqOn 7/2/06, Dave Mitchell <davem@iabyn.com> wrote:
Thanks. Attached is a patch to resolve the issue of escaped chars in the Cheers, |
From @demerphqregexec.c.28461.patchdiff -wurd 28461/regexec.c 28461_/regexec.c
--- 28461/regexec.c 2006-06-30 14:11:28.000000000 +0200
+++ 28461_/regexec.c 2006-07-02 22:54:37.115620200 +0200
@@ -2617,6 +2617,17 @@
#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
#ifdef DEBUGGING
+
+#define re_safe_print(len0,s0) STMT_START { \
+ for ( ch=s0, p=0 ; p < len0 ; p++, ch++ ) {\
+ if ( isCNTRL(*ch) ) { \
+ PerlIO_printf(Perl_debug_log,"\\%o",*ch);\
+ } else { \
+ PerlIO_printf(Perl_debug_log,"%c",*ch);\
+ }\
+ } \
+} STMT_END
+
STATIC void
S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_utf8)
{
@@ -2646,23 +2657,24 @@
if (pref0_len > pref_len)
pref0_len = pref_len;
{
- const char * const s0 =
- do_utf8 && OP(scan) != CANY ?
+ const int is_uni= (do_utf8 && OP(scan) != CANY) ? 1 : 0;
+ const char * const s0 = is_uni ?
pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len;
const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
- const char * const s1 = do_utf8 && OP(scan) != CANY ?
+ const char * const s1 = is_uni ?
pv_uni_display(PERL_DEBUG_PAD(1),
(U8*)(locinput - pref_len + pref0_len),
pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len + pref0_len;
const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
- const char * const s2 = do_utf8 && OP(scan) != CANY ?
+ const char * const s2 = is_uni ?
pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
locinput;
const int len2 = do_utf8 ? (int)strlen(s2) : l;
+ if (is_uni) {
PerlIO_printf(Perl_debug_log,
"%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|",
(IV)(locinput - PL_bostr),
@@ -2678,8 +2690,38 @@
PL_colors[1],
15 - l - pref_len + 1,
"");
+ } else {
+
+ int p=0;
+ char *ch;
+ PerlIO_printf(Perl_debug_log,
+ "%4"IVdf" <%s",
+ (IV)(locinput - PL_bostr),
+ PL_colors[4]);
+ re_safe_print(len0,s0);
+ PerlIO_printf(Perl_debug_log,
+ "%s%s",
+ PL_colors[5],
+ PL_colors[2]);
+ re_safe_print(len1,s1);
+ PerlIO_printf(Perl_debug_log,
+ "%s%s%s",
+ PL_colors[3],
+ (docolor ? "" : "> <"),
+ PL_colors[0]);
+ re_safe_print(len2,s2);
+ PerlIO_printf(Perl_debug_log,
+ "%s>%*s|",
+ PL_colors[1],
+ 15 - l - pref_len + 1,
+ "");
+ }
+
}
}
+
+#undef re_safe_print
+
#endif
STATIC I32 /* 0 failure, 1 success */
|
From @rgsdemerphq wrote:
If I've read my backlog correctly, using pv_display might be a better -- |
From @demerphqOn 7/3/06, Rafael Garcia-Suarez <rgarciasuarez@mandriva.com> wrote:
Actually, as the Germans would say "jein". :-) pv_display contains very similar code, but it has the annoyance that But until then IMO the patch to regexec.c should go in as it does no Yves -- |
From @demerphqOn 7/3/06, demerphq <demerphq@gmail.com> wrote:
And here it is. (Requires regen.pl) Cheers, -- |
From @demerphqpv_escape.patchD:\dev\perl\ver>diff -wurd 28461/dump.c 28461_/dump.c
--- 28461/dump.c 2006-06-13 21:28:58.000000000 +0200
+++ 28461_/dump.c 2006-07-05 20:25:11.771870200 +0200
@@ -119,40 +119,122 @@
op_dump(PL_eval_root);
}
+
+/*
+=for apidoc Apd|char*|pv_escape|NN SV *dsv|NN const char *pv|const STRLEN count|const STRLEN max|const U32 flags
+
+Escapes at most the first count chars of pv and put the results into
+buf such that the size of the escaped string will not exceed max chars
+and will not contain any incomplete escape sequences.
+
+If flags & PERL_PV_ESCAPE_QUOTE then the string will have quotes
+placed around it, additionally if the number of chars converted was
+less than count then a trailing elipses (...) will be added after the
+closing quote.
+
+If PERL_PV_ESCAPE_QUOTE is not set, but PERL_PV_ESCAPE_PADR is
+then the returned string will be right padded with spaces such that it
+is max chars long.
+
+Normally the SV will be cleared before the escaped string is prepared
+but when PERL_PV_ESCAPE_CAT is set this will not occur.
+
+Returns a pointer to the string contained by SV
+
+=cut
+*/
+
char *
-Perl_pv_display(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim)
-{
- const bool nul_terminated = len > cur && pv[cur] == '\0';
- bool truncated = 0;
+Perl_pv_escape( pTHX_ SV *dsv, const char *pv, const STRLEN count, const STRLEN max, const U32 flags ) {
+ char dq= (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : '\\';
+ char octbuf[8] = "\\0123456";
+ STRLEN wrote= 0;
+ STRLEN chsize= 0;
+ const char *end= pv + count;
+ if (flags & PERL_PV_ESCAPE_CAT) {
+ if ( dq == '"' )
+ sv_catpvn(dsv, "\"", 1);
+ } else {
+ if ( dq == '"' )
sv_setpvn(dsv, "\"", 1);
- for (; cur--; pv++) {
- if (pvlim && SvCUR(dsv) >= pvlim) {
- truncated = 1;
- break;
+ else
+ sv_setpvn(dsv, "", 0);
}
+ for ( ; (pv < end && (!max || (wrote < max))) ; pv++ ) {
+ if ( (*pv == dq) || (*pv == '\\') || isCNTRL(*pv) ) {
+ chsize= 2;
switch (*pv) {
- case '\t': sv_catpvs(dsv, "\\t"); break;
- case '\n': sv_catpvs(dsv, "\\n"); break;
- case '\r': sv_catpvs(dsv, "\\r"); break;
- case '\f': sv_catpvs(dsv, "\\f"); break;
- case '"': sv_catpvs(dsv, "\\\""); break;
- case '\\': sv_catpvs(dsv, "\\\\"); break;
+ case '\\' : octbuf[1]= '\\'; break;
+ case '\v' : octbuf[1]= 'v'; break;
+ case '\t' : octbuf[1]= 't'; break;
+ case '\r' : octbuf[1]= 'r'; break;
+ case '\n' : octbuf[1]= 'n'; break;
+ case '\f' : octbuf[1]= 'f'; break;
+ case '"' : if ( dq == *pv ) {
+ octbuf[1]= '"';
+ break;
+ }
default:
- if (isPRINT(*pv))
- sv_catpvn(dsv, pv, 1);
- else if (cur && isDIGIT(*(pv+1)))
- Perl_sv_catpvf(aTHX_ dsv, "\\%03o", (U8)*pv);
+ /*note the (U8*) casts here are important.
+ if they are omitted we can produce the octal
+ for a negative number which could produce a
+ buffer overrun in octbuf, with it on we are
+ guaranteed that the longest the string could be
+ is 5, (we reserve 8 just because its the first
+ power of 2 larger than 5.)*/
+ if ( (pv < end) && isDIGIT(*(pv+1)) )
+ chsize= sprintf( octbuf, "\\%03o", (U8)*pv);
else
- Perl_sv_catpvf(aTHX_ dsv, "\\%o", (U8)*pv);
+ chsize= sprintf( octbuf, "\\%o", (U8)*pv);
}
+ if ( max && (wrote + chsize > max) ) {
+ break;
+ } else {
+ sv_catpvn(dsv, octbuf, chsize);
+ wrote += chsize;
+ }
+ } else {
+ sv_catpvn(dsv, pv, 1);
+ wrote++;
}
- sv_catpvs(dsv, "\"");
- if (truncated)
- sv_catpvs(dsv, "...");
- if (nul_terminated)
- sv_catpvs(dsv, "\\0");
+ }
+ if ( dq == '"' ) {
+ sv_catpvn( dsv, "\"", 1 );
+ if ( pv < end )
+ sv_catpvn( dsv, "...", 3 );
+ } else if ( max && (flags & PERL_PV_ESCAPE_PADR) ) {
+ for ( ; wrote < max ; wrote++ )
+ sv_catpvn( dsv, " ", 1 );
+ }
+ return SvPVX(dsv);
+}
+
+/*
+=for apidoc pv_display
+
+ char *pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len,
+ STRLEN pvlim, U32 flags)
+
+Similar to
+
+ pv_escape(dsv,pv,cur,pvlim,PERL_PV_ESCAPE_QUOTE);
+
+except that an additional "\0" will be appended to the string when
+len > cur and pv[cur] is "\0".
+
+Note that the final string may be up to 7 chars longer than pvlim.
+
+=cut
+*/
+
+char *
+Perl_pv_display(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim)
+{
+ pv_escape( dsv, pv, cur, pvlim, PERL_PV_ESCAPE_QUOTE);
+ if (len > cur && pv[cur] == '\0')
+ sv_catpvn( dsv, "\\0", 2 );
return SvPVX(dsv);
}
D:\dev\perl\ver>diff -wurd 28461/embed.fnc 28461_/embed.fnc
--- 28461/embed.fnc 2006-06-23 18:29:32.000000000 +0200
+++ 28461_/embed.fnc 2006-07-05 10:21:49.724995200 +0200
@@ -980,8 +980,10 @@
Apd |void |sv_setsv_mg |NN SV *dstr|NULLOK SV *sstr
Apdbm |void |sv_usepvn_mg |NN SV *sv|NULLOK char *ptr|STRLEN len
ApR |MGVTBL*|get_vtbl |int vtbl_id
-Ap |char* |pv_display |NN SV *dsv|NN const char *pv|STRLEN cur|STRLEN len \
+Apd |char* |pv_display |NN SV *dsv|NN const char *pv|STRLEN cur|STRLEN len \
|STRLEN pvlim
+Apd |char* |pv_escape |NN SV *dsv|NN const char *pv|const STRLEN count \
+ |const STRLEN max|const U32 flags
Afp |void |dump_indent |I32 level|NN PerlIO *file|NN const char* pat|...
Ap |void |dump_vindent |I32 level|NN PerlIO *file|NN const char* pat \
|NULLOK va_list *args
D:\dev\perl\ver>diff -wurd 28461/perl.h 28461_/perl.h
--- 28461/perl.h 2006-06-30 15:30:31.000000000 +0200
+++ 28461_/perl.h 2006-07-05 20:13:35.896870200 +0200
@@ -5628,5 +5628,13 @@
so that Configure picks them up. */
+/* these are used by Perl_pv_escape() and are here so that they
+ are available throughout the core */
+
+#define PERL_PV_ESCAPE_QUOTE 1
+#define PERL_PV_ESCAPE_PADR 2
+#define PERL_PV_ESCAPE_CAT 4
+
+
#endif /* Include guard */
D:\dev\perl\ver>diff -wurd 28461/regexec.c 28461_/regexec.c
--- 28461/regexec.c 2006-06-30 14:11:28.000000000 +0200
+++ 28461_/regexec.c 2006-07-05 20:08:25.756245200 +0200
@@ -2617,6 +2617,7 @@
#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
#ifdef DEBUGGING
+
STATIC void
S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_utf8)
{
@@ -2646,23 +2647,29 @@
if (pref0_len > pref_len)
pref0_len = pref_len;
{
- const char * const s0 =
- do_utf8 && OP(scan) != CANY ?
+ const int is_uni= (do_utf8 && OP(scan) != CANY) ? 1 : 0;
+ const char * const s0 = is_uni ?
pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len;
- const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
- const char * const s1 = do_utf8 && OP(scan) != CANY ?
+ pv_escape( PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
+ pref0_len, 60, 0);
+
+ const int len0 = strlen(s0);
+ const char * const s1 = is_uni ?
pv_uni_display(PERL_DEBUG_PAD(1),
(U8*)(locinput - pref_len + pref0_len),
pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len + pref0_len;
- const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
- const char * const s2 = do_utf8 && OP(scan) != CANY ?
+ pv_escape( PERL_DEBUG_PAD(1),
+ (U8*)(locinput - pref_len + pref0_len),
+ pref_len - pref0_len, 60, 0 );
+
+ const int len1 = (int)strlen(s1);
+ const char * const s2 = is_uni ?
pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
- locinput;
- const int len2 = do_utf8 ? (int)strlen(s2) : l;
+ pv_escape( PERL_DEBUG_PAD(2), (U8*)locinput,
+ PL_regeol - locinput, 60, 0 );
+ const int len2 = (int)strlen(s2);
PerlIO_printf(Perl_debug_log,
"%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|",
(IV)(locinput - PL_bostr),
@@ -2680,6 +2687,7 @@
"");
}
}
+
#endif
STATIC I32 /* 0 failure, 1 success */
D:\dev\perl\ver>diff -wurd 28461/t/lib/warnings/9uninit 28461_/t/lib/warnings/9uninit
--- 28461/t/lib/warnings/9uninit 2006-06-13 21:29:31.000000000 +0200
+++ 28461_/t/lib/warnings/9uninit 2006-07-05 10:52:54.974995200 +0200
@@ -1017,7 +1017,7 @@
my %h = ("\0011\002\r\n\t\f\"\\abcdefghijklmnopqrstuvwxyz", undef);
$v = join '', %h;
EXPECT
-Use of uninitialized value $h{"\0011\2\r\n\t\f\"\\abcdefghijkl"...} in join or string at - line 6.
+Use of uninitialized value $h{"\0011\2\r\n\t\f\"\\abcdefghijklm"...} in join or string at - line 6.
########
use warnings 'uninitialized';
my ($m1, $v);
|
From @rgsdemerphq wrote:
Thanks, applied as change #28490 (with a bit of reindentation) |
Migrated from rt.perl.org#39583 (status was 'resolved')
Searchable as RT39583$
The text was updated successfully, but these errors were encountered: