New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Regular Expressions Not Unsetting $1 Vars When Backtracking #2161
Comments
From mikelambert@home.com. Created by mikelambert@home.com. When perl matches using (), and then backtracks over that code, it does not [...] Here are some test cases which produce incorrect results: [...] The execution of this is pretty simple, but the (b)? is the part that is [...] This behavior is stemming from the fact that when the ? is outside of the [...] "ac" =~ /^(a)(b)?(c)$/ [...] It's not unsetting $2 when the (b)? match fails, but rather the undef of $2 [...] This bug can manifest itself in more complex scenarios where a failed [...] Two ways to fix this are to unset $1 vars upon backtracking if the [...] The other is to set all failed attempts at remembering data into $1 vars to [...] I've successfully duplicated the bug in perl 5.005_03 on unix, and on perl [...] Please let me know the status of this bug, and what develops. Thank you, [...] Perl Info
|
From @vanstyn: In <000701bfe384$ee4ee7c0$6701a8c0@mongo>, "Mike Lambert" writes: [...] I can confirm that this incorrectly leaves $2 set to "b" in 5.005 and [...] Hugo |
From @vanstyn: In <000701bfe384$ee4ee7c0$6701a8c0@mongo>, "Mike Lambert" writes: [...] The patch below fixes it. All tests (including the new one) pass here. Hugo
Inline Patch:
--- regexec.c.old Mon May 8 23:30:51 2000
+++ regexec.c Tue Jul 11 12:36:38 2000
@@ -2964,6 +2964,8 @@
else
c1 = c2 = -1000;
PL_reginput = locinput;
+ if (paren)
+ PL_regendp[paren] = -1;
if (minmod) {
CHECKPOINT lastcp;
minmod = 0;
--- t/op/re_tests.old Fri Jan 7 04:16:23 2000
+++ t/op/re_tests Tue Jul 11 12:30:34 2000
@@ -750,3 +750,4 @@
^([a-z]:) C:/ n - -
'^\S\s+aa$'m \nx aa y - -
(^|a)b ab y - -
+^([ab]*?)(b)?(c)$ abac y -$2- -- |
From @vanstyn: In <200007111144.MAA04446@crypt.compulink.co.uk>, I write: [...] Here's the parallel patch for 5.005_03. Hugo
Inline Patch:
--- regexec.c.old Sat Mar 27 17:56:09 1999
+++ regexec.c Tue Jul 11 12:54:25 2000
@@ -1484,6 +1484,8 @@
else
c1 = c2 = -1000;
PL_reginput = locinput;
+ if (paren)
+ PL_regendp[paren] = NULL;
if (minmod) {
CHECKPOINT lastcp;
minmod = 0;
--- t/op/re_tests.old Fri Oct 30 01:28:52 1998
+++ t/op/re_tests Tue Jul 11 12:56:04 2000
@@ -489,3 +489,4 @@
(^|x)(c) ca y $2 c
a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - -
round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz
+^([ab]*?)(b)?(c)$ abac y -$2- -- |
From [Unknown Contact. See original ticket]: Hugo wrote: [...]
Great! But I was hoping it would fix this too: $ ./perl -e 'print "not ok:$1:$2\n" if "abcab" =~ /(\w)?(abc)\1b/' Should I file a separate bug report? |
From @vanstyn: In <396E86B2.31C0AE48@home.com>, Rick Delaney writes: [...] No, I did it wrong. The backreference needs to be invalidated deeper [...] I'd welcome patches to add more comprehensive tests of this type of [...] Hugo
Inline Patch:
--- regexec.c.old Mon May 8 23:30:51 2000
+++ regexec.c Fri Jul 14 04:06:49 2000
@@ -221,6 +221,22 @@
#define regcpblow(cp) LEAVE_SCOPE(cp)
+#define TRYPAREN(paren, n, input) { \
+ if (paren) { \
+ if (n) { \
+ PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
+ PL_regendp[paren] = input - PL_bostr; \
+ } \
+ else \
+ PL_regendp[paren] = -1; \
+ } \
+ if (regmatch(next)) \
+ sayYES; \
+ if (paren && n) \
+ PL_regendp[paren] = -1; \
+}
+
+
/*
* pregexec and friends
*/
@@ -2998,16 +3014,7 @@
sayNO;
}
/* PL_reginput == locinput now */
- if (paren) {
- if (ln) {
- PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
- PL_regendp[paren] = locinput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, ln, locinput);
PL_reginput = locinput; /* Could be reset... */
REGCP_UNWIND;
/* Couldn't or didn't -- move forward. */
@@ -3021,16 +3028,7 @@
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
- if (paren) {
- if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND;
}
/* Couldn't or didn't -- move forward. */
@@ -3064,16 +3062,7 @@
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
- if (paren && n) {
- if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[paren] = -1;
- }
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND;
}
/* Couldn't or didn't -- back up. */
@@ -3088,8 +3077,7 @@
UCHARAT(PL_reginput) == c1 ||
UCHARAT(PL_reginput) == c2)
{
- if (regmatch(next))
- sayYES;
+ TRYPAREN(paren, n, PL_reginput);
REGCP_UNWIND;
}
/* Couldn't or didn't -- back up. */
--- t/op/re_tests.old Fri Jul 14 04:14:36 2000
+++ t/op/re_tests Fri Jul 14 03:47:13 2000
@@ -750,3 +750,5 @@
^([a-z]:) C:/ n - -
'^\S\s+aa$'m \nx aa y - -
(^|a)b ab y - -
+^([ab]*?)(b)?(c)$ abac y -$2- --
+(\w)?(abc)\1b abcab n - - |
From @jhi
Thanks, I replaced change #4337 (the first version of this patch) [...] |
Migrated from rt.perl.org#3455 (status was 'resolved')
Searchable as RT3455$
The text was updated successfully, but these errors were encountered: