New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Patch for interpolating variables and block results into regexes #1637
Comments
From bruce@drangle.comThe attached patch adds support for variable and block-result It does so by means of a new PAST::Regex node pasttype 'interpolator'. / $var / -- Interpolates as literal string, unless it's a Regex object |
From bruce@drangle.com0001-Variable-interpolation-in-regexes.patchFrom 29c116e35d95e29b211d396a0b48137172ae0f28 Mon Sep 17 00:00:00 2001
From: Bruce Keeler <bruce@drangle.com>
Date: Sun, 28 Mar 2010 21:41:53 -0700
Subject: [PATCH] Variable interpolation in regexes
---
build/Makefile.in | 1 +
src/Perl6/Actions.pm | 64 ++++++++++---
src/Perl6/Grammar.pm | 20 ++++
src/cheats/parrot/past-compiler-regex.pir | 153 +++++++++++++++++++++++++++++
t/spectest.data | 1 +
5 files changed, 226 insertions(+), 13 deletions(-)
create mode 100644 src/cheats/parrot/past-compiler-regex.pir
diff --git a/build/Makefile.in b/build/Makefile.in
index 743a81a..f9431bd 100644
--- a/build/Makefile.in
+++ b/build/Makefile.in
@@ -154,6 +154,7 @@ CHEATS_PIR = \
src/cheats/want-caller-side-callsig.pir \
src/cheats/parrot-role-quirks.pir \
src/cheats/parrot/attriter.pir \
+ src/cheats/parrot/past-compiler-regex.pir \
src/cheats/parrot/Protoobject.pir \
src/cheats/parrot/P6role.pir \
src/gen/uprop-cheats.pir \
diff --git a/src/Perl6/Actions.pm b/src/Perl6/Actions.pm
index 97fd6e5..197d8db 100644
--- a/src/Perl6/Actions.pm
+++ b/src/Perl6/Actions.pm
@@ -2358,10 +2358,51 @@ class Perl6::RegexActions is Regex::P6Regex::Actions {
my $past := $<statement>.ast;
make PAST::Regex.new( $past, :pasttype('pastnode') );
}
+ method metachar:sym<var>($/) {
+ my $past;
+ my $name := $<pos> ?? +$<pos> !! ~$<name>;
+ if $<quantified_atom> {
+ if $<var> {
+ $/.CURSOR.panic('"$var = " syntax not yet supported in regexes');
+ }
+ $past := $<quantified_atom>[0].ast;
+ if $past.pasttype eq 'quant' && $past[0].pasttype eq 'subrule' {
+ Regex::P6Regex::Actions::subrule_alias($past[0], $name);
+ }
+ elsif $past.pasttype eq 'subrule' { Regex::P6Regex::Actions::subrule_alias($past, $name); }
+ else {
+ $past := PAST::Regex.new( $past, :name($name), :pasttype('subcapture'), :node($/) );
+ }
+ }
+ else {
+ if $<var> {
+ my @MODIFIERS := Q:PIR {
+ %r = get_hll_global ['Regex';'P6Regex';'Actions'], '@MODIFIERS'
+ };
+ my $subtype := @MODIFIERS[0]<i> ?? 'literal_i' !! 'literal';
+ $past := PAST::Regex.new( $<var>.ast, :pasttype('interpolator'),
+ :subtype($subtype), :node($/) );
+ } else {
+ $past := PAST::Regex.new( '!BACKREF', $name, :pasttype('subrule'),
+ :subtype('method'), :node($/) );
+ }
+ }
+ make $past;
+ }
- method metachar:sym<{ }>($/) { make $<codeblock>.ast; }
+ method assertion:sym<var>($/) {
+ make PAST::Regex.new( $<var>.ast, :pasttype('interpolator'),
+ :subtype('compile_regex'), :node($/) );
+ }
+
+ method metachar:sym<{ }>($/) {
+ make PAST::Regex.new(:node($/), :pasttype('pastnode'), $<codeblock>.ast);
+ }
- method assertion:sym<{ }>($/) { make $<codeblock>.ast; }
+ method assertion:sym<{ }>($/) {
+ make PAST::Regex.new( :node($/), :pasttype('interpolator'), :subtype('compile_regex'),
+ $<codeblock>.ast );
+ }
method codeblock($/) {
my $block := $<block>.ast;
@@ -2376,20 +2417,17 @@ class Perl6::RegexActions is Regex::P6Regex::Actions {
}
sub bindmatch($past) {
- PAST::Regex.new(
- PAST::Stmts.new(
+ PAST::Stmts.new(
+ PAST::Op.new(
+ PAST::Var.new( :name('$/') ),
PAST::Op.new(
- PAST::Var.new( :name('$/') ),
- PAST::Op.new(
- PAST::Var.new( :name('$¢') ),
- :name('MATCH'),
- :pasttype('callmethod')
- ),
- :pasttype('bind')
+ PAST::Var.new( :name('$¢') ),
+ :name('MATCH'),
+ :pasttype('callmethod')
),
- $past
+ :pasttype('bind')
),
- :pasttype('pastnode')
+ $past,
);
}
}
diff --git a/src/Perl6/Grammar.pm b/src/Perl6/Grammar.pm
index ef77a7f..0539c48 100644
--- a/src/Perl6/Grammar.pm
+++ b/src/Perl6/Grammar.pm
@@ -1428,6 +1428,26 @@ grammar Perl6::Regex is Regex::P6Regex::Grammar {
token metachar:sym<:my> {
':' <?before 'my'> <statement=.LANG('MAIN', 'statement')> <.ws> ';'
}
+
+ token metachar:sym<$> {
+ <sym> <!before \w>
+ }
+
+ token metachar:sym<var> {
+ [
+ | '$<' $<name>=[<-[>]>+] '>'
+ | '$' $<pos>=[\d+]
+ | <?before <[$@]> \w> <var=.LANG('MAIN', 'variable')>
+ | <?before '%' \w> <.panic: "Use of hash variable in patterns is reserved">
+ ]
+
+ [ <.ws> '=' <.ws> <quantified_atom> ]?
+ }
+
+ token assertion:sym<var> {
+ | <?before <[$@]> \w> <var=.LANG('MAIN', 'variable')>
+ | <?before '%' \w> <.panic: "Use of hash variable in patterns is reserved">
+ }
token metachar:sym<{ }> {
<?[{]> <codeblock>
diff --git a/src/cheats/parrot/past-compiler-regex.pir b/src/cheats/parrot/past-compiler-regex.pir
new file mode 100644
index 0000000..de0a22e
--- /dev/null
+++ b/src/cheats/parrot/past-compiler-regex.pir
@@ -0,0 +1,153 @@
+=head1 NAME
+
+past-compiler-regex.pir - Compiler for the PAST::Regex interpolator type
+
+=head1 DESCRIPTION
+
+Implements the interpolator pasttype of PAST::Regex node. This has to be in Rakudo rather than
+NQP-RX as it calls the Rakudo regex compiler.
+
+Scalar values are interpolated as literal strings or regexes, depending on the subtype. Array values
+(or any Iterable) are interpolated as ||-type alternations.
+
+Subtype can be any of:
+
+=over 4
+
+=item zerowidth
+
+Only test for truthiness and fail or not. No interpolation.
+
+=item compile_regex
+
+String values should be compiled into regexes and then interpolated.
+
+=item literal
+
+String values should be treated as literals.
+
+=item literal_i
+
+String values should be treated as literals and matched case-insensitively.
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item interpolator(PAST::Regex node)
+
+=cut
+
+.HLL 'parrot'
+
+.namespace ['PAST';'Compiler']
+
+.const int CURSOR_FAIL = -1
+
+.sub 'interpolator' :method :multi(_, ['PAST'; 'Regex'])
+ .param pmc node
+ .local pmc cur, pos, fail, ops, eos, off, tgt
+ (cur, pos, eos, off, tgt, fail) = self.'!rxregs'('cur pos eos off tgt fail')
+ ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+ .local pmc zerowidth, negate, testop, subtype
+ subtype = node.'subtype'()
+
+ ops.'push_pirop'('inline', subtype, 'inline'=>' # rx interp subtype=%1')
+ .local pmc cpast, cpost
+ cpast = node[0]
+ cpost = self.'as_post'(cpast, 'rtype'=>'P')
+
+ self.'!cursorop'(ops, '!cursor_pos', 0, pos)
+ ops.'push'(cpost)
+
+ # If this is just a zerowidth assertion, we don't actually interpolate anything. Just evaluate
+ # and fail or not.
+ if subtype == 'zerowidth' goto zerowidth_test
+
+ .local string prefix
+ prefix = self.'unique'('rxinterp_')
+ .local pmc precompiled_label, done_label, loop_label, not_a_list_label, iterator_reg, label_reg
+ $S0 = concat prefix, '_precompiled'
+ precompiled_label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat prefix, '_done'
+ done_label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat prefix, '_loop'
+ loop_label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat prefix, '_not_a_list'
+ not_a_list_label = self.'post_new'('Label', 'result'=>$S0)
+ iterator_reg = self.'uniquereg'("P")
+ label_reg = self.'uniquereg'("I")
+
+ ops.'push_pirop'('descalarref', '$P10', cpost)
+ ops.'push_pirop'('get_hll_global', '$P11', "'Iterable'")
+ ops.'push_pirop'('callmethod', "'isa'", '$P10', '$P11', 'result'=>'$P11')
+ ops.'push_pirop'('unless', '$P11', not_a_list_label)
+
+ ops.'push_pirop'('callmethod', "'iterator'", '$P10', 'result'=>iterator_reg)
+ ops.'push_pirop'('set_addr', label_reg, loop_label)
+ ops.'push'(loop_label)
+ ops.'push_pirop'('callmethod', "'get'", iterator_reg, 'result'=>'$P10')
+ ops.'push_pirop'('isa', '$I10', '$P10', "['EMPTY']")
+ ops.'push_pirop'('if', '$I10', fail)
+ self.'!cursorop'(ops, '!mark_push', 0, 0, pos, label_reg)
+
+ ops.'push'(not_a_list_label)
+ # Check if it isa Regex, and call it as a method if so
+ ops.'push_pirop'('isa', '$I10', '$P10', "['Regex']")
+ ops.'push_pirop'('if', '$I10', precompiled_label)
+ ops.'push_pirop'('set', '$S10', '$P10')
+ ne subtype, 'compile_regex', literal
+
+ # Kinda cheesy, but the compiler can't be entered anywhere but TOP for now
+ ops.'push_pirop'('split', '$P9', "'/'", '$S10')
+ ops.'push_pirop'('join', '$S10', "'\\/'", '$P9')
+ ops.'push_pirop'('concat', '$S10', "'rx/'", '$S10')
+ ops.'push_pirop'('concat', '$S10', '$S10', "'/'")
+ ops.'push_pirop'('compreg', '$P10', '"perl6"')
+ ops.'push_pirop'('getinterp', '$P9')
+ ops.'push_pirop'('set', '$P9', "$P9['context';0]")
+ ops.'push_pirop'('callmethod', '"compile"', '$P10', '$S10', "'outer_ctx'=>$P9", 'result'=>'$P10')
+ ops.'push_pirop'('set', '$P8', '$P10[0]')
+ ops.'push_pirop'('getattribute', '$P9', '$P9', '"current_sub"')
+ ops.'push_pirop'('callmethod', '"set_outer"', '$P8', '$P9')
+ ops.'push_pirop'('call', '$P10', 'result'=>'$P10')
+
+ goto have_compiled_regex
+
+ literal:
+ ops.'push_pirop'('length', '$I10', '$S10')
+ ops.'push_pirop'('add', '$I11', pos, '$I10')
+ ops.'push_pirop'('gt', '$I11', eos, fail)
+ ops.'push_pirop'('sub', '$I11', pos, off)
+ ops.'push_pirop'('substr', '$S11', tgt, '$I11', '$I10')
+ ne subtype, 'literal_i', dont_downcase
+ ops.'push_pirop'('downcase', '$S10', '$S10')
+ ops.'push_pirop'('downcase', '$S11', '$S11')
+ dont_downcase:
+ ops.'push_pirop'('ne', '$S11', '$S10', fail)
+ ops.'push_pirop'('add', pos, '$I10')
+ ops.'push_pirop'('goto', done_label)
+
+ have_compiled_regex:
+ ops.'push'(precompiled_label)
+ ops.'push_pirop'('callmethod', '$P10', cur, 'result'=>'$P10')
+ ops.'push_pirop'('unless', '$P10', fail)
+ self.'!cursorop'(ops, '!mark_push', 0, 0, CURSOR_FAIL, 0, '$P10')
+ ops.'push_pirop'('callmethod', '"pos"', '$P10', 'result'=>pos)
+
+ ops.'push'(done_label)
+
+ goto done
+
+ zerowidth_test:
+ negate = node.'negate'()
+ testop = self.'??!!'(negate, 'if', 'unless')
+ ops.'push_pirop'(testop, cpost, fail)
+ done:
+ .return (ops)
+
+
+.end
diff --git a/t/spectest.data b/t/spectest.data
index 8b0b091..8acf9b5 100644
--- a/t/spectest.data
+++ b/t/spectest.data
@@ -243,6 +243,7 @@ S05-grammar/protoregex.t
S05-grammar/protos.t
S05-grammar/signatures.t
# S05-grammar/ws.t
+S05-interpolation/regex-in-variable.t
# S05-mass/named-chars.t # icu
# S05-mass/properties-block.t # icu
# S05-mass/properties-derived.t # icu
--
1.7.0
|
From @pmichaudOn Sun, Mar 28, 2010 at 10:01:42PM -0700, Bruce Keeler wrote:
The patch is a very good start, but please don't apply it yet. I'm finally back at home again, so I expect to be able to do Pm |
The RT System itself - Status changed from 'new' to 'open' |
From @moritzA variant of this patch has been applied; and it's amply tested. I'm Thanks for your patch! |
@moritz - Status changed from 'open' to 'resolved' |
Migrated from rt.perl.org#73862 (status was 'resolved')
Searchable as RT73862$
The text was updated successfully, but these errors were encountered: