# WARNING: This file is no longer used by PCR itself; # we're using examples/Grammar.grammar to generate Pugs::Grammar::Rule.pmc # instead. use v6-alpha; use utf8; # Perl6 implementation of the 'Rule' syntax # author: Flavio S. Glock - fglock@gmail.com =for compiling The util/update-rule-pmc script is used to generate lib/Pugs/Grammar/Rule.pmc from this file. This script does the following: Use v6.pm to compile this file and post-process it after compiling: - remove all references to: Data::Bind - replace the header with: package Pugs::Grammar::Rule; use utf8; no strict 'refs'; use Pugs::Runtime::Match; use Pugs::Runtime::Regex; The post-processing thing is done by the Perl 5 script util/patch-rule-pmc.pl =cut grammar Pugs::Grammar::Rule; #use Pugs::Runtime::Match; our %rule_terms; our %variables; token pod_begin { | \n =end \N* | . \N* <.pod_begin> } token pod_other { | \n =cut \N* | . \N* <.pod_other> } token ws { [ | \# \N* | \n [ = [ | begin <.ws> END \N* .* | begin <.pod_begin> | kwid <.pod_other> | pod <.pod_other> | for <.pod_other> | head1 <.pod_other> ]? ]? | \s ]+ } # regex ident can start with a number token ident { [ <.alnum> | _ | '::' ]+ } token alnum { <[0-9a-zA-Z]> } token alpha { <[a-zA-Z]> } token digit { <[0-9]> } # after '\\' token special_char { | ( c | C ) \[ ( [|\s| ';' | '(' | ')' | '-' ]+) \] # \c[LATIN LETTER A] { return { special_char => '\\' ~ $0 ~ $1 , } } | [ x | X ] + # \x0021 \X0021 { return { special_char => '\\' ~ $/ , } } | ( x | X ) \[ (+) \] # \x[0021] \X[0021] { return { special_char => '\\' ~ $0 ~ $1 , } } | [ o | O ] \d+ # \o0021 \O0021 { return { special_char => '\\' ~ $/ , } } | ( o | O ) \[ (\d+) \] # \o[0021] \O[0021] { return { special_char => '\\' ~ $0 ~ $1 , } } | . # \e \E { return { special_char => '\\' ~ $/ , } } } token literal { [ | \\ | <-[ \' ]> ]* } token double_quoted { [ | \\ | <%Pugs::Grammar::Rule::variables> | <-[ \" ]> ]* } token metasyntax { [ | \\ | \' <.literal> \' | \" <.double_quoted> \" | \{ <.string_code> \} | \< <.metasyntax> \> | <-[ \> ]> ]+ } token char_range { [ | \\ | <-[ \] ]> ]+ } token char_class { | <.alpha>+ | \[ <.char_range> \] } token string_code { # bootstrap "code" [ | \\ | \' <.literal> \' | \" <.double_quoted> \" | \{ [ <.string_code> | '' ] \} | \( [ <.string_code> | '' ] \) | \< [ <.string_code> | '' ] \> | [ <.ws> | \> | \= | \- ] \> | <.ws> | <-[ \} \) \> ]> ]+ } token parsed_code { # this subrule is overridden inside the perl6 compiler <.string_code> { return '{' ~ $/ ~ '}' } } token named_capture_body { | \( \) { return { capturing_group => $$ ,} } | \[ \] { return $$ } | \< { return $$ } | \' <.literal> \' { return { metasyntax => { metasyntax => ~ $$/ ,} } } | { die "invalid alias syntax"; } } token parse_metasyntax { $ := [ '!' | '?' | '.' | '' ] [ '{' '}>' { return { closure => { closure => $$, modifier => $$, } } } | ( <[+-]> )+ \> { if ( $$ eq '!' ) { return { negate => { char_class => [ '+' ~ $, @($/[0]), # TODO - stringify ] } } } return { char_class => [ '+' ~ $, @($/[0]), # TODO - stringify ] } } | [ <.ws> \> { if ( $$ eq 'before' || $$ eq 'after' ) { return { $$ => { rule => $$, modifier => $$ } } } return { metasyntax => { metasyntax => $$, rule => $$, modifier => $$, } } } | ':' <.ws>? $ := [ [ | \\ | <%Pugs::Grammar::Rule::variables> | <-[ \> ]> ]* ] \> { if ( $$ eq 'before' || $$ eq 'after' ) { return { $$ => { rule => { metasyntax => { metasyntax => '\'' ~ $$ ~ '\'' } }, modifier => $$, } } } return { metasyntax => { metasyntax => $$, string => $$, modifier => $$, } } } | \( \) \> { return { call => { method => $$, params => $$, modifier => $$, } } } ] | \> { return { metasyntax => { metasyntax => ~$$, modifier => $$, } } } ] } %variables = ( '$<' => token { \> { return { match_variable => '$' ~ $/ ,} } }, '$' => token { <.digit>+ { return { match_variable => '$' ~ $/ ,} } | \^? [ <.alnum> | _ | \: \: ]+ { return { variable => '$' ~ $/ ,} } }, '@' => token { <.digit>+ { return { match_variable => '@' ~ $/ ,} } | \^? [ <.alnum> | _ | \: \: ]+ { return { variable => '@' ~ $/ ,} } }, '%' => token { <.digit>+ { return { match_variable => '%' ~ $/ ,} } | \^? [ <.alnum> | _ | \: \: ]+ { return { variable => '%' ~ $/ ,} } }, ); # /%variables %rule_terms = ( '{*}' => token { # placeholder { return { metasyntax => { metasyntax => 'null' ,} } } }, '\'' => token { <.literal> \' { return { metasyntax => { metasyntax => '\'' ~ $$/ ,} } } }, '(' => token { \) { return { capturing_group => $$ ,} } }, '<(' => token { ')>' { return { capture_as_result => $$ ,} } }, '<+' => token { ( <[+-]> )* \> { return { char_class => [ '+' ~ $, @($/[0]), # TODO - stringify ] } } }, '<-' => token { ( <[+-]> )* \> { return { char_class => [ '-' ~ $, @($/[0]), # TODO - stringify ] } } }, '<[' => token { \] ( <[+-]> )* \> { return { char_class => [ '+[' ~ $ ~ ']', @($/[0]), # TODO - stringify ] } } }, '<' => token { { return $$ } }, '{' => token { \} { return { closure => { closure => $$, modifier => 'plain', } } } }, '\\' => token { { return $$ } }, '.' => token { { return { 'dot' => 1 ,} } }, '[' => token { \] { return $$ } }, ':::' => token { { return { colon => ':::' ,} } }, ':?' => token { { return { colon => ':?' ,} } }, ':+' => token { { return { colon => ':+' ,} } }, '::' => token { { return { colon => '::' ,} } }, ':' => token { { return { colon => ':' ,} } }, '$$' => token { { return { colon => '$$' ,} } }, '$' => token { { return { colon => '$' ,} } }, '^^' => token { { return { colon => '^^' ,} } }, '^' => token { { return { colon => '^' ,} } }, '>>' => token { { return { colon => '>>' ,} } }, '»' => token { { return { colon => '>>' ,} } }, '<<' => token { { return { colon => '<<' ,} } }, '«' => token { { return { colon => '<<' ,} } }, ':i' => token { <.ws> { return { modifier => { modifier => 'ignorecase', :$$, } } } }, ':ignorecase' => token { <.ws> { return { modifier => { modifier => 'ignorecase', :$$, } } } }, ':s' => token { <.ws> { return { modifier => 'sigspace', :$$, } } }, ':sigspace' => token { <.ws> { return { modifier => 'sigspace', :$$, } } }, ':P5' => token { <.ws> { return { modifier => 'Perl5', :$$, } } }, ':Perl5' => token { <.ws> { return { modifier => 'Perl5', :$$, } } }, ':bytes' => token { <.ws> { return { modifier => 'bytes', :$$, } } }, ':codes' => token { <.ws> { return { modifier => 'codes', :$$, } } }, ':graphs' => token { <.ws> { return { modifier => 'graphs', :$$, } } }, ':langs' => token { <.ws> { return { modifier => 'langs', :$$, } } }, ); # /%rule_terms token term { | <%Pugs::Grammar::Rule::variables> [ <.ws>? ':=' <.ws>? { return { named_capture => { rule => $$, ident => $$, }, }; } | { return $$ } ] | <%Pugs::Grammar::Rule::rule_terms> { #print "term: ", Dumper( $_[0]->data ); return $$ } | <-[ \] \} \) \> \: \? \+ \* \| \& ]> { #print "constant: ", Dumper( $_[0]->data ); return { 'constant' => $$/ ,} } } token quant { | '**' <.ws>? \{ \} { return { closure => $$ ,} } | <[ \? \* \+ ]>? } token quantifier { $ := (<.ws>?) > $ := (<.ws>?) $ := (<[ \? \+ ]>?) $ := (<.ws>?) { if $$/{'quant'} eq '' && $$/{'greedy'} eq '' && $$/{'ws1'} eq '' && $$/{'ws2'} eq '' && $$/{'ws3'} eq '' { return $$/{'term'}; } return { quant => { term => $$/{'term'}, quant => $$/{'quant'}, greedy => $$/{'greedy'}, ws1 => $$/{'ws1'}, ws2 => $$/{'ws2'}, ws3 => $$/{'ws3'}, } } } } token concat { + { use v5; my @a = map { $_->() } @{ $::_V6_MATCH_->{'quantifier'} }; return { concat => \@a ,} if scalar @a > 1; return $a[0]; use v6; } } token conjunctive1 { [ <.ws>? \& ]? **{1} [ \& ]* { use v5; my @a = map { $$_ } @{ $::_V6_MATCH_->{'concat'} }; return { conjunctive1 => \@a ,} if scalar @a > 1; return $a[0]; use v6; } } token disjunctive1 { [ <.ws>? \| ]? **{1} [ \| ]* { use v5; my @a = map { $$_ } @{ $::_V6_MATCH_->{'conjunctive1'} }; return { alt1 => \@a ,} if scalar @a > 1; return $a[0]; use v6; } } token conjunctive { [ <.ws>? \& \& ]? **{1} [ \& \& ]* { use v5; my @a = map { $$_ } @{ $::_V6_MATCH_->{'disjunctive1'} }; return { conjunctive => \@a ,} if scalar @a > 1; return $a[0]; use v6; } } token rule { [ <.ws>? \| \| ]? **{1} [ \| \| ]* { use v5; my @a = map { $$_ } @{ $::_V6_MATCH_->{'conjunctive'} }; return { alt => \@a ,} if scalar @a > 1; return $a[0]; use v6; } } token named_regex { ( 'token' | 'regex' | 'rule' ) <.ws> <.ws>? '{' <.ws>? '}' ';'? { return { type => $$0, name => $$, ast => $$ }; } } # This is hacky, will do better later token verbatim { '%{' ( [ . ]* ) '%}' { return { type => 'block', value => $$0 }; } } token item { | { return $$; } | { return $$; } } token grammar { <.ws>? 'grammar' <.ws> <.ws>? ';' <.ws>? [ <.ws>? ]* { return { $$ => $ } } } token spec { ? * { return { block => $, 'grammar' => $ } } }