package Pugs::Emitter::Rule::Parsec; # p6-rule parsec emitter use strict; use warnings; use Pugs::Grammar::MiniPerl6; use Data::Dumper; $Data::Dumper::Indent = 1; our $sigspace = 0; our $capture_counter = 0; sub get_capture_var { return 'capture_' . $capture_counter++; } sub rule_rename($){ my $orig_name = shift; return 'rule' . (uc substr $orig_name, 0, 1) . substr $orig_name, 1; } sub to_genparser_string($) { my $char_parser = shift; return "($char_parser >>= \\c -> return [c])"; } sub call_constant { my $str = shift; $str =~ s/\\/\\\\/g; $str =~ s/"/\\"/g; return 'string "' . $str . '"'; } sub emit { my ($grammar, $ast, $param) = @_; local $sigspace = $param->{sigspace}; # XXX - $sigspace should be lexical local $capture_counter = 0; emit_rule( $ast, '' ) . "\n"; } sub emit_rule { my $n = $_[0]; my $tab = $_[1]; die "unknown node: ", Dumper( $n ) unless ref( $n ) eq 'HASH'; #print "NODE ", Dumper($n); my ( $k, $v ) = each %$n; # XXX - use real references no strict 'refs'; my $code = &$k( $v, $tab ); return $code; } #rule nodes sub non_capturing_group { return emit_rule( $_[0], $_[1] ); } sub quant { my $term = $_[0]->{'term'}; my $quantifier = $_[0]->{quant}; my $spacing = ( $sigspace && ($_[0]->{ws1} ne '' && $_[0]->{ws2} ne '') ); my $tab = $spacing ? $_[1] . ' ' : $_[1]; my $rul = emit_rule( $term, $tab ); my $ws = metasyntax('?ws', $tab); $rul = "$ws\n$tab$rul" if $sigspace && $_[0]->{ws1} ne ''; $rul = "$rul\n$tab$ws" if $sigspace && $_[0]->{ws2} ne ''; $rul = "do\n$tab$rul" if $spacing; return $rul if $quantifier eq ''; # * + ? my $qual = ''; return "option \"\" \$ $rul" if $quantifier eq '?'; $qual = 'many' if $quantifier eq '*'; $qual = 'many1' if $quantifier eq '+'; die "quantifier not implemented: $quantifier" if $qual eq ''; my $final_rul = "(($qual \$ $rul) >>= \\arr -> return \$ foldr (++) \"\" arr)"; return "$final_rul >>\n$tab$ws" if $sigspace and $_[0]->{ws3} ne ''; return $final_rul; } sub alt { my @s; my @alt = @{$_[0]}; # clean up alternative body # XXX maybe can be done earlier (e.g. in parser) foreach(@alt){ if(ref eq 'HASH'){ my($k) = keys %$_; if($k eq 'alt'){ my @sub_alt = @{$_->{alt}}; $_ = $sub_alt[0]; push @alt, @sub_alt[1 .. $#sub_alt]; } } } my $indent = $_[1] . ' '; foreach(@alt){ my $tmp = emit_rule( $_, $indent ); push @s, $tmp if $tmp; } return "do\n$indent" . join "\n$indent<|>\n$indent", @s; } sub concat { my @inner = @{$_[0]}; # clean up concatenation body # XXX maybe can be done earlier (e.g. in parser) foreach(@inner){ if(ref eq 'HASH'){ my($k) = keys %$_; if($k eq 'concat'){ my @sub_inner = @{$_->{concat}}; $_ = $sub_inner[0]; push @inner, @sub_inner[1 .. $#sub_inner]; } } } my $indent = $_[1] . ' '; my $result = 'do'; foreach(@inner){ my $tmp = emit_rule( $_, $indent ); $result .= "\n$indent" . $tmp if $tmp; } return $result; } sub dot { return to_genparser_string("anyChar"); } sub variable {} use vars qw( %special_chars ); BEGIN { %special_chars = ( r => "char '\\r'", n => "char '\\n'", t => "char '\\t'", e => "char '\\033'", f => "char '\\f'", w => "(alphaNum <|> char '_')", d => 'digit', s => 'space', W => "satisfy (\\x -> x /= '_' && not \$ isAlphaNum x)", D => 'noneOf "0123456789"', S => 'noneOf " \\v\\f\\t\\r\\n"', ); while(my ($k, $v) = each %special_chars){ next if $k eq uc $k or exists $special_chars{uc $k} or $v !~ /^char/; my $chars = substr $v, 6; chop $chars; $special_chars{uc $k} = "noneOf \"$chars\""; } } sub special_char { my $char = substr($_[0],1); return to_genparser_string($special_chars{$char}) if exists $special_chars{$char}; $char = '\\\\' if $char eq '\\'; return "string \"$char\""; } sub match_variable {} sub closure { my $miniperl6 = substr $_[0], 1, length($_[0]) - 2; my $haskell = Pugs::Grammar::MiniPerl6->ProductionRule($miniperl6); $haskell =~ s/\n/\n$_[1]/sg; # print ">>> MiniPerl6\n$miniperl6\n===\n$haskell\n<<< Haskell\n"; return "$haskell"; } sub capturing_group { my $program = $_[0]; $program = emit_rule( $program, $_[1] . ' ' ) if ref( $program ); return &get_capture_var . ' <- ' . $program; } sub named_capture { my $name = $_[0]{ident}; my $program = $_[0]{rule}; return "$name <- " . metasyntax($program->{metasyntax}, $_[1] . ' ', 1) if exists $program->{metasyntax}; return "$name <- " . emit_rule($program, $_[1] . ' '); } sub negate { my $body = $_[0]; return not_after($body->{after}, $_[1]) if exists $body->{after}; return not_before($body->{before}, $_[1]) if exists $body->{before}; return ''; } sub before { my $program = $_[0]{rule}; return 'lookAhead (' . emit_rule($program, $_[1] . ' ') . ')'; } sub not_before { my $program = $_[0]{rule}; return 'notFollowedBy $ (' . emit_rule($program, $_[1] . ' ') . ") >> return ' '"; # notFollowedBy :: Show tok => GenParser tok st tok -> GenParser tok st () # tok = Char in the context so that we have to cast it } sub after {} sub not_after {} sub colon {} sub constant { return "string \"$_[0]\""; } use vars qw( %char_class ); BEGIN { %char_class = ( alpha => 'letter', alnum => 'alphaNum', ascii => 'satisfy isAscii', blank => 'oneOf " \\t"', cntrl => 'satisfy isCotrol', digit => 'digit', graph => "satisfy (\\x -> isPrint x && x /= ' ')", lower => 'lower', print => 'satisfy isPrint', punct => "satisfy (\\x -> isPrint x && x /= ' ' && not (isAlphaNum x))", space => 'space', upper => 'upper', word => "(alphaNum <|> char '_')", xdigit => 'hexDigit', ); } sub metasyntax { # my $cmd = $_[0]; my $prefix = substr( $cmd, 0, 1 ); my $named_capturing = !$_[2]; my $negative_lookahead = 0; if ( $prefix eq '@' ) { # XXX - wrap @array items - see end of Pugs::Grammar::Rule # TODO - param list return "$_[1] do {\n" . "$_[1] my \$match;\n" . "$_[1] for my \$subrule ( $cmd ) {\n" . "$_[1] \$match = " . call_subrule( '$subrule', '', () ) . ";\n" . "$_[1] last if \$match;\n" . "$_[1] }\n" . "$_[1] my \$bool = (!\$match != 1);\n" . "$_[1] \$pos = \$match->to if \$bool;\n" . "$_[1] \$bool;\n" . "$_[1] }"; } if ( $prefix eq '$' ) { if ( $cmd =~ /::/ ) { # call method in fully qualified $package::var # ...->match( $rule, $str, $grammar, $flags, $state ) # TODO - send $pos to subrule return "$_[1] do {\n" . "$_[1] push \@match,\n" . "$_[1] $cmd->match( \$s, \$grammar, {p => \$pos}, undef );\n" . "$_[1] \$pos = \$match[-1]->to;\n" . "$_[1] !\$match[-1] != 1;\n" . "$_[1] }" } # call method in lexical $var # TODO - send $pos to subrule return "$_[1] do {\n" . "$_[1] my \$r = Pugs::Runtime::Rule::get_variable( '$cmd' );\n" . "$_[1] push \@match,\n" . "$_[1] \$r->match( \$s, \$grammar, {p => \$pos}, undef );\n" . "$_[1] \$pos = \$match[-1]->to;\n" . "$_[1] !\$match[-1] != 1;\n" . "$_[1] }" } if ( $prefix eq q(') ) { # single quoted literal ' $cmd = substr( $cmd, 1, -1 ); return call_constant( $cmd ); } if ( $prefix eq q(") ) { # interpolated literal " $cmd = substr( $cmd, 1, -1 ); warn "<\"...\"> not implemented"; return; } if ( $prefix =~ /[-+[]/ ) { # character class if ( $prefix eq '-' ) { my $str = substr $cmd, 2, length($cmd) - 3; $str =~ s/\\>/>/g; # XXX $str =~ s/\\/\\\\/g; $str =~ s/"/\\"/g; return to_genparser_string("noneOf \"$str\""); } elsif ( $prefix eq '+' ) { $cmd = substr($cmd, 2); } my $str = substr $cmd, 1, length($cmd) - 2; $str =~ s/\\>/>/g; # XXX $str =~ s/\\/\\\\/g; $str =~ s/"/\\"/g; return to_genparser_string("oneOf \"$str\""); } if ( $prefix eq '?' ) { # non_capturing_subrule / code assertion $cmd = substr( $cmd, 1 ); if ( $cmd =~ /^{/ ) { warn "code assertion not implemented"; return; } $prefix = substr( $cmd, 0, 1 ); $named_capturing = 0; } if ( $prefix eq '!' ) { # negated_subrule / code assertion $cmd = substr( $cmd, 1 ); if ( $cmd =~ /^{/ ) { warn "code assertion not implemented"; return; } $prefix = substr( $cmd, 0, 1 ); $negative_lookahead = 1; warn "<$cmd> not implemented"; return; } if ( $cmd eq '.' ) { warn "<$cmd> not implemented"; return; } if ( $prefix =~ /[_[:alnum:]]/ ) { # "before" and "after" are handled in a separate rule if ( $cmd eq 'ws' ){ return 'perl6WhiteSpace'; # assuming function: # perl6WhiteSpace = do cls <- getPrevCharClass # let mod = if cls == WordClass then many1 else many # do mod whiteSpace # <|> # (satisfy (\c -> charClassOf c /= WordClass) >> return "") } if ( $cmd eq 'cut' ) { warn "<$cmd> not implemented"; return; } if ( $cmd eq 'commit' ) { warn "<$cmd> not implemented"; return; } if ( $cmd eq 'prior' ) { warn "<$cmd> not implemented"; return; } if ( $cmd eq 'null' ) { warn "<$cmd> not implemented"; return; } if ( $char_class{$cmd} ) { # XXX - inlined char classes are not inheritable, but this should be ok return to_genparser_string($char_class{$cmd}); } # capturing subrule # my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd ); $param_list = '' unless defined $param_list; my @param = split( ',', $param_list ); return ($named_capturing ? "$subrule <- " : '') . rule_rename($subrule) . join '', map { " ($_)" } @param; } die "<$cmd> not implemented"; } 1;