#!/usr/bin/perl -w # Modules. use strict; use English; use Pod::Usage; use Getopt::Long; use IO::File; # YEAST byte codes. my $code2class = { U => { code => 'U', type => 'bom', title => "Byte order mark" }, T => { code => 'T', type => 'text', title => "Content text" }, t => { code => 't', type => 'text', title => "Non-content text" }, B => { code => 'B', type => 'line', title => "Content line break" }, b => { code => 'b', type => 'line', title => "Non-content line break" }, L => { code => 'L', type => 'line', title => "Line break normalized to line feed" }, l => { code => 'l', type => 'line', title => "Line break folded to space" }, I => { code => 'I', type => 'text', title => "Indicator character" }, w => { code => 'w', type => 'text', title => "Non-content white space" }, i => { code => 'i', type => 'text', title => "Indentation white space" }, K => { code => 'K', type => 'text', title => "Document start marker" }, k => { code => 'k', type => 'text', title => "Document end marker" }, E => { code => 'E', type => 'begin', title => "Escape sequence" }, e => { code => 'e', type => 'end', title => "Escape sequence" }, C => { code => 'C', type => 'begin', title => "Comment" }, c => { code => 'c', type => 'end', title => "Comment" }, D => { code => 'D', type => 'begin', title => "Directive" }, d => { code => 'd', type => 'end', title => "Directive" }, G => { code => 'G', type => 'begin', title => "Tag" }, g => { code => 'g', type => 'end', title => "Tag" }, H => { code => 'H', type => 'begin', title => "Handle" }, h => { code => 'h', type => 'end', title => "Handle" }, A => { code => 'A', type => 'begin', title => "Anchor" }, a => { code => 'a', type => 'end', title => "Anchor" }, P => { code => 'P', type => 'begin', title => "Properties" }, p => { code => 'p', type => 'end', title => "Properties" }, R => { code => 'R', type => 'begin', title => "Alias" }, r => { code => 'r', type => 'end', title => "Alias" }, S => { code => 'S', type => 'begin', title => "Scalar" }, s => { code => 's', type => 'end', title => "Scalar" }, Q => { code => 'Q', type => 'begin', title => "Sequence" }, q => { code => 'q', type => 'end', title => "Sequence" }, M => { code => 'M', type => 'begin', title => "Mapping" }, m => { code => 'm', type => 'end', title => "Mapping" }, N => { code => 'N', type => 'begin', title => "Node" }, n => { code => 'n', type => 'end', title => "Node" }, X => { code => 'X', type => 'begin', title => "Key:value pair" }, x => { code => 'x', type => 'end', title => "Key:value pair" }, O => { code => 'O', type => 'begin', title => "Document" }, o => { code => 'o', type => 'end', title => "Document" }, Y => { code => 'Y', type => 'begin', title => "Stream" }, y => { code => 'y', type => 'end', title => "Stream" }, '!' => { code => '!', type => 'error', title => "Error" }, '-' => { code => '-', type => 'text', title => "Unparsed" } }; # Command line arguments. my $output_file = ''; my $css_file = ''; my $link_css = ''; my $output = *STDOUT{IO}; my $tree_title = 'Syntax Tree'; my $text_title = 'YAML Text'; # Loaded byte codes. my $data = []; # Tree printing. my $next_id; # Main program. parse_argv(); load_input(); print_output(); # Functions. sub parse_argv { my $do_help = ""; my $do_man = ""; GetOptions("help|h" => \$do_help, "man|m" => \$do_man, "output|o=s" => \$output_file, "css|c=s" => \$css_file, "tree-title|r=s" => \$tree_title, "text-title|x=s" => \$text_title, "link-css|l" => \$link_css) || pod2usage(-verbose => 0); pod2usage(-verbose => 1) if $do_help; pod2usage(-verbose => 2) if $do_man; die("Only one input file, please\n") if @ARGV > 1; die("No CSS file specified to link to\n") if $link_css && !$css_file; if ($output_file) { open ($output, ">$output_file") || die("open(>$output_file): $!"); } } sub load_input { my $stack = []; my $num = 0; while (my $line = <>) { $num++; chomp($line); die("Line $num is not a byte code line\n") unless $line =~ /^(.)(.*)$/; my $code = $1; my $text = $2; my $class = $code2class->{$code}; die("Line $num contains unknown code \"$code\"\n") unless $class; die("Oops!") unless $class->{code} eq $code; if ($class->{type} eq 'bom') { $text = "$code$text"; } else { $text =~ s:\\x0[dD]:↵:g; $text =~ s:\\x0[aA]:↓:g; $text =~ s:\\x85:⇓:g; $text =~ s:\\u2028:§:g; $text =~ s:\\u2029:¶:g; $text =~ s:\t:→:g; $text =~ s: :·:g; $text =~ s:\\u202F:♦:g; } if ($class->{type} eq 'begin') { push(@$stack, [$num, $class]); } elsif ($class->{type} eq 'end') { my $title = $class->{title}; my ($begin_num, $begin_class) = @{pop(@$stack)}; my $begin_code = $begin_class->{code}; my $begin_title = $begin_class->{title}; die("Line $num ($code/$title) " . "ends line $begin_num ($begin_code/$begin_title)\n") if $title ne $begin_title; } push(@$data, [$class, $text]); } #die("Unterminated groups\n") if @$stack; } sub print_output { print $output <<"EOF";
↵ | Carriage Return | |
↓ | Line Feed | |
⇓ | Next Line | |
§ | Line separator | |
¶ | Paragraph separator | |
→ | Tab | |
♦ | Non-breaking space | |
· | Space | |
° | Empty | |
⇔ | Byte order mark |