#!/usr/bin/perl -w # Modules. use strict; use English; use Pod::Usage; use Getopt::Long; use IO::File; # YEAST byte codes. my $code2class = { U => { code => 'U', type => 'bom', title => "Byte order mark" }, T => { code => 'T', type => 'text', title => "Content text" }, t => { code => 't', type => 'text', title => "Non-content text" }, B => { code => 'B', type => 'line', title => "Content line break" }, b => { code => 'b', type => 'line', title => "Non-content line break" }, L => { code => 'L', type => 'line', title => "Line break normalized to line feed" }, l => { code => 'l', type => 'line', title => "Line break folded to space" }, I => { code => 'I', type => 'text', title => "Indicator character" }, w => { code => 'w', type => 'text', title => "Non-content white space" }, i => { code => 'i', type => 'text', title => "Indentation white space" }, K => { code => 'K', type => 'text', title => "Document start marker" }, k => { code => 'k', type => 'text', title => "Document end marker" }, E => { code => 'E', type => 'begin', title => "Escape sequence" }, e => { code => 'e', type => 'end', title => "Escape sequence" }, C => { code => 'C', type => 'begin', title => "Comment" }, c => { code => 'c', type => 'end', title => "Comment" }, D => { code => 'D', type => 'begin', title => "Directive" }, d => { code => 'd', type => 'end', title => "Directive" }, G => { code => 'G', type => 'begin', title => "Tag" }, g => { code => 'g', type => 'end', title => "Tag" }, H => { code => 'H', type => 'begin', title => "Handle" }, h => { code => 'h', type => 'end', title => "Handle" }, A => { code => 'A', type => 'begin', title => "Anchor" }, a => { code => 'a', type => 'end', title => "Anchor" }, P => { code => 'P', type => 'begin', title => "Properties" }, p => { code => 'p', type => 'end', title => "Properties" }, R => { code => 'R', type => 'begin', title => "Alias" }, r => { code => 'r', type => 'end', title => "Alias" }, S => { code => 'S', type => 'begin', title => "Scalar" }, s => { code => 's', type => 'end', title => "Scalar" }, Q => { code => 'Q', type => 'begin', title => "Sequence" }, q => { code => 'q', type => 'end', title => "Sequence" }, M => { code => 'M', type => 'begin', title => "Mapping" }, m => { code => 'm', type => 'end', title => "Mapping" }, N => { code => 'N', type => 'begin', title => "Node" }, n => { code => 'n', type => 'end', title => "Node" }, X => { code => 'X', type => 'begin', title => "Key:value pair" }, x => { code => 'x', type => 'end', title => "Key:value pair" }, O => { code => 'O', type => 'begin', title => "Document" }, o => { code => 'o', type => 'end', title => "Document" }, Y => { code => 'Y', type => 'begin', title => "Stream" }, y => { code => 'y', type => 'end', title => "Stream" } }; # Command line arguments. my $output_file = ''; my $css_file = ''; my $link_css = ''; my $output = *STDOUT{IO}; my $tree_title = 'Syntax Tree'; my $text_title = 'YAML Text'; # Loaded byte codes. my $data = []; # Tree printing. my $next_id; # Main program. parse_argv(); load_input(); print_output(); # Functions. sub parse_argv { my $do_help = ""; my $do_man = ""; GetOptions("help|h" => \$do_help, "man|m" => \$do_man, "output|o=s" => \$output_file, "css|c=s" => \$css_file, "tree-title|r=s" => \$tree_title, "text-title|x=s" => \$text_title, "link-css|l" => \$link_css) || pod2usage(-verbose => 0); pod2usage(-verbose => 1) if $do_help; pod2usage(-verbose => 2) if $do_man; die("Only one input file, please\n") if @ARGV > 1; die("No CSS file specified to link to\n") if $link_css && !$css_file; if ($output_file) { open ($output, ">$output_file") || die("open(>$output_file): $!"); } } sub load_input { my $stack = []; my $num = 0; while (my $line = <>) { $num++; chomp($line); die("Line $num is not a byte code line\n") unless $line =~ /^(.)(.*)$/; my $code = $1; my $text = $2; my $class = $code2class->{$code}; die("Line $num contains unknown code \"$code\"\n") unless $class; die("Oops!") unless $class->{code} eq $code; if ($class->{type} eq 'bom') { $text = "$code$text"; } else { $text =~ s:\\x0[dD]:↵:g; $text =~ s:\\x0[aA]:↓:g; $text =~ s:\\x85:⇓:g; $text =~ s:\\u2028:§:g; $text =~ s:\\u2029:¶:g; $text =~ s:\t:→:g; $text =~ s: :·:g; $text =~ s:\\u202F:♦:g; } if ($class->{type} eq 'begin') { push(@$stack, [$num, $class]); } elsif ($class->{type} eq 'end') { my $title = $class->{title}; my ($begin_num, $begin_class) = @{pop(@$stack)}; my $begin_code = $begin_class->{code}; my $begin_title = $begin_class->{title}; die("Line $num ($code/$title) " . "ends line $begin_num ($begin_code/$begin_title)\n") if $title ne $begin_title; } push(@$data, [$class, $text]); } die("Unterminated groups\n") if @$stack; } sub print_output { print $output <<"EOF"; YEAST2HTML EOF print_css(); print_script(); print $output <<"EOF"; EOF print_text(); print_tree(); print $output <<"EOF"; EOF } sub print_css { if ($link_css) { print $output "\n"; return; } print $output "\n"; } sub print_script { print $output <<"EOF"; EOF } sub print_tree { print $output "
\n"; print $output "

$tree_title

"; $next_id = 0; start_tree_node(0, "Legend"); print_legend(); end_tree_node(); my $depth = 0; for my $entry (@$data) { my ($class, $text) = @$entry; if ($class->{type} eq 'begin') { start_tree_node($depth++, $class->{title}); } elsif ($class->{type} eq 'end') { end_tree_node(); $depth--; } else { my $label = $class->{title}; $label .= " ($text)" if $class->{type} eq 'bom'; tree_leaf($depth, $label); } } print $output "
\n"; } sub start_tree_node { my $depth = shift; my $text = shift; print $output "
\n"; for (my $i = 1; $i <= $depth; $i++) { print $output " " } print $output ""; print $output " 0 ? "" : " legend_node") . "\" onclick=\"highlight($next_id)\">$text\n"; print $output "
\n"; $next_id++; } sub end_tree_node { print $output "
\n"; } sub tree_leaf { my $depth = shift; my $text = shift; print $output "
\n"; for (my $i = 1; $i <= $depth; $i++) { print $output " " } print $output "·"; print $output "$text\n"; print $output "
\n"; $next_id++; } sub print_legend { print <<"EOF";
  Carriage Return
  Line Feed
  Next Line
  § Line separator
  Paragraph separator
  Tab
  Non-breaking space
  · Space
  ° Empty
  Byte order mark
EOF } sub print_text { print $output "
\n"; print $output "

$text_title

"; $next_id = 1; my $pending_break = 0; for my $entry (@$data) { my ($class, $text) = @$entry; if ($class->{type} eq 'end') { print $output ""; next; } if ($pending_break) { print $output "
\n"; $pending_break = 0; } print $output "{type} eq 'begin'; print ">"; $next_id++; next if $class->{type} eq 'begin'; if ($class->{type} eq 'bom') { print $output "⇔"; } else { print $output "$text"; } $pending_break = $class->{type} eq 'line'; } print $output "\n
\n"; } __END__ =head1 NAME yeast2html - Convert YEAST byte codes to viewable HTML =head1 SYNOPSIS yeast2html [options] [yeast-file] =head1 DESCRIPTION This Perl script is designed to allow exploring the syntactical structure of YAML files in an interactive way. The input file is a sequence of YEAST byte codes that describe the YAML text. The output is an XHTML file that allows viewing the YAML syntax tree together with the original (reconstructed) YAML text. =head1 COMMAND LINE OPTIONS =over 4 =item B<--output|-o> I Redirect the output XHTML to the specified I. By default the XHTML is written to standard output. =item B<--css|-c> I Use the specified I to control the style of the generated XHTML. By default, a minimal set of CSS rules is used, which doesn't look very pretty. =item B<--link-css|-l> By default, the content of the I is embedded in the header of the generated XHTML. If this flag is given, this is replaced by a link to the I. Note that in this case the I path must be relative to the URL used for the XHTML file. =item B<--tree-title|-r> I The title for the tree part of the display. By default, it says "Syntax Tree". =item B<--text-title|-x> I<title> The title for the text part of the display. By default, it says "YAML Text". =item B<--help|-h> Print short usage message and exit. =item B<--man|-m> Print this man page and exit. =back =head1 SEE ALSO L<yaml2yeast> =head1 AUTHOR Oren Ben-Kiki <oren@ben-kiki.org> =head1 COPYRIGHT Copyright (c) 2007, Oren Ben-Kiki This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA