#!/usr/bin/perl -T # # darcs.cgi - the darcs repository viewer # # Copyright (c) 2004 Will Glozer # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # This program calls darcs (or its own subroutines) to generate XML # which is rendered into HTML by XSLT. It is capable of displaying # the files in a repository, various patch histories, annotations, etc. # use strict; use CGI qw( :standard ); use CGI::Util; use File::Basename; use File::stat; use IO::File; use POSIX; ## the following variables can be customized to reflect your system ## configuration by defining them appropriately in the file ## "${prefix}/etc/darcs/cgi.conf". The syntax accepts equals signs or simply ## blanks separating values from assignments. $ENV{'PATH'} = read_conf('PATH', $ENV{'PATH'}); # path to executables, or just the executable if they are in $ENV{'PATH'} my $darcs_program = read_conf("darcs", "darcs"); my $xslt_program = read_conf("xsltproc", "xsltproc"); # directory containing repositories my $repository_root = read_conf("reposdir", "/var/www"); # XSLT template locations my $template_root = read_conf("xslt_dir", '${datarootdir}/darcs/xslt'); my $xslt_annotate = "$template_root/annotate.xslt"; my $xslt_browse = "$template_root/browse.xslt"; my $xslt_patches = "$template_root/patches.xslt"; my $xslt_repos = "$template_root/repos.xslt"; my $xslt_rss = "$template_root/rss.xslt"; my $xslt_errors = "$template_root/errors.xslt"; # CSS stylesheet that XSLT templates refer to. This is a HTTP request # path, not a local file system path. The default will cause darcs.cgi # to serve the stylesheet rather than the web server. my $stylesheet = read_conf("stylesheet", "/cgi-bin/darcs.cgi/styles.css"); # location of the CSS stylesheet that darcs.cgi will serve if it # receives a request for '/styles.css' my $css_styles = read_conf("css_styles", '${prefix}/etc/darcs/styles.css'); # location of the favicon that darcs.cgi will serve if it # receives a request for '/[\w\-]+/favicon.ico' my $favicon = read_conf("favicon", "/cgi-bin/favicon.ico"); # XML source for the error pages my $xml_errors = "$template_root/errors.xml"; # encoding to include in XML declaration my $xml_encoding = read_conf("xml_encoding", "UTF-8"); ## end customization # ---------------------------------------------------------------------- # read a value from the cgi.conf file. { my(%conf); sub read_conf { my ($flag, $val) = @_; $val = "" if !defined($val); if (!%conf && open(CGI_CONF, '${prefix}/etc/darcs/cgi.conf')) { while () { chomp; next if /^\s*(?:\#.*)?$/; # Skip blank lines and comment lines if (/^\s*(\S+)\s*(?:\=\s*)?(\S+)\s*$/) { $conf{$1} = $2; # print "read_conf: $1 = $2\n"; } else { warn "read_conf: $_\n"; } } close(CGI_CONF); } $val = $conf{$flag} if exists($conf{$flag}); return $val; } } # open xsltproc to transform and output `xml' with stylesheet file `xslt' sub transform { my ($xslt, $args, $content_type) = @_; $| = 1; printf "Content-type: %s\r\n\r\n", $content_type || "text/html"; my $pipe = new IO::File "| $xslt_program $args $xslt -"; $pipe->autoflush(0); return $pipe; } sub pristine_dir { my ($repo) = @_; my $pristine = "current"; if (! -d "${repository_root}/${repo}/_darcs/$pristine") { $pristine = "pristine"; } return "${repository_root}/${repo}/_darcs/$pristine"; } # begin an XML document with a root element and the repository path sub make_xml { my ($fh, $repo, $dir, $file) = @_; my ($full_path, $path) = '/'; printf $fh qq(\n); printf $fh qq(\n), $repo, ($dir ? "$dir/" : ''), ($file ? "$file" : ''); print $fh qq(\n); foreach $path (split('/', "$repo/$dir")) { $full_path .= "$path/"; print $fh qq($path\n); } if ($file) { print $fh qq($file\n) if $file; } print $fh qq(\n\n); } # finish XML output sub finish_xml { my ($fh) = @_; print $fh "\n\n"; $fh->flush; } # run darcs and wrap the output in an XML document sub darcs_xml { my ($fh, $repo, $cmd, $args, $dir, $file) = @_; make_xml($fh, $repo, $dir, $file); push(@$args, '--xml-output'); darcs($fh, $repo, $cmd, $args, $dir, $file); finish_xml($fh); } # run darcs with output redirected to the specified file handle sub darcs { my ($fh, $repo, $cmd, $args, $dir, $file) = @_; my (@darcs_argv) = ($darcs_program, $cmd, @$args); # push target only if there is one, otherwise darcs will get an empty param if ($dir || $file) { push(@darcs_argv, sprintf("%s%s%s", $dir, ($dir ? '/' : ''), $file)); } my($pid) = fork; if ($pid) { # in the parent process my($status) = waitpid($pid, 0); die "$darcs_program exited with status $?\n" if $?; } elsif(defined($pid)) { # in the child process open(STDIN, '/dev/null'); if (defined($fh)) { open(STDOUT, '>&', $fh) || die "can't dup to stdout: $!\n"; } chdir "$repository_root/$repo" || die "chdir: $repository_root/$repo: $!\n"; exec @darcs_argv; die "can't exec ".$darcs_argv[0].": $!\n"; } else { # fork failed die "can't fork: $!\n"; } } # get a directory listing as XML output sub dir_listing { my ($fh, $repo, $dir) = @_; make_xml($fh, $repo, $dir, ''); print $fh "\n"; my $dir_ = pristine_dir ($repo) . "/$dir"; opendir(DH, $dir_); while( defined (my $file_ = readdir(DH)) ) { next if $file_ =~ /^\.\.?$/; my $file = "$dir_/$file_"; my $secs = stat($file)->mtime; my $mtime = localtime($secs); my $ts = POSIX::strftime("%Y%m%d%H%M%S", gmtime $secs); my ($name, $type); if (-d $file) { ($name, $type) = (basename($file) . '/', 'directory'); } else { ($name, $type) = (basename($file), 'file'); } printf $fh qq( <$type name="$name" modified="$mtime" ts="$ts" />\n); } closedir(DH); print $fh "\n"; finish_xml($fh); } # get a repository listing as XML output sub repo_listing { my($fh) = @_; make_xml($fh, "", "", ""); print $fh "\n"; opendir(DH, $repository_root); while( defined (my $name = readdir(DH)) ) { next if $name =~ /^\.\.?$/; if (-d "$repository_root/$name/_darcs") { printf $fh qq( \n); } } closedir(DH); print $fh "\n"; finish_xml($fh); return $fh; } # show an error page sub show_error { my ($type, $code, $message) = @_; my $xml; # set the xslt processing arguments my $xslt_args = qq { --stringparam error-type '$type' --stringparam stylesheet '$stylesheet' }; $xslt_args =~ s/\s+/ /gm; print "Status: $code $message\r\n\r\n"; system("$xslt_program $xslt_args $xslt_errors $xml_errors"); } # check if the requested resource has been modified since the client last # saw it. If not send HTTP status code 304, otherwise set the Last-modified # and Cache-control headers. sub is_cached { my ($path) = @_; my ($stat) = stat($path); # stat may fail because the path was renamed or deleted but still referred # to by older darcs patches if ($stat) { my $last_modified = CGI::expires($stat->mtime); if (http('If-Modified-Since') eq $last_modified) { print("Status: 304 Not Modified\r\n\r\n"); return 1; } print("Cache-control: max-age=0, must-revalidate\r\n"); print("Last-modified: $last_modified\r\n"); } return 0; } # safely extract a parameter from the http request. This applies a regexp # to the parameter which should group only the appropriate parameter value sub safe_param { my ($param, $regex, $default) = @_; my $value = CGI::Util::unescape(param($param)); return ($value =~ $regex) ? $1 : $default; } # common regular expressions for validating passed parameters my $hash_regex = qr/^([\w\-.]+)$/; my $path_regex = qr@^([^\\!\$\^&*()\[\]{}<>`|';"?\r\n]+)$@; # respond to a CGI request sub respond { # untaint the full URL to this CGI my $cgi_url = CGI::Util::unescape(url()); $cgi_url =~ $path_regex or die qq(bad url "$cgi_url"); $cgi_url = $1; # untaint script_name, reasonable to expect only \w, -, /, and . in the name my $script_name = CGI::Util::unescape(script_name()); $script_name =~ qr~^([\w/.\-\~]+)$~ or die qq(bad script_name "$script_name"); $script_name = $1; # untaint simple parameters, which can only have chars matching \w+ my $cmd = safe_param('c', '^(\w+)$', 'browse'); my $sort = safe_param('s', '^(\w+)$', ''); # set the xslt processing arguments my $xslt_args = qq { --stringparam cgi-program '$script_name' --stringparam cgi-url '$cgi_url' --stringparam sort-by '$sort' --stringparam stylesheet '$stylesheet' }; $xslt_args =~ s/\s+/ /gm; my ($path) = CGI::Util::unescape(path_info()); # don't allow ./ or ../ in paths $path =~ s|[.]+/||g; # check whether we're asking for styles.css if ($path eq '/styles.css') { return if is_cached($css_styles); open (STYLES_CSS, $css_styles) or die qq(couldn't open "${css_styles}"); my $size = stat($css_styles)->size; print "Content-length: $size\r\n"; print "Content-type: text/css\r\n\r\n"; while () { print $_; } close (STYLES_CSS); return; } # check whether we're asking for favicon.ico if ($path =~ '/[\w\-]+/favicon.ico') { return if is_cached($favicon); open (FAVICON, $favicon) or die qq(couldn't open "${favicon}"); my $size = stat($favicon)->size; print "Content-length: $size\r\n"; print "Content-type: image/x-icon\r\n\r\n"; while () { print $_; } close (FAVICON); return; } # when no repository is requested display available repositories if (length($path) < 2) { my $fh = transform($xslt_repos, $xslt_args); repo_listing($fh); return; } # don't allow any shell meta characters in paths $path =~ $path_regex or die qq(bad path_info "$path"); my @path = split('/', substr($1, 1)); # split the path into a repository, directory, and file my ($repo, $dir, $file, @bits) = ('', '', ''); while (@path > 0) { $repo = join('/', @path); # check if remaining path elements refer to a repo if (-d "${repository_root}/${repo}/_darcs") { if (@bits > 1) { $dir = join('/', @bits[0..$#bits - 1]); } $file = $bits[$#bits]; # check if last element of path, stored in $file, is really a dir if (-d (pristine_dir ($repo) . "/${dir}/${file}")) { $dir = ($dir ? "$dir/$file" : $file); $file = ''; } last; } else { $repo = ''; unshift(@bits, pop @path); } } # make sure the repository exists unless ($repo) { show_error('invalid-repository', '404', 'Invalid repository'); return; } # don't generate output unless the requested path has been # modified since the client last saw it. return if is_cached(pristine_dir ($repo) . "/$dir/$file"); # untaint patches and tags. Tags can have arbitrary values, so # never pass these unquoted, on pain of pain! my $patch = safe_param('p', $hash_regex); my $tag = safe_param('t', '^(.+)$'); my @darcs_args; push(@darcs_args, '--match', "hash $patch") if $patch; push(@darcs_args, '-t', $tag) if $tag; # process the requested command if ($cmd eq 'browse') { my $fh = transform($xslt_browse, $xslt_args); dir_listing($fh, $repo, $dir); } elsif ($cmd eq 'patches') { # patches as an option is used to support "--patches" if (my $patches = safe_param('patches','^(.+)$')) { push @darcs_args, '--patches', $patches; } my $fh = transform($xslt_patches, $xslt_args); darcs_xml($fh, $repo, "changes", \@darcs_args, $dir, $file); } elsif ($cmd eq 'annotate') { push(@darcs_args, '--summary'); my $creator_hash = safe_param('ch', $hash_regex); my $original_path = safe_param('o', $path_regex); my $fh = transform($xslt_annotate, $xslt_args); # use the creator hash and original file name when available so # annotations can span renames if ($creator_hash ne '' && $original_path ne '') { push(@darcs_args, '--creator-hash', $creator_hash); darcs_xml($fh, $repo, "annotate", \@darcs_args, '', $original_path); } else { darcs_xml($fh, $repo, "annotate", \@darcs_args, $dir, $file); } } elsif ($cmd eq 'diff') { push(@darcs_args, '-u'); print "Content-type: text/plain\r\n\r\n"; darcs(undef, $repo, "diff", \@darcs_args, $dir, $file); } elsif ($cmd eq 'rss') { push(@darcs_args, '--last', '25'); my $fh = transform($xslt_rss, $xslt_args, "application/rss+xml"); darcs_xml($fh, $repo, "changes", \@darcs_args, $dir, $file); } else { show_error('invalid-command', '400', 'Invalid command'); } } # run a self-test when the --check argument is supplied if ($ARGV[0] eq '--check') { (read_conf("css_styles", "abc") ne "abc") || die "cannot read config file: $!\n"; (`$darcs_program`) || die "cannot execute darcs as '$darcs_program': $!\n"; (`$xslt_program`) || die "cannot execute xstlproc as '$xslt_program': $!\n"; (-d $repository_root && -r $repository_root) || die "cannot read repository root directory '$repository_root': $!\n"; (-d $template_root && -r $template_root) || die "cannot read template root directory '$template_root': $!\n"; (-f $css_styles) || die "cannot read css stylesheet '$css_styles': $!\n"; (-f $xml_errors) || die "cannot read error messages '$xml_errors': $!\n"; exit 0; } # handle the CGI request respond();