# Perl Routines to Manipulate CGI input # S.E.Brenner@bioc.cam.ac.uk # $Id: cgi-lib.pl,v 2.14 1996/10/20 12:41:02 brenner Exp $ # # Copyright (c) 1996 Steven E. Brenner # Unpublished work. # Permission granted to use and modify this library so long as the # copyright above is maintained, modifications are documented, and # credit is given for any use of the library. # # Thanks are due to many people for reporting bugs and suggestions # especially Meng Weng Wong, Maki Watanabe, Bo Frese Rasmussen, # Andrew Dalke, Mark-Jason Dominus, Dave Dittrich, Jason Mathews # For more information, see: # http://www.bio.cam.ac.uk/cgi-lib/ $cgi_lib'version = sprintf("%d.%02d", q$Revision: 2.14 $ =~ /(\d+)\.(\d+)/); # Parameters affecting cgi-lib behavior # User-configurable parameters affecting file upload. $cgi_lib'maxdata = 131072; # maximum bytes to accept via POST - 2^17 $cgi_lib'writefiles = 0; # directory to which to write files, or # 0 if files should not be written $cgi_lib'filepre = "cgi-lib"; # Prefix of file names, in directory above # Do not change the following parameters unless you have special reasons $cgi_lib'bufsize = 8192; # default buffer size when reading multipart $cgi_lib'maxbound = 100; # maximum boundary length to be encounterd $cgi_lib'headerout = 0; # indicates whether the header has been printed # ReadParse # Reads in GET or POST data, converts it to unescaped text, and puts # key/value pairs in %in, using "\0" to separate multiple selections # Returns >0 if there was input, 0 if there was no input # undef indicates some failure. # Now that cgi scripts can be put in the normal file space, it is useful # to combine both the form and the script in one place. If no parameters # are given (i.e., ReadParse returns FALSE), then a form could be output. # If a reference to a hash is given, then the data will be stored in that # hash, but the data from $in and @in will become inaccessable. # If a variable-glob (e.g., *cgi_input) is the first parameter to ReadParse, # information is stored there, rather than in $in, @in, and %in. # Second, third, and fourth parameters fill associative arrays analagous to # %in with data relevant to file uploads. # If no method is given, the script will process both command-line arguments # of the form: name=value and any text that is in $ENV{'QUERY_STRING'} # This is intended to aid debugging and may be changed in future releases sub ReadParse { local (*in) = shift if @_; # CGI input local (*incfn, # Client's filename (may not be provided) *inct, # Client's content-type (may not be provided) *insfn) = @_; # Server's filename (for spooled files) local ($len, $type, $meth, $errflag, $cmdflag, $perlwarn, $got); # Disable warnings as this code deliberately uses local and environment # variables which are preset to undef (i.e., not explicitly initialized) $perlwarn = $^W; $^W = 0; binmode(STDIN); # we need these for DOS-based systems binmode(STDOUT); # and they shouldn't hurt anything else binmode(STDERR); # Get several useful env variables $type = $ENV{'CONTENT_TYPE'}; $len = $ENV{'CONTENT_LENGTH'}; $meth = $ENV{'REQUEST_METHOD'}; if ($len > $cgi_lib'maxdata) { #' &CgiDie("cgi-lib.pl: Request to receive too much data: $len bytes\n"); } if (!defined $meth || $meth eq '' || $meth eq 'GET' || $type eq 'application/x-www-form-urlencoded') { local ($key, $val, $i); # Read in text if (!defined $meth || $meth eq '') { $in = $ENV{'QUERY_STRING'}; $cmdflag = 1; # also use command-line options } elsif($meth eq 'GET' || $meth eq 'HEAD') { $in = $ENV{'QUERY_STRING'}; } elsif ($meth eq 'POST') { if (($got = read(STDIN, $in, $len) != $len)) {$errflag="Short Read: wanted $len, got $got\n";}; } else { &CgiDie("cgi-lib.pl: Unknown request method: $meth\n"); } @in = split(/[&;]/,$in); push(@in, @ARGV) if $cmdflag; # add command-line parameters foreach $i (0 .. $#in) { # Convert plus to space $in[$i] =~ s/\+/ /g; # Split into key and value. ($key, $val) = split(/=/,$in[$i],2); # splits on the first =. # Convert %XX from hex numbers to alphanumeric $key =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $val =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; # Associate key and value $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator $in{$key} .= $val; } } elsif ($ENV{'CONTENT_TYPE'} =~ m#^multipart/form-data#) { # for efficiency, compile multipart code only if needed $errflag = !(eval <<'END_MULTIPART'); local ($buf, $boundary, $head, @heads, $cd, $ct, $fname, $ctype, $blen); local ($bpos, $lpos, $left, $amt, $fn, $ser); local ($bufsize, $maxbound, $writefiles) = ($cgi_lib'bufsize, $cgi_lib'maxbound, $cgi_lib'writefiles); # The following lines exist solely to eliminate spurious warning messages $buf = ''; ($boundary) = $type =~ /boundary="([^"]+)"/; #"; # find boundary ($boundary) = $type =~ /boundary=(\S+)/ unless $boundary; &CgiDie ("Boundary not provided: probably a bug in your server") unless $boundary; $boundary = "--" . $boundary; $blen = length ($boundary); if ($ENV{'REQUEST_METHOD'} ne 'POST') { &CgiDie("Invalid request method for multipart/form-data: $meth\n"); } if ($writefiles) { local($me); stat ($writefiles); $writefiles = "/tmp" unless -d _ && -r _ && -w _; # ($me) = $0 =~ m#([^/]*)$#; $writefiles .= "/$cgi_lib'filepre"; } # read in the data and split into parts: # put headers in @in and data in %in # General algorithm: # There are two dividers: the border and the '\r\n\r\n' between # header and body. Iterate between searching for these # Retain a buffer of size(bufsize+maxbound); the latter part is # to ensure that dividers don't get lost by wrapping between two bufs # Look for a divider in the current batch. If not found, then # save all of bufsize, move the maxbound extra buffer to the front of # the buffer, and read in a new bufsize bytes. If a divider is found, # save everything up to the divider. Then empty the buffer of everything # up to the end of the divider. Refill buffer to bufsize+maxbound # Note slightly odd organization. Code before BODY: really goes with # code following HEAD:, but is put first to 'pre-fill' buffers. BODY: # is placed before HEAD: because we first need to discard any 'preface,' # which would be analagous to a body without a preceeding head. $left = $len; PART: # find each part of the multi-part while reading data while (1) { die $@ if $errflag; $amt = ($left > $bufsize+$maxbound-length($buf) ? $bufsize+$maxbound-length($buf): $left); $errflag = (($got = read(STDIN, $buf, $amt, length($buf))) != $amt); die "Short Read: wanted $amt, got $got\n" if $errflag; $left -= $amt; $in{$name} .= "\0" if defined $in{$name}; $in{$name} .= $fn if $fn; $name=~/([-\w]+)/; # This allows $insfn{$name} to be untainted if (defined $1) { $insfn{$1} .= "\0" if defined $insfn{$1}; $insfn{$1} .= $fn if $fn; } BODY: while (($bpos = index($buf, $boundary)) == -1) { die $@ if $errflag; if ($name) { # if no $name, then it's the prologue -- discard if ($fn) { print FILE substr($buf, 0, $bufsize); } else { $in{$name} .= substr($buf, 0, $bufsize); } } $buf = substr($buf, $bufsize); $amt = ($left > $bufsize ? $bufsize : $left); #$maxbound==length($buf); $errflag = (($got = read(STDIN, $buf, $amt, $maxbound)) != $amt); die "Short Read: wanted $amt, got $got\n" if $errflag; $left -= $amt; } if (defined $name) { # if no $name, then it's the prologue -- discard if ($fn) { print FILE substr($buf, 0, $bpos-2); } else { $in {$name} .= substr($buf, 0, $bpos-2); } # kill last \r\n } close (FILE); last PART if substr($buf, $bpos + $blen, 4) eq "--\r\n"; substr($buf, 0, $bpos+$blen+2) = ''; $amt = ($left > $bufsize+$maxbound-length($buf) ? $bufsize+$maxbound-length($buf) : $left); $errflag = (($got = read(STDIN, $buf, $amt, length($buf))) != $amt); die "Short Read: wanted $amt, got $got\n" if $errflag; $left -= $amt; undef $head; undef $fn; HEAD: while (($lpos = index($buf, "\r\n\r\n")) == -1) { die $@ if $errflag; $head .= substr($buf, 0, $bufsize); $buf = substr($buf, $bufsize); $amt = ($left > $bufsize ? $bufsize : $left); #$maxbound==length($buf); $errflag = (($got = read(STDIN, $buf, $amt, $maxbound)) != $amt); die "Short Read: wanted $amt, got $got\n" if $errflag; $left -= $amt; } $head .= substr($buf, 0, $lpos+2); push (@in, $head); @heads = split("\r\n", $head); ($cd) = grep (/^\s*Content-Disposition:/i, @heads); ($ct) = grep (/^\s*Content-Type:/i, @heads); ($name) = $cd =~ /\bname="([^"]+)"/i; #"; ($name) = $cd =~ /\bname=([^\s:;]+)/i unless defined $name; ($fname) = $cd =~ /\bfilename="([^"]*)"/i; #"; # filename can be null-str ($fname) = $cd =~ /\bfilename=([^\s:;]+)/i unless defined $fname; $incfn{$name} .= (defined $in{$name} ? "\0" : "") . (defined $fname ? $fname : ""); ($ctype) = $ct =~ /^\s*Content-type:\s*"([^"]+)"/i; #"; ($ctype) = $ct =~ /^\s*Content-Type:\s*([^\s:;]+)/i unless defined $ctype; $inct{$name} .= (defined $in{$name} ? "\0" : "") . $ctype; if ($writefiles && defined $fname) { $ser++; $fn = $writefiles . ".$$.$ser"; open (FILE, ">$fn") || &CgiDie("Couldn't open $fn\n"); binmode (FILE); # write files accurately } substr($buf, 0, $lpos+4) = ''; undef $fname; undef $ctype; } 1; END_MULTIPART if ($errflag) { local ($errmsg, $value); $errmsg = $@ || $errflag; foreach $value (values %insfn) { unlink(split("\0",$value)); } &CgiDie($errmsg); } else { # everything's ok. } } else { &CgiDie("cgi-lib.pl: Unknown Content-type: $ENV{'CONTENT_TYPE'}\n"); } # no-ops to avoid warnings $insfn = $insfn; $incfn = $incfn; $inct = $inct; $^W = $perlwarn; return ($errflag ? undef : scalar(@in)); } # PrintHeader # Returns the magic line which tells WWW that we're an HTML document sub PrintHeader { return "Content-type: text/html\n\n"; } # HtmlTop # Returns the
of a document and the beginning of the body # with the title and a body