#!/home/charlie/perl
#
# @(#)url_get.pl        1.3 2/9/94
# @(#)url_get.pl        1.3 /tmp_mnt/home/ccix/u16/graphics/staff/zippy/perl/SCCS/s.url_get.pl
#
# url_get.pl      --- get a document given a WWW URL
#
# Hacked by Stephane Bortzmeyer <bortzmeyer@cnam.cnam.fr> to add "ftp" URLs.
# 22 Jan 1994
#
# Jack Lund 9/3/93 zippy@ccwf.cc.utexas.edu
#
# from hget by:
# Oscar Nierstrasz 26/8/93 oscar@cui.unige.ch
#
# Syntax:
#
# &url_get($url,$prev [$file])
#
# $url - URL of document you want
#
# $prev = previous url
#
# $file - optional file you want it put into. Specify "&STDOUT" if you
#         want it to go to stdout; Leave this off if you want url_get to
#         return the document as one (possibly VERY LARGE) string
########################################################################

package url_get;

$home = $ENV{"HOME"};

require "chat2.pl";
require "url_parse.pl";

$ownername = 'charless@sco.com'; # HTTP From field
$UserAgent = 'websnarf/0.2 (web scan/mirror tool)';  # HTTP User-Agent field

sub url_get {
    local($url, $file) = @_;

# print "url_get: I have $url\n";

    undef ($protocol, $host, $port, $rest1, $rest2, $rest3);
    ($protocol, $host, $port, $rest1, $rest2, $rest3) = &url_parse'parse_url($url);
    if ($host eq "" ) { $host = $MAIN'lasthost};

    if ($protocol !~ /(http|file|news|ftp|telnet|gopher|wais)/) {
       print "&url_get(): no protocol specified!\n";
       print "protocol: $protocol\nhost: $host\nport: $port\nrest1: $rest1\nrest2: $rest2\n";
       return 1;
    };
    if ($host !~ /(\w+\.*)+/) {
       print "&url_get(): confused! host is $host ... returning\n";
       return 1;
    };

# print "url_get(): slurping <$rest1 $rest2 $rest3> via $protocol from host <$host>\n";

# Convert any characters in the string specified in hex by "%xx" to
# the correct character. Note we do this *after* parsing the URL!

    $rest1 =~ s/%(\w\w)/sprintf("%c", hex($1))/ge;

# print "Protocol is: $protocol\n";
    if ($protocol eq "http") {
        undef $foo;
        $foo = &url_get'http_get($host,$port,$rest1, $file);

        return $foo;
    }

    if ($protocol eq "gopher") {

# Convert from hex. See above.

        $rest2 =~ s/%(\w\w)/sprintf("%c", hex($1))/ge if ($rest2);
        $rest3 =~ s/%(\w\w)/sprintf("%c", hex($1))/ge if ($rest3);

        return &url_get'gopher_get($host, $port, $rest1, $rest2, $rest3, $file);
    }

    if ($protocol eq "file" || $protocol eq "ftp") {
        return &url_get'file_get($host, $port, $rest1, $file);
    }

    if ($protocol eq "news") {
        $result= &url_get'news_get($host, $port, $rest1, $file);
        return $result;
    }

    return "Protocol $protocol not supported!\n";
}

package url_get;     # Everything after this is "private"

sub http_get {
    local($host,$port,$request,$file) = @_;
    local($output) = "";

    if ($file) { open(OUT, ">$file") || return "Error opening output file $file: $!\n"; }
    $thisurl_plus = "$request HTTP/1.0\r\n";
    $thisurl_plus .= "User-Agent: $UserAgent\r\n";
    $thisurl_plus .= "From: $ownername\r\n";
    $thisurl_plus .= "Accept: text/*\r\n";
    $thisurl_plus .= "\r\n";

    ($handle = &chat'open_port($host, $port))
        || return "chat'open($host,$port): $!\n";
    &chat'print($handle,"GET $thisurl_plus\n")
        || return "chat'print(GET $thisurl_plus): $!\n";
    *S = *chat'S;
    $output = &listen(10);
    &chat'close($handle);
    close(OUT) if ($file);

    return $output ;
}

sub gopher_get {
    local($host,$port,$gtype,$selector,$search,$file) = @_;
    local($bintypes) = "59sgI";       # Binary gopher types
    local($goodtypes) = "01579sghI";  # types we can handle
    local($output) = "";

    if ($file) { open(OUT, ">$file") || return "Error opening output file $file: $!\n"; }
    $request = ($search ? "$selector\t$search\t$" : $selector);
    ($handle = &chat'open_port($host, $port))
        || return "chat'open($host,$port): $!\n";
    &chat'print($handle,"$request\n")
        || return "chat'print($request): $!\n";
        *S = *chat'S;

    if (index($goodtypes, $gtype) == -1) {
        return "Can't retrieve gopher type $gtype\n";
    }

# If this is a binary document, retreive it using sysreads rather
# than <S>

    if (index($bintypes, $gtype) > -1) {
        $done = 0;
        $rmask = "";
        vec($rmask,fileno(S),1) = 1;
        do {
            ($nfound, $rmask) =
                select($rmask, undef, undef, $timeout);
            if ($nfound) {
                $nread = sysread(S, $thisbuf, 1024);
                if ($nread > 0) {
                    $output .= $thisbuf;
                    if ($file)
                    {
                        syswrite(OUT, $thisbuf, $nread)
                            || return "Syswrite: $!\n";
                    } else {
                        $output .= $thisbuf;
                    }
                } else {
                    $done++;
                }
            } else {
                warn "Timeout\n"; $done++;
            }
        } until $done;
    }

# This is an ASCII document, and we can get it line-by-line using <S>

    else {
       $output= &listen(10);
    }
    &chat'close($handle);
    close(OUT) if ($file);
    return($output) unless ($file);
}

sub file_get {
    local($host, $port, $path, $file) = @_;
    local($error);
    local($output) = "";

    $localhost = `hostname`;
    if ($host eq $localhost && !defined($port)) {
        open(IN, $path) || return "$path: $!\n";
        $binary = (-B IN ? 1 : 0);
        if ($file) { open(OUT, ">$file") || return "Error opening output file $file: $!\n"; }
        if ($binary)
        {
            $done = 0;
            $rmask = "";
            vec($rmask,fileno(S),1) = 1;
            do {
                ($nfound, $rmask) =
                    select($rmask, undef, undef, $timeout);
                if ($nfound) {
                    $nread = sysread(S, $thisbuf, 1024);
                    if ($nread > 0) {
                        if ($file)
                        {
                            syswrite(OUT, $thisbuf, $nread)
                                || return "Syswrite: $!\n";
                        } else { $output .= $thisbuf; }
                    } else {
                        $done++;
                    }
                } else {
                    warn "Timeout\n"; $done++;
                }
            } until $done;
        }
        else
        {
            while (<IN>) {
                if ($file) { print OUT "$_\n"; }
                else { $output .= "$_\n"; }
            }
        }
        close(IN);
        close(OUT) if ($file);
    }
    else {
      ($handle = &chat'open_port($host, $port))
          || return "chat'open($host,$port): $!\n";
      &chat'print($handle,"GET $request\n")
          || return "chat'print(GET $request): $!\n";
      *S = *chat'S;
      $output = &listen(10);
      &chat'close($handle);
      close(OUT) if ($file);
      return($output) unless ($file);
}
#        &ftp'open($host) || return "Unable to open ftp connection to $host\n";
#        if ($opt_b && ! &ftp'type("I")) {
#            $error=&ftp'error;
#            return "$error\n";
#        }
#        if ($file) { &ftp'get_file($path, $file)
#            || return "Unable to get file $path from $host\n"; }
#        else { $output = &ftp'get($path)
#            || return "Unable to get file $path from $host\n"; }
#        &ftp'close;
#    }
#    return($output) unless ($file);
}

sub news_get {
    local($host, $port, $article) = @_;
    local($output) = "";

    if ($file) { open(OUT, ">$file") || return "Error opening output file $file: $!\n"; }
    ($handle = &chat'open_port($host, $port))
        || return "chat'open($host,$port): $!\n";

    if ($article =~ /^[^<].+@.+[^>]$/) {
        $request = "article <$article>";
    }
    elsif ($article =~ /^<.+@.+>$/) {
        $request = "article $article";
    }
    elsif ($article =~ /^\*$/) {
        return "Only support URLs of the form: news:article\n";
    }
    elsif ($article) {
        return "Only support URLs of the form: news:article\n";
    }
    else {
        return "Bad url\n";
    }

# Read NNTP Connect message

    *S = *chat'S;
    $string = <S>;
    $string =~ /^(\d*) (.*)$/;
    return "NNTP Error: $2\n" unless ($1 eq '200');

# Send request

    &chat'print($handle,"$request\r\n")
        || return "chat'print($request): $!\n";

# Read reply message

    $string = <S>;
    $string =~ /^(\d*) (.*)$/;
    return "NNTP Error: $2\n" unless ($1 eq '220');

# Get article

    $output = &listen(10);
    &chat'print($handle,"quit\n")
        || return "chat'print(quit): $!\n";
    &chat'close($handle);
    close(OUT) if ($file);
    return($output) unless ($file);
}
sub listen {
        local($secs) = @_;
        local($return,$tmp) = "";
        while (length($tmp = &chat'expect($secs, '(.|\n)+', '$&'))) {
                print $tmp if $trace;
                $return .= $tmp;
                (return $return) if (length($return) > 100000);
        }
        $return;
}


#}
