#!/bin/perl4.036 # #
# webfind
#
#
# Settings taken from the environment.
$home=$ENV{HOME};
# Debug level: 1 unless the DEBUG environment variable is set, in which case
# its value wins.  (An unconditional assignment from %ENV would leave $debug
# undefined whenever DEBUG is absent, silently discarding the default.)
$debug=1;
$debug=$ENV{'DEBUG'} if defined($ENV{'DEBUG'});
# $|=1;
# Scratch file name of the form "<pid>.webfind".  Built by concatenation on
# purpose: inside a double-quoted string a run of '$' characters in front of
# '.' is not read as "process id followed by a literal dot".
$scratchfile = $$ . ".webfind";
#
require "chat2.pl";
require "url_parse.pl";
require "url_dump.pl";
require "url_get.pl";
require "url_exclude.pl";
# Send INT, HUP and QUIT to the handler named 'report'.
# NOTE(review): sub report is not defined in the part of the file shown
# here -- confirm it exists before relying on these handlers.
$SIG{'INT'}='report';
$SIG{'HUP'}='report';
$SIG{'QUIT'}='report';
# Pattern strings describing URLs, servers and domains.
# NOTE(review): because these are double-quoted, Perl removes the backslashes
# while building the string, so "\." reaches the regex engine as a bare "."
# (any character).  The strings are left exactly as they were because the
# code that applies them is not in view.
$wfurl = "(file|http|ftp|news|telnet)\:\/\/(.+\.*)+(:[0-9]+)?(\/)*(.+\/*)+" ;
#well formed URL
$wfhst = "(file|http|ftp|news|telnet)\:\/\/(.+\.*)+(:[0-9]+)?\/" ; #well formed server
$wfdn = "\/\/(.+\.*)+\/" ; #well formed domain
$maxdepth = 3;
# String-valued settings start out empty; the remaining ones start at "0".
$breadth = $start = $domain =
$host = $file = $grep = $agrep = $url = $print = "";
$depth = $get = $total_visited = $exec = "0";
$lasthost = "localhost"; # initial setting only!
#
# The complete argument list is kept in @cmd_parms; the last argument is the
# URL to fetch.  Without any argument there is nothing to fetch, so say so
# and stop instead of handing an undefined URL to the fetcher.
@cmd_parms = @ARGV;
if ($#ARGV < 0) {
    print STDERR "usage: $0 [args...] url\n";
    exit(1);
}
$start=$ARGV[$#ARGV];
print "I think you want me to fetch $start\n";
$start_depth=1;
$thisurl = $start;
#$doc= &url_get'url_get($thisurl);
#print "+++ Read $thisurl\n";
#$file_text = $doc;
# NOTE(review): $doc is never assigned in the visible code (the fetch above
# is commented out), so do_get receives an undefined third argument --
# confirm that this is what url_dump.pl expects.
$pres = &url_dump'do_get($debug, $thisurl, $doc) ;
exit (0);
#------------------ SUPPORT FUNCTIONS ------------------------------
# listen -- collect everything the current chat connection sends.
#
#   $secs    handed unchanged to &chat'expect as its first argument
#            (presumably a timeout in seconds -- confirm against chat2.pl)
#
# Calls &chat'expect repeatedly with a match-anything pattern and appends
# each non-empty result to the accumulated text.  Stops at the first empty
# result, or as soon as more than 100000 characters have been gathered.
# Every piece is echoed to STDOUT when the global $trace is true.
# Returns the accumulated text.
sub listen {
    local($secs) = @_;
    local($return,$tmp) = "";
    for (;;) {
        $tmp = &chat'expect($secs, '(.|\n)+', '$&');
        last unless length($tmp);       # nothing more arrived
        print $tmp if $trace;
        $return .= $tmp;
        last if length($return) > 100000;   # size cap reached
    }
    $return;
}
# unverified
# getnext -- take the next (url, depth) pair off @tour_list.
#
# Reads:   $depth      when == 1 the pair is popped from the tail of the list
#                      (stack order); otherwise it is shifted from the head
#                      (queue order)
#          @tour_list  flat list url, depth, url, depth, ...
# Writes:  @ret        global; slots 0 and 1 are overwritten with url, depth
# Returns: @ret
#
# If the value in the depth slot contains no digit, or looks like an HTML
# anchor (href=...A>), the two slots are taken to be transposed and are
# swapped back, with a message on STDOUT.
#
# No scratch globals are used, so a caller's $i or $tmp1 is left alone.
sub getnext {
    if ($depth == 1) {                  # depth-first == stackwise
        $ret[1] = pop(@tour_list);      # depth was pushed last
        $ret[0] = pop(@tour_list);
    } else {                            # breadth-first == queuewise
        $ret[0] = shift(@tour_list);
        $ret[1] = shift(@tour_list);
    }
    if (($ret[1] !~ /\d+/) ||
        ($ret[1] =~ /href=(\"*)([^\"]+(\"*)).*A>/i)) {
        print "HORRID WEIRDNESS DETECTED IN &getnext(); KLUGING\n";
        ($ret[0], $ret[1]) = ($ret[1], $ret[0]);
    }
    print "&getnext(): returns url $ret[0], depth $ret[1] \n";
    return @ret;
}
# putnext -- append the given values (normally a url followed by its depth)
# to the end of @tour_list, one value at a time.
#
# Prints a confirmation naming the first two arguments and counts the call
# in the global $totalpushed.  The value of the sub is that of the final
# post-increment, i.e. the count before this call.
sub putnext {
    local(@item) = @_;
    for $element (@item) {
        unless (push(@tour_list, $element)) {
            print "putnext(); PUSH FAILED \n";
        }
    }
    print "&putnext(): pushed url @item[0], depth @item[1]\n";
    return $totalpushed++;
}
#