################################################################### # httplib.pl # # Robert Bishop, Kelly Hatcher # 12/95 # # httplib is a command line http client. It is designed to # make http calls just like any browser with a bit more flexibility # such as calling just the HEAD and easily setting the timeout. # # Note: If you wish to use a proxy server you must set the # global variable $proxy_server in the calling routine. # the format is "proxy_servername.my.domain:proxyport" # # Note: If you set $localhost with a fully qualifed domain name # in the calling routine you MAY see a slighter faster speed # on the first call. # # Note: If you set $explicit_header in the calling routine # you will receive EXACTLY the info from the HTTP get # and you MAY see a slighter faster speed # # Note: If you set $useragent in the calling routine # you will be properly identified, otherwise you will get # the ht'client identification. # # Updated 01/22/96 - Added POST capability - RB # Updated 04/11/96 - Added explicit_header capability - KDH # Updated 04/11/96 - FIXED "HTTP 1.0" to "HTTP/1.0" - KDH # Updated 09/04/96 - FIXED localization problem for perl5 - RB # Updated 10/07/96 - Fixed no timeout problem on partial xfers - DWH # Updated 12/13/96 - ADDED global pre-define of $useragent if desired # Modified for speed; added o to all matches # ADDED &ht_prep - KDH # Updated 12/19/96 - Added a second SIGALRM timer for docs once the # header has been retrieved - RB # Updated 12/23/96 - Fixed another localization problem # Cleaned up timeout errors - RB # Updated 1/7/97 - Fixed Document timeout to actually work - KDH # Updated 4/22/97 - Expanded "Error:" line contents - KDH # Updated 4/24/97 - Added capability to use a proxy-server - KDH #################################################################### package httplib; #################################################################### # ht_client # # Usage: # require "httplib.pl" # (HEADER,PAGE) = &ht_client(URL,METHOD,TIMEOUT,DOC_TIMEOUT); # # HEADER = variable to return http header to. # PAGE = variable to return http file to. # URL = the url to gather (send absolute url i.e. http://....) # If there is POST information, attach it after the URL delimited # with a ?. # METHOD = HEAD,GET or POST. If not provided, default is GET. # TIMEOUT = Number of seconds to wait for connection. # Default is $timeout. # DOC_TIMEOUT = Number of seconds to wait for the data portion of the # document once the header has been retrieved. Default is # $doc_timeout. ##################################################################### sub main'ht_client { &ht_prep unless $ht_prepped; # Do one-time initialzation stuff # Clean up from previous runs undef($url_in,$method,$timeout,$doc_timeout,$header,$full_header,$page,$error,$status,$seconds); undef(@ht_take); local($header,$full_header,$page,$error,$status,$seconds); local(@ht_take); local($url_in,$method,$timeout,$doc_timeout) = @_; ##### Defaults ##### $method = "GET" unless $method; $timeout = 60 unless $timeout; $doc_timeout = 300 unless $doc_timeout; $host = "localhost"; $port = 80; $request = "/"; $seperator = $/; ###### end defaults ##### if (defined($main::proxy_server)) { ($host,$port) = ($main::proxy_server =~ m#^([a-z\.0-9\-\_]*)\:?([0-9]*)#io); $port = 80 if $port <= 1; $request = $url_in; } elsif ($url_in =~ m#^http://([a-z\.0-9\-\_]*):([0-9]*)(/.*)#io) { $host = $1; $port = $2; $request = $3; } elsif ($url_in =~ m#^http://([a-z\.0-9\-\_]*):([0-9]*)#io) { $host = $1; $port = $2; } elsif ($url_in =~ m#^http://([a-z\.0-9\-\_]*)(/.*)#io) { $host = $1; $request = $2; } elsif ($url_in =~ m#^http://([a-z\.0-9\-\_]*)#io) { $host = $1; } else { $error = "Error: Deformed URL\n"; } # If there is any stuff, strip it ($request) = split(/\#/,$request); # If this is a POST, separate the POST from the request if ($method =~ /POST/ && $request =~ m/\?/o) { $post_text = $'; $request = $`; } if ($host =~ /\d+\.\d+\.\d+\.\d+/o ) { ($a, $b, $c, $d) = split(/\./, $host); $thataddr = pack('C4', $a, $b, $c, $d); } else { unless (defined $name_to_addr{$host}) { ($name_to_addr{$host}) = (gethostbyname($host))[4]; } $thataddr = $name_to_addr{$host}; } if (!$thataddr) { $error = "Error: Cannot resolve hostname $host\n"; } # $that_ip_addr = join '.', unpack(C4,$thataddr); $thatsock = pack($sockaddr, $af_inet, $port, $thataddr); # The clock is ticking - get the document before $timeout expires $SIG{'ALRM'} = "httplib'timeout"; if (!$main'explicit_header) { $starttime = time; } alarm($timeout); if (!$error) { if(socket(FS,$af_inet,$sock_stream,$proto)) { if(bind(FS, $thissock)) { if(connect(FS,$thatsock)) { select(FS); $| = 1; select(STDOUT); print FS "$method "; # HEAD,GET,POST print FS "$request "; # Path print FS "$version\r\n"; # HTTP/1.0 print FS "Accept: */*\r\n"; # Take whatever comes back print FS "User-agent: $useragent\r\n"; # This tool print FS "From: $from\r\n"; # Network host if (defined $post_text) { print FS "Content-type: application/x-www-form-urlencoded\r\n"; print FS "Content-length: ", length $post_text, "\r\r\n\n"; print FS "$post_text\r\n"; } print FS "\r\n"; # Process the MIME header $/ = "\n"; $_ = ; $status = $_; while() { last if /^[\r\n]+$/; # end of header $header .= $_; } alarm($doc_timeout); # ReSet the timer to Document timeout, from connection timeout. # We are retrieving the doc data now. # This is the file after the header if (!defined($max_page_lines)) { undef ($/); $page = ; } else { my ($line_counter) = 0; while () { $line_counter++; last if $line_counter >= $max_page_lines; $page .= $_; } shutdown(FS, 2); } } else { $error = "Error: Connect - $!\n"; } } else { $error = "Error: Bind - $!\n"; } } else { $error = "Error: Socket - $!\n"; } } alarm(0); $SIG{'ALRM'} = "IGNORE"; # Close the second timer (or the first if we fell out of the ifs) close(FS); if (!$main'explicit_header) { $stoptime = time; $seconds = $stoptime - $starttime; $error = "Error: Timeout\n" if $seconds == $timeout; $error = "Error: Moved\n" if $status =~ m#HTTP\S*\s+30[12]#io; $error = "Error: Access Denied\n" if $status =~ m#HTTP\S*\s+40[013]#io; $error = "Error: Not Found\n" if $status =~ m#HTTP\S*\s+(404|501|503)#io; $error = "Error: Server Error\n" if $status =~ m#HTTP\S*\s+50[02]#io; $error = "Error: None\n" unless $error; $status = "Error\n" unless $status; $full_header = "$error"."Status: $status"."Received-time: $seconds\n"."$header"; } else { $full_header = "$status"."$header"; $full_header = "$error"."$full_header" if $error; } @ht_take = ($full_header,$page); $/ = $seperator; return @ht_take; } sub ht_prep { # If you have it, use it. Otherwise try the defaults # If we are using perl5 use Socket; # Otherwise .... # require "sys/socket.ph"; $af_inet = &AF_INET; $sock_stream = &SOCK_STREAM; # v--- THE DEFAULTS #$af_inet = 2; #$sock_stream = 1; if ($main'localhost) { # You can make this global if you want! $thishost = $main'localhost; } else { open(THISHOST,"-|") || exec("hostname"); $thishost = ; close(THISHOST); } $sockaddr = "S n a4 x8"; $ver = "1.2"; $version = "HTTP/1.0"; $useragent = "ht_client V$ver - $thishost" unless $useragent; $from = "$thishost"; ($proto) = (getprotobyname("tcp"))[2]; ($thisaddr) = (gethostbyname($thishost))[4]; $thissock = pack($sockaddr, $af_inet, 0, $thisaddr); $ht_prepped = "Done"; } sub timeout { shutdown(FS, 2); $status = "Status: Timeout\n"; } 1;