#!/usr/local/bin/perl -w # # getfile - GETs a file using LWP::UserAgent # # Copyright (C) 2000-2004 Steven Pritchard # This program is free software; you can redistribute it # and/or modify it under the same terms as Perl itself. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # $Id: getfile,v 1.7 2004/04/24 15:57:01 steve Exp $ # This script will probably work with anything since 5.002 # or so, but it has only been tested with 5.005 and higher. require 5.005; use strict; use Carp; use FileHandle; use Getopt::Std; use LWP::UserAgent; use vars qw(%opt $dir $file $ua $url); sub usage(); sub mktmp($$); my $VERSION="0.02"; #### # # Process the command line. # # Get the command-line options. getopts('fo:t:', \%opt); # If an output file was specified with -o, # break it into directory and file parts. if (defined($opt{'o'})) { ($dir,$file)=$opt{'o'}=~/^(.*\/)?([^\/]*)$/; } # If a timeout was specified with -t, make sure it is valid. if (defined($opt{'t'})) { delete $opt{'t'} if ($opt{'t'}!~/^\d+$/); } # Assume the directory should be "." if it wasn't specified. $dir="." if (!$dir); # The remaining arguments should be the URL to fetch. $url=shift or &usage; # If a file wasn't specified with -o # (or what was specified was invalid), # get the output filename from the URL. if (!$file) { if ($url=~m{^ \w+:\/\/ # method:// (.+?:.+?\@)? # user:password@ [\w\.\-]+ # host (:\d+)? # :port \/(.*\/)? # /path/ ([^\/]*?)? # file (\?.*)? # ?args $}x) { $file=$4; if (!$file) { carp "couldn't find filename in URL, using 'index.html'.\n"; $file="index.html"; } } else { croak "invalid URL: '$url'\n"; } } #### # # Set up the LWP::UserAgent object. # $ua=new LWP::UserAgent; $ua->agent("getfile/$VERSION " . $ua->agent); $ua->env_proxy; # Get the proxy settings from %ENV. $ua->timeout($opt{'t'}) if (defined($opt{'t'})); # Set the timeout for -t. #### # # Build the request. # my $request=new HTTP::Request("GET", $url); # Set the if_modified_since header to the last # modified time of the file if -f wasn't specified. my @statbuf; if (!$opt{'f'} && (@statbuf=stat("$dir/$file"))) { $request->if_modified_since($statbuf[9]) if (@statbuf); } #### # # Send the request and read the response. # my $response=$ua->request($request); # Follow redirects, but don't loop. my @locations=($url); while ($response->code eq "301" or $response->code eq "302") { $url=$response->header('Location'); croak "Redirect loop detected!\n ".join("\n ", @locations, $url)."\n" if (grep { $url eq $_ } @locations); push(@locations, $url); $request=new HTTP::Request("GET", $url); if (!$opt{'f'} && (@statbuf=stat("$dir/$file"))) { $request->if_modified_since($statbuf[9]) if (@statbuf); } $response=$ua->request($request); } if ($response->is_success) { # We got an answer, so write to a temp file... my ($tmp, $fh)=mktmp($dir, $file); croak "Couldn't create temporary file: $!\n" if (!defined($fh)); print $fh $response->content; close($fh); # Reset the last modified time of our local copy # to the last modified time of the remote copy. my $last_modified=$response->last_modified; utime(time, $last_modified, $tmp) if ($last_modified); # Move our temp file into place, # or remove the temp file and exit. if (!rename($tmp, "$dir/$file")) { unlink($tmp); croak "Couldn't rename '$tmp' to '$dir/$file': $!\n"; } } elsif ($response->code eq "304") { # 304 is a "Not Modified" response. print "$file is up to date.\n"; exit 0; } else { # For whatever reason, we couldn't get the file... croak "failed to get '$url': ", $response->status_line, "\n"; } #### # # Subs/functions. # # Print usage message and exit. sub usage() { my $me=$0; $me=~s/^.*\///; print STDERR "Usage: $me [ -f ] [ -o ] [ -t ] url\n"; exit 1; } # Build a temporary file name and open it for writing. sub mktmp($$) { my ($dir, $file)=@_; my ($name,$fh); # Try 10 times to build a temporary file and open it. # (The down side to this is that if we don't have # write permissions to the directory, we waste time # trying 10 times to create the file.) for (my $n=0;$n<10;$n++) { my $suffix; for (my $m=0;$m<5;$m++) { # This creates a 5 character string of # uppercase characters, i.e. "ABCDE". $suffix.=chr(int(rand(26))+65); } $name="$dir/.$file.$suffix"; # BTW, this almost definitely isn't safe over NFS. $fh=new FileHandle $name, O_CREAT|O_EXCL|O_WRONLY; last if (defined($fh)); } return ($name,$fh); } __END__ =head1 NAME getfile - mirrors a file using LWP::UserAgent =head1 SYNOPSIS B [ C<-f> ] [ C<-o> I ] [ C<-t> I ] I =head1 DESCRIPTION B fetches the file specified by the I command-line argument. If the local file exists, it only fetches the remote file if it is newer than the local file. =head1 OPTIONS =over 9 =item C<-f> force update of the local file even if it is up-to-date =item C<-o> I use I as the local filename =item C<-t> I set the timeout to I =back =head1 NOTES The main advantage of this program versus more functional programs like L or L is simplicity. It is functionally equivalent, more-or-less, to L, but it works around (what I consider to be) bugs in mirror() in L. =head1 SEE ALSO L, L, L, L, L, L =head1 AUTHOR Steven Pritchard > =cut