#!/usr/bin/perl -w
#
# webwatch - Check web pages against local templates.
#
# Copyright (C) 2001 Steven Pritchard
# This program is free software; you can redistribute it
# and/or modify it under the same terms as Perl itself.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# $Id: webwatch,v 1.1 2001/07/31 20:20:48 steve Exp $

# This script will probably work with anything since 5.002
# or so, but it has only been tested with 5.005 (and above).
require 5.005;

use strict;
use Carp;
use FileHandle;
use DirHandle;
use Getopt::Std;
use LWP::UserAgent;

use vars qw(%opt $dir $file $ua $url);

my $VERSION="0.01";

# Forward declarations so the prototyped subs can be called before their
# definitions (debug is called without parentheses).  mktmp is declared
# here but not defined anywhere in this file.
sub mktmp($$);
sub recurse_into($);
sub debug(@);

####
#
# Process the command line.
#

# Get the command-line options.  getopts returns false on an unknown
# option or a missing option argument; show the usage message then.
getopts('t:', \%opt) or usage();

# If a timeout was specified with -t, make sure it is valid.  An invalid
# value is dropped (so the LWP default applies) rather than being fatal.
if (defined($opt{'t'}) and $opt{'t'}!~/^\d+$/) {
    warn "Ignoring invalid timeout '$opt{t}'\n";
    delete $opt{'t'};
}

# Find files
recurse_into(".");

####
#
# Subs/functions.
#

# fetch_url($url)
#
# GET $url and return the response body.  On any failure a warning is
# printed and the empty string is returned, so callers cannot tell a
# failed request from a successful but empty page.
sub fetch_url($) {
    my $url=shift;

    ####
    #
    # Set up the LWP::UserAgent object (once; it is reused for every URL).
    #
    if (!$ua) {
        $ua=LWP::UserAgent->new;
        $ua->agent("webwatch/$VERSION " . $ua->agent);
        $ua->env_proxy;                 # Get the proxy settings from %ENV.
        $ua->timeout($opt{'t'})         # Set the timeout for -t.
            if (defined($opt{'t'}));
    }

    ####
    #
    # Build the request.
    #
    my $request=HTTP::Request->new("GET", $url);

    ####
    #
    # Send the request and read the response.
    #
    my $response=$ua->request($request);

    if ($response->is_success) {
        return $response->content;
    } else {
        # For whatever reason, we couldn't get the file...
        warn "Failed to get '$url': ", $response->status_line, "\n";
        return "";
    }
}

# recurse_into($dir)
#
# Walk $dir recursively.  Every regular file found below the starting
# directory is read as a regular-expression template; its path relative
# to "." (host directory first) is turned into "http://<path>", the page
# is fetched, and a message is printed to STDERR if the page no longer
# matches the template.  Regular files directly in "." are ignored, since
# they have no host component.
sub recurse_into($) {
    my $dir=shift;

    # Path of $dir relative to the starting directory, with a trailing
    # slash ("" for "." itself).
    my $path="$dir/";
    $path=~s,^\./,,;

    my $dh=DirHandle->new($dir);
    if (!$dh) {
        warn "Failed to open directory $dir: $!\n";
        return;
    }

    # Read every entry up front and release the handle, so that deep
    # trees do not keep one directory handle open per recursion level.
    my @children=$dh->read;
    $dh->close;

    for my $child (@children) {
        next if ($child eq "." or $child eq "..");

        if (-d "$dir/$child") {
            debug "Descending into $dir/$child...\n";
            recurse_into("$dir/$child");
        } elsif (-f "$dir/$child") {
            next if ($dir eq ".");

            debug "Found $path$child...\n";

            # Read file.  An explicit mode keeps characters in the file
            # name from being interpreted as part of the open mode.
            my $fh=FileHandle->new("$dir/$child", "r");
            if (!$fh) {
                warn "Failed to open $dir/$child: $!\n";
                next;
            }

            # Slurp the whole template; $/ is only changed for the read.
            my $regex=do { local $/; <$fh> };
            $regex="" unless (defined($regex));
            close($fh);

            my $url="http://$path$child";
            my $file=fetch_url($url);

            # fetch_url returns "" on failure (it has already warned).
            if ($file eq "") {
                next;
            }

            debug "Got $url (", length($file), " bytes)!\n";

            # The template is deliberately interpolated as a pattern, not
            # as a literal string, and must match the entire page.
            if ($file!~/^$regex$/s) {
                print STDERR "$url has changed!\n";
            } else {
                debug "$url unchanged...\n";
            }
        } else {
            print STDERR "$path$child is not a regular file, skipping...\n";
            next;
        }
    }
}

# debug(@message)
#
# Print @message to STDERR, but only when the DEBUG environment variable
# is set to a true value.
sub debug(@) {
    print STDERR @_ if ($ENV{'DEBUG'});
}

# Print usage message and exit.
sub usage {
    my $me=$0;
    $me=~s/^.*\///;

    print STDERR "Usage: $me [ -t timeout ]\n";

    exit 1;
}

__END__

=head1 NAME

webwatch - Check web pages against local templates.

=head1 SYNOPSIS

B<webwatch> [ C<-t> I<timeout> ]

=head1 DESCRIPTION

B<webwatch> walks the current directory recursively.  Every regular file
found in a subdirectory is treated as a template for the web page whose
URL is C<http://> followed by the file's path relative to the current
directory, so the file F<www.example.com/index.html> is the template for
C<http://www.example.com/index.html>.  Regular files directly in the
current directory are ignored.

Each page is fetched and compared with its template.  If the page does
not match, C<I<url> has changed!> is printed on standard error.

Set the C<DEBUG> environment variable to a true value to get progress
messages on standard error.

=head1 OPTIONS

=over 9

=item C<-t> I<timeout>

set the timeout to I<timeout> seconds.  I<timeout> must be a non-negative
integer; any other value is ignored with a warning.

=back

=head1 NOTES

The contents of a template file are used as a regular expression that
must match the whole page, so any regular expression metacharacters that
appear literally in the page have to be escaped in the template.

Proxy settings are taken from the environment.

=head1 SEE ALSO

L<LWP::UserAgent>, L<HTTP::Request>, L<Getopt::Std>, L<DirHandle>,
L<FileHandle>, L<perlre>

=head1 AUTHOR

Steven Pritchard

=cut