#!/usr/bin/perl # ************************************************************************** # # HREF.PRL written by Chris Evans (C.Evans@sghms.ac.uk) # # # # program finds all *.htm files off a certain root directory, # # and checks all their href.prl writtend by Chris Evans C.Evans@sghms.ac.uk END $startdir = "/mhs/psychotherapy"; if ($startdir =~ /\/$/) { chop($startdir); } $www_offset = "/usr1/www/pages"; if ($www_offset =~ /\/$/) { chop($www_offset); } # sort out the location that the server will show for files $http_rep_dir = $outputdir; $http_rep_dir =~ s|$www_offset||o; # o switch as the value of $www_offset is fixed # set up the associative arrays used to store the various sorts of calls undef %mail; undef %href; undef %http; undef %file; if ($RELWARN) { open (RELFILE, ">dir/relative.lst") || die "Couldn't open relative.lst: $!\n"; } open (LOCALS,">dir/locals.htm") || die "Couldn't open dir/locals.lst: $!\n"; $title = "Listing of local HREF calls made using =\"http:\" syntax"; print LOCALS $head; open (PROBFILE, ">dir/problem.htm") || die "Couldn't open dir/problem.lst: $!\n"; $title = "Listing of probably faulty HREF calls to local files"; print PROBFILE $head; open (LFILE, ">dir/lfiles.htm") || die "Couldn't open dir/lfiles.lst: $!\n"; $title = "Listing of local HREF calls"; #### start main loop through all *.htm files in this directory and below open(FIND, "find . 
-name \"*.htm\" -print |") || die "Couldn't run find: $!\n"; while ($filename = ) { if ($filename ne "./dir/problem.htm") { open (FILE, $filename) || warn "Can't open $filename: $!\n"; if ($VERBOSE) { print "Opened file $filename\n"; } $readnum++; $file_dir = &dir($filename); $file_dir =~ s/^.\///; $file_dir = $www_offset.$startdir."/".$file_dir; $linenum = 0; while () { $linenum++; while (/hrefs.lst") || die "Can't open all_hrefs.lst: $!\n"; foreach $href (sort keys(%href)) { print HREFFILE "$href:\n"; $locations = $href{$href}; foreach $location (split(/;/, $locations)) { ($file,$line) = split(/\*/, $location); print HREFFILE " $file line number: $line\n"; } } close (HREFFILE); open (MAILFILE, ">mailto.lst") || die "Can't open mailto.lst: $!\n"; foreach $href (sort keys(%mail)) { print MAILFILE "$href:\n"; $locations = $mail{$href}; foreach $location (split(/;/, $locations)) { ($file,$line) = split(/\*/, $location); print MAILFILE " $file line number: $line\n"; } } close (MAILFILE); open (FILEFILE, ">files.lst") || die "Can't open files.lst: $!\n"; foreach $href (sort keys(%file)) { print FILEFILE "$href:\n"; undef @names; open (NAMEFILE, $href) || warn "Can't open $href: $!\n"; while () { if (/fileprob.lst") || die "Can't open fileprob.lst: $!\n"; foreach $file (sort keys(%hrefp)) { print FILEP "file: $file\n"; $locations = $hrefp{$file}; foreach $location (split(/;/, $locations)) { ($line,$href,$name) = split(/\*/, $location); print FILEP " line number: $line, href=$href, name=$name\n"; } } close (FILEP); open (HTTPFILE, ">http.lst") || die "Can't open http.lst: $!\n"; foreach $href (sort keys(%http)) { print HTTPFILE "$href:\n"; $locations = $http{$href}; foreach $location (split(/;/, $locations)) { ($file,$line) = split(/\*/, $location); print HTTPFILE " $file line number: $line\n"; } } close (HTTPFILE); open (REPORT, ">report.lst") || die "Can't open report.lst: $!\n"; print REPORT "\n\n******************* SUMMARY *****************************\n\n"; print 
REPORT "Read a total of $readnum files for HREFS\n";   # completes the `print` that ends the preceding line
print REPORT "found:\n";
print REPORT " total HREFs: $hrefnum \n";
print REPORT " local files called by file: calls: $lfile \n";
print REPORT " gopher calls: $gophnum \n";
print REPORT " ftp calls: $ftp_num \n";
print REPORT " readable mailtos: $mailnum \n";
print REPORT " readable http calls: $httpnum \n";
print REPORT " of which $loclnum were to local files \n";
print REPORT " readable local file calls: $filenum \n";
print REPORT " of which $dir_num were to a directory rather than a file\n";

# Local-file calls that are not directory references.
$files = $filenum - $dir_num;
print REPORT " leaving $files which were to files\n";
print REPORT " of which $namenum included a NAME marker reference \n\n";

# Whatever is left of the HREF total after subtracting the gopher, ftp,
# mailto, http and local-file counts is reported as unresolvable.
$bad = $hrefnum - $gophnum - $ftp_num - $mailnum - $httpnum - $filenum;
print REPORT "Total calls which won't find target: $bad \n";
print REPORT " (some may, depending on lenience of browser)\n\n";
print REPORT "and a further $badname of the calls to markers in local files have bad markers\n\n";
close (REPORT);

# Append the shared trailer held in $tail to each of the three HTML listings.
print LOCALS $tail;
print LFILE $tail;
print PROBFILE $tail;

################################ subroutines ########################################

# location
#   Prints, on the currently selected output handle, the values of the
#   globals $filename, $href and $rest.  Takes no arguments.
#   NOTE(review): no call to this sub is visible in this part of the file;
#   it looks like a debugging aid -- confirm before removing.
sub location {
    print " in $filename:\n $href\n rest of line is: $rest\n";
}

# dir(FILENAME)
#   Returns the directory part of FILENAME, i.e. everything before its last
#   "/".  No attempt is made to check the name for legality.
#     - no "/" at all            -> "."
#     - only a leading "/"       -> "/"   (a file in the root directory)
#     - otherwise                -> the text before the last "/"
sub dir {
    my ($filename) = @_;

    my $slash = rindex($filename, "/");
    return "." if $slash < 0;
    return "/" if $slash == 0;      # "/file" lives in the root, not in "."
    return substr($filename, 0, $slash);
}

# expand_dir(FILE_DIR, HREF)
#   Turns the reference HREF, found in a document that lives in directory
#   FILE_DIR, into a file-system path and returns it.
#
#     - HREF starting with "/" is server-absolute: the global $www_offset is
#       prepended and FILE_DIR is ignored.
#     - HREF containing a ".." path component is resolved lexically against
#       FILE_DIR, the way a browser resolves a relative URL: "." components
#       are dropped and each ".." removes one directory.  ".." can never
#       climb above the root of an absolute FILE_DIR; against a relative
#       FILE_DIR the surplus ".." components are kept.
#     - HREF starting with "./" has the "." replaced by FILE_DIR.
#     - Anything else is simply appended to FILE_DIR.
#
#   One trailing "/" on FILE_DIR is ignored.  Only lexical variables are
#   used, so no caller-visible globals are modified.
sub expand_dir {
    my ($file_dir, $href) = @_;

    #### first return to calling routine if href is absolute
    return $www_offset . $href if $href =~ m{^/};

    #### remember whether the base is absolute before trimming its trailing slash
    my $is_absolute = ($file_dir =~ m{^/}) ? 1 : 0;
    $file_dir =~ s{/$}{};

    if ($href =~ m{(^|/)\.\.(/|$)}) {
        my @parts    = split(m{/}, $href);
        my $filename = pop(@parts);

        # An href that ends in "." or ".." names a directory, not a file,
        # so that last component has to be resolved with the others.
        if (defined($filename) && ($filename eq '.' || $filename eq '..')) {
            push(@parts, $filename);
            undef $filename;
        }

        # Empty and "." components of the base carry no information; the
        # main loop builds bases ending in "/." for top-level files, and
        # that "." must not be mistaken for a real directory when climbing.
        my @dirs = grep { $_ ne '' && $_ ne '.' } split(m{/}, $file_dir);

        foreach my $part (@parts) {
            next if $part eq '' || $part eq '.';
            if ($part ne '..') {
                push(@dirs, $part);
            }
            elsif (@dirs && $dirs[-1] ne '..') {
                pop(@dirs);                 # climb one directory
            }
            elsif (!$is_absolute) {
                push(@dirs, '..');          # relative base exhausted: keep the ".."
            }
            # otherwise we are already at the root of an absolute base
        }

        push(@dirs, $filename) if defined($filename);
        return ($is_absolute ? '/' : '') . join('/', @dirs);
    }
    elsif ($href =~ m{^\./}) {
        #### explicit reference to current directory: replace "." with $file_dir
        return $file_dir . substr($href, 1);
    }

    #### must be just an implicit reference to current directory so just splice
    return $file_dir . "/" . $href;
}