@rem = '-*- Perl -*-'; @rem = ' @echo off perl -S %0.cmd %1 %2 %3 %4 %5 %6 %7 %8 %9 goto endofperl '; # musage3.prl script derived from musage.txt (see below) # I have radically altered Nick Phillips' script (he asserts # copyright and I have no wish to deny him that and just hope # he doesn't mind my having done this and making this hacked # version publicly available). If I knew how to contact him # to discuss this I would. If he, or someone who knows his # whereabouts, comes across this, I hope they will let me know # how to contact him. # # Chris Evans 21.ii.96 # # (Last update of Nick Phillips' script: 1st May 1995) # # musage.txt --- script to calculate monthly usage statistics from # log files generated by the EMWAC Windows NT World # Wide Web and gopher servers (https and gophers). # # Copyright (c) 1995 Nick Phillips (N.R.Phillips@lse.ac.uk) # # Configuration variables are hard-coded in # (see the section "CONFIGURE THESE VARIABLES"). # # File and path names should be absolute (e.g. C:/LOGS/) # or relative to current directory (e.g. ./LOGS/) # Directory names must always end with a trailing slash # (e.g. ./ for current directory - don't just leave blank) # In Perl for Windows NT use forward slash (not backslash) # in pathnames (e.g. C:/LOGS/) # # Uses the gethostbyaddr() function to look up the DNS name # corresponding to the IP addresses of users recorded in the logs - # this will only work if your version of Perl supports this function. # It is best to run this at a time when the Internet is quiet as it # could end up doing lots of DNS lookups. # # If you rename the script to MUSAGE.CMD then on an NT system you will # be able to run it from the command line (i.e. you will be able to just # type MUSAGE, rather than PERL MUSAGE.TXT) - note that MUSAGE.CMD is # a batch file (with Perl script embedded in it), so if you invoke it # from another batch file you will have to do CALL MUSAGE in order for # control to return to the original batch file. 
# print "\n\n\n\n\n\nMUSAGE3.PRL adapted by Chris Evans from the following:\n\n"; print " MUSAGE --- script to calculate monthly usage statistics from\n"; print " v1.4 log files generated by the EMWAC Windows NT World\n"; print " Wide Web and gopher servers (https and gophers).\n"; print " Copyright (c) 1995 Nick Phillips\n"; &config; &error_lab; # provide labels to classify errors if ($dated) { # provide date info. &uptonow; } &countries; # initialise list of names of countries (e.g. uk = United Kingdom etc.) # read in all known domain names to save looking them up again if ($lookupdnsnames) { &getdomains; } # set the parameters usually collected (for subdirectories) # from musage.set files $topfiles = 50; $topips = 30; $inputdir = "/usr1/www/log/"; $outputdir = "/usr1/www/pages/mhs/psychotherapy/wwwstats/"; @focusonpath = ("/mhs/psychotherapy/"); $f_pathcomment = "files in any Psychotherapy Section directories considered"; @ignoreip = ("194.80.201.27","cevanpc.sghms.ac.uk","infoserv.sghms.ac.uk","192.153.12.1","192.153.12.30"); $i_ipcomment = "does not include my own accesses as administrator"; # O.K. 
now collect up data from the server log directory $logfilepattern = "proxy-log.log"; # log files match this $mylog = "p_mylog.log"; $errlog = "p_errlog.log"; $psylog = "p.cur.log.log"; open (PSYLOG, ">$outputdir$psylog") || die "cannot open $outputdir$psylog"; open (MYLOG, ">$outputdir$mylog") || die "cannot open $outputdir$mylog"; open (ERRLOG, ">$outputdir$errlog") || die "cannot open $outputdir$errlog"; if (!$debug) { # get all log server log directory sorted by filename # (extract only those files that match $logfilepattern) opendir(LOGDIR, "$inputdir") || die "\nCannot open input directory $inputdir"; print "Reading server log files from directory:\n $inputdir"; @logfiles = sort(grep(/$logfilepattern/i, readdir(LOGDIR))); closedir(LOGDIR); } $extractonly = 1; &crunchlog; $extractonly = 0; close PSYLOG; close MYLOG; close ERRLOG; $mylog = "ptotal.log"; print "\nConcatenating extracted records with historical records\n"; system("cat ./wwwstats/p.old.log.log ./wwwstats/p.cur.log.log > ./wwwstats/$mylog"); # relates to getting file size data $outputdir = "/usr1/www/pages/mhs/psychotherapy/dir/"; $startdir = "/mhs/psychotherapy"; $www_offset = "/usr1/www/pages"; @filemask = ("*.*"); # sort out the location that the server will show for files $http_rep_dir = $outputdir; $http_rep_dir =~ s|$www_offset||o; # o switch as the value of $www_offset is fixed open (TIME, ">time.dat") || die "Cannot open file time.dat\n"; &getdata; # now run through all subdirectories looking for config files open(FIND, "find . 
-name \"musage.set\" -print |") || die "Couldn't run find: $!\n"; while ($filename = ) { # reset the default config &config; # now get the local data from musage.set file open (CONFIG, "$filename") || die "Couldn't open $filename\n"; print "\n\nProcessing config file:\n $filename"; while () { chop; eval; # evaluates the line in musage.set as Perl statement } &undefine; # open the output log files open (PSYLOG, ">$outputdir$psylog") || die "cannot open $outputdir$psylog"; open (ERRLOG, ">$outputdir$errlog") || die "cannot open $outputdir$errlog"; # now for the input log files # get all log files from current directory sorted by filename # (extract only those files that match $logfilepattern) opendir(LOGDIR, "$inputdir") || die "\nCannot open input directory $inputdir"; print "\nReading log files from directory:\n $inputdir"; @logfiles = sort(grep(/$logfilepattern/i, readdir(LOGDIR))); closedir(LOGDIR); &crunchlog; # now crunch the logs using the variable settings picked up close PSYLOG; close ERRLOG; } # write out all the domain names to save looking them up again if ($lookupdnsnames) { &writedomains; } # -------------------------------------------------------------------------- # SUB-ROUTINES # -------------------------------------------------------------------------- sub newmonth { # write report then undefine all the cumulative total variables &summaryofmonth if $currentmonth; # *** the transfers to totals were added by CE foreach (keys %domain) { $totdomain{$_} += $domain{$_}; } undef %domain; $totaccess += $access; undef $access; $totbytes += $totalbytes; undef $totalbytes; $toterror += $error; $tote304 += $e304; undef $error; undef %errcl; undef %error; foreach (keys %file) { $totfile{$_} += $file{$_}; } undef %file; foreach (keys %ip) { $totip{$_} += $ip{$_}; } undef %ip; $tothomeacc += $homeaccess; undef $homeaccess; $tothomebyt += $homebytes; undef $homebytes; foreach (keys %homefile) { $tothomefile{$_} += $homefile{$_}; } undef %homefile; foreach (keys 
%homeip) { $tothomeip{$_} += $homeip{$_}; } undef %homeip; $totoutacc += $outsideaccess; undef $outsideaccess; $totoutbyt += $outsidebytes; undef $outsidebytes; foreach (keys %outsidefile) { $totoutfile{$_} += $outsidefile{$_}; } undef %outsidefile; foreach (keys %outsideip) { $totoutip{$_} += $outsideip{$_}; } undef %outsideip; undef $currentmonth; undef $currentyear; undef $currentlogtype; } # &newmonth sub undefine { # reset all the cumulating variables, monthly and total # runs before starting crunching for a new musage.set file undef %domain; undef %totdomain; undef $access; undef $totaccess; undef $totalbytes; undef $totbytes; undef $error; undef $toterror; undef %errcl; undef %error; undef %toterrcl; undef %toterror; undef $e304; undef $tote304; undef %file; undef %totfile; undef %ip; undef %totip; undef $homeaccess; undef $tothomeacc; undef $homebytes; undef $tothomebyt; undef %homefile; undef %tothomefile; undef %homeip; undef %tothomeip; undef $outsideaccess; undef $totoutacc; undef $outsidebytes; undef $totoutbyt; undef %outsidefile; undef %totoutfile; undef %outsideip; undef %totoutip; undef $currentmonth; undef $currentyear; undef $currentlogtype; } # &undefine sub total { # *** added by CE, transfers all accumulated totals to usual variables # used to print monthly summaries %domain = %totdomain; $access = $totaccess; $totalbytes = $totbytes; %file = %totfile; %ip = %totip; $homeaccess = $tothomeacc; $homebytes = $tothomebyt; %homefile = %tothomefile; %homeip = %tothomeip; $outsideaccess = $totoutacc; $outsidebytes = $totoutbyt; %outsidefile = %totoutfile; %outsideip = %totoutip; $error = $toterror; $e304 = $tote304; } # &total # -------------------------------------------------------------------------- sub summaryofmonth { # make a new report file for that month # filename e.g. 
1994-11.HTM ($_) = grep(/^$currentmonth/i, January01, February02, March03, April04, May05, June06, July07, August08, September09, October10, November11, December12); ($monthname, $mon) = /(\w+)(\d\d)$/; $report = "$currentlogtype$currentyear-$mon"; $servertype = $servertype{$currentlogtype}; open (SUMMARY, ">$outputdir$report.HTM") || die "cannot open $outputdir$report.HTM"; print SUMMARY "\n"; print SUMMARY "$servername $servertype server usage log $monthname $currentyear\n"; print SUMMARY "\n\n"; print SUMMARY "

$servername $servertype server usage log

\n"; if (!$dated) { print SUMMARY "

$monthname $currentyear

\n"; } else { print SUMMARY "

$monthname $currentyear

\n"; print SUMMARY "

$uptonow

\n"; } print SUMMARY "[Files, "; print SUMMARY "sites, "; print SUMMARY "domains, "; print SUMMARY "errors]\n"; if (@focusonip != 0) { print SUMMARY "

Focussing on accesses from the following IP addresses

\n"; print SUMMARY "
    \n"; foreach $focusonip (@focusonip) { print SUMMARY "
  • $focusonip
  • \n"; } print SUMMARY "
\n"; } if (@focusonpath != 0) { print SUMMARY "

Focussing on accesses to the following paths or files

\n"; print SUMMARY "
    \n"; foreach $focusonpath (@focusonpath) { print SUMMARY "
  • $focusonpath
  • \n"; } print SUMMARY "
\n"; } if ($i_ipcomment) { print SUMMARY "
    \n"; print SUMMARY "
  • $i_ipcomment
    \n"; if ($f_pathcomment) { print SUMMARY "
  • $f_pathcomment

    \n"; } print SUMMARY "

\n"; } print SUMMARY "

Total number of accesses

\n"; print SUMMARY "Every time a document is retrieved counts as one access.\n"; print SUMMARY "
";
    # Access totals table: overall, $homename users, and outside users.
    # The label column is length($homename) + 12 characters wide, with a
    # minimum of 20; the counts are right-aligned in 18 characters.
    $length = length($homename) + 12; $length = 20 if $length < 20;
    printf SUMMARY "     %-${length}s %18s\n", "Overall total:", $access;
    printf SUMMARY "     %-${length}s %18s\n", "$homename users:", $homeaccess;
    printf SUMMARY "     %-${length}s %18s\n", "Outside users:", $outsideaccess;
    # Outside accesses as a percentage of all accesses, truncated (int) to
    # two decimal places.  With no accesses at all $opct is set to "***".
    if ($access) { $opct = (int(10000*$outsideaccess/$access))/100; }
    elsif ($outsideaccess) { $opct = 100; }
    else { $opct = "***"; }
    # $width and $dp are globals; they give the field width and number of
    # decimal places used in the %f conversion on the next line.
    # NOTE(review): $opct goes through a %f conversion, so the "***"
    # placeholder is converted as a number rather than shown literally -
    # confirm whether that is intended.
    $width = 6; $dp = 2;
    printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
    print SUMMARY "
\n\n"; print SUMMARY "

Total number of users

\n"; print SUMMARY "Each different IP address that accesses the server\n"; print SUMMARY "counts as one user. N.B. This may represent a considerably\n"; print SUMMARY "greater number of actual people\n"; print SUMMARY "
";
    $length = length($homename) + 12; $length = 20 if $length < 20;
    printf SUMMARY "     %-${length}s %18s\n", "Overall total:",
        ($totcount = keys(%ip));
    printf SUMMARY "     %-${length}s %18s\n", "$homename users:",
        ($count = keys(%homeip));
    printf SUMMARY "     %-${length}s %18s\n", "Outside users:",
        ($outcount = keys(%outsideip));
    if ($totcount) {
       $opct = (int(10000*$outcount/$totcount))/100;
    } elsif ($outcount) {
       $opct = "100";
    } else {
       $opct = "***";
    }
    printf SUMMARY "\nI.e. outside addresses represent %${width}.${dp}f %\n", $opct;
    print SUMMARY "
\n\n"; if ($totalbytes) { print SUMMARY "

Total number of bytes transferred

\n"; print SUMMARY "
";
        $length = length($homename) + 12; $length = 20 if $length < 20;
        printf SUMMARY "     %-${length}s %18s\n", "Overall total:",
            &putcommasinnumber($totalbytes);
        printf SUMMARY "     %-${length}s %18s\n", "$homename users:",
            &putcommasinnumber($homebytes);
        printf SUMMARY "     %-${length}s %18s\n", "Outside users:",
            &putcommasinnumber($outsidebytes);
        $opct = (int(10000*$outsidebytes/$totalbytes))/100;
        printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
        print SUMMARY "
\n\n"; } print SUMMARY "
\n"; print SUMMARY "

Files

\n"; print SUMMARY "This is based on the number of times each individual\n"; print SUMMARY "file is accessed.\n\n"; &topfile; print SUMMARY "
\n"; print SUMMARY "

Sites

\n"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each IP address.\n\n"; &topip; print SUMMARY "
\n"; print SUMMARY "

Domains

\n"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each domain\n\n"; &topdomain; print SUMMARY "\n"; print SUMMARY "

Errors

"; if (!$error) { print SUMMARY "Wonderful but frankly suspicious: no failed access attempts found!\n"; if ($e304) { print ERRORS "There were $e304 \"not modified\" \"errors\" which I don't count\n"; print ERRORS "as errors as I regard them as legitimate responses to enquiries\n"; } } else { if ($error == 1) { print SUMMARY "One error only\n"; } else { print SUMMARY "Total of $error errors\n"; } print SUMMARY "

Error class summary

\n
    \n"; foreach $class (sort keys(%errcl)) { if ($errcl{$class} == 1) { print SUMMARY "
  • $errcl{$class} error of class $class, i.e. \"$errc_lab{$class}\"\n"; } else { print SUMMARY "
  • $errcl{$class} errors of class $class, i.e. \"$errc_lab{$class}\"\n"; } } print SUMMARY "
\n

Error numbers

\n
    \n"; foreach $class (sort keys(%error)) { if ($error{$class} == 1) { print SUMMARY "
  • $error{$class} error of number $class, i.e. \"$err_lab{$class}\"\n"; } else { print SUMMARY "
  • $error{$class} errors of number $class, i.e. \"$err_lab{$class}\"\n"; } } print SUMMARY "
\n"; print SUMMARY "A total cumulative error log is available for these files.\n"; } print SUMMARY $btm_s; close SUMMARY; # print cumulative report and clear cumulative totals open (SUMMARY, ">$outputdir$report.TXT") || die "cannot open $report.TXT"; print SUMMARY "Complete $servername $servertype usage log\n\n"; print SUMMARY "$monthname $currentyear\n\n"; if (@focusonip != 0) { print SUMMARY "Focussing on accesses from the following IP addresses:-\n"; foreach $focusonip (@focusonip) { print SUMMARY " $focusonip\n"; } print SUMMARY "\n"; } if (@focusonpath != 0) { print SUMMARY "Focussing on accesses to the following paths or files:-\n"; foreach $focusonpath (@focusonpath) { print SUMMARY " $focusonpath\n"; } if ($f_pathcomment) { print SUMMARY "$f_pathcomment\n"; } print SUMMARY "\n"; } if ($i_ipcomment) { print SUMMARY "$i_ipcomment\n"; if ($f_pathcomment) { print SUMMARY "$f_pathcomment\n\n"; } } print SUMMARY "Focussing on $focus\n\n" if $focus ne ''; print SUMMARY "Total number of accesses: $access\n"; print SUMMARY "Total number of users: ", ($count = keys(%ip)), "\n"; print SUMMARY "Total number of bytes: ", &putcommasinnumber($totalbytes), "\n"; print SUMMARY "\nBy file\n"; print SUMMARY "=======\n"; foreach $file (sort byfilename keys(%file)) { printf SUMMARY " %9s %-s\n", $file{$file}, $file; } print SUMMARY "\nBy ip\n"; print SUMMARY "=====\n"; foreach $ip (sort byip keys(%ip)) { printf SUMMARY " %9s %-18s %-s\n", $ip{$ip}, $ip, $dnsname{$ip}; } print SUMMARY "\nBy domain\n"; print SUMMARY "=========\n"; $uktotal = $ustotal = $total = 0; foreach $domain (sort bydnsname keys(%domain)) { printf SUMMARY " %9s %-15s %-s\n", $domain{$domain}, $domain, $country{$domain}; if ($domain eq "uk") { $uktotal = $domain{$domain}; } if (($country{$domain} =~ /\*/) || ($domain eq "us")) { $ustotal += $domain{$domain}; } $total += $domain{$domain}; } $blank = " "; $totals = "Grand total"; printf SUMMARY "\n %9s %-15s %-s\n", $total, $blank, $totals; print SUMMARY "\n (* 
= mainly US based)\n"; print SUMMARY " $ustotal from USA or \"mostly USA\" sites,"; if ($total) { $opct = (int(10000*$ustotal/$total))/100; } elsif ($ustotal) { $opct = "100"; } else { $opct = "***"; } $width = 6; $dp = 2; printf SUMMARY " i.e. %${width}.${dp}f %\n", $opct; print SUMMARY " $uktotal from non-local British sites"; if ($total) { $opct = (int(10000*$uktotal/$total))/100; } elsif ($uktotal) { $opct = "100"; } else { $opct = "***"; } $width = 6; $dp = 2; printf SUMMARY " i.e. %${width}.${dp}f%%\n", $opct; close SUMMARY; # if menu document does not exist, starts a new one # looks to see if this report file has already been added to menu # looks for
    list (if not found, fatal error) # adds new report file as first item of list $menuname = $outputdir . $currentlogtype . "USAGE.HTM"; # open the menu file (points to each month's info) open (MENU, "$menuname") || &makenewmenu; # scoop the whole thing into an array and close it @menu = ; close MENU; # now set up something that's in entry for the current month $srchitem = "
  • $monthname $currentyear<\/A>"; # set up the replacement for that item if ($access) { $opct = (int(10000*$outsideaccess/$access))/100; } elsif ($outsideaccess) { $opct = 100; } else { $opct = "***"; } $failperc = int($failperc); if (!$access) { $access = "0"; } if (!$error) { $error = "0"; } if ($lastentry && ($month eq $mon_now)) { # flag up that this is likely to be an incomplete month (and give date of data collection) $newitem = "
  • $monthname $currentyear<\/A> (to $day_now $dat_now) $access good accesses and $error ($failperc% of total) failures<\/LI>\n"; } else { $newitem = "
  • $monthname $currentyear<\/A> $access good accesses and $error ($failperc% of total) failures<\/LI>\n"; } # now set up the other thing to search for, the statement about when the logs were searched $srchlast = "(.*)(log files last checked.*)(.*)"; $item =~ s/$srchlast/$1$uptonow$3/; # use it! $flag_chge = 0; foreach $item (@menu) { if ($item =~ /$srchitem/i) { # that month is already there, update it $item = $newitem; # and flag the change $flag_chge = 1; } } if (!$flag_chge) { # entry wasn't there, so add entry for new usage report foreach (@menu) { # search only until you come to start of list # replace that with itself followed by newline with # the new item in it last if s/^
      /
        \n$newitem/i; $flag_chge = 1; } } # overwrite old file with new one open (MENU, ">$menuname") || die "cannot open $menuname"; print MENU @menu; close MENU; undef (@menu); } # &summaryofmonth # -------------------------------------------------------------------------- sub totsummary { $report = "TOTAL"; open (SUMMARY, ">$outputdir$report.HTM") || die "cannot open $outputdirTOTAL.HTM"; print SUMMARY "\n"; print SUMMARY "$servername $servertype server cumulative total usage log\n"; print SUMMARY "\n\n"; print SUMMARY "

        $servername $servertype server cumulative total usage log

        \n"; print SUMMARY "

        up to $uptonow\n"; print SUMMARY "

        [Files, "; print SUMMARY "sites, "; print SUMMARY "domains, \n"; print SUMMARY "errors]\n"; if (@focusonip != 0) { print SUMMARY "

        Focussing on accesses from the following IP addresses

        \n"; print SUMMARY "
          \n"; foreach $focusonip (@focusonip) { print SUMMARY "
        • $focusonip
        • \n"; } print SUMMARY "
        \n"; } if (@focusonpath != 0) { print SUMMARY "

        Focussing on accesses to the following paths or files

        \n"; print SUMMARY "
          \n"; foreach $focusonpath (@focusonpath) { print SUMMARY "
        • $focusonpath
        • \n"; } print SUMMARY "
        \n"; } if ($i_ipcomment) { print SUMMARY "
          \n"; print SUMMARY "
        • $i_ipcomment
          \n"; if ($f_pathcomment) { print SUMMARY "
        • $f_pathcomment

          \n"; } print SUMMARY "

        \n"; } print SUMMARY "

        Total number of accesses

        \n"; print SUMMARY "Every time a document is retrieved counts as one access.\n"; print SUMMARY "
        ";
            $length = length($homename) + 12; $length = 20 if $length < 20;
            printf SUMMARY "     %-${length}s %18s\n", "Overall total:", $access;
            printf SUMMARY "     %-${length}s %18s\n", "$homename users:", $homeaccess;
            printf SUMMARY "     %-${length}s %18s\n", "Outside users:", $outsideaccess;
            # Outside accesses as a percentage of all accesses, truncated (int)
            # to two decimal places.  The division is only safe when $access is
            # non-zero; the previous test was inverted (!$access), which divided
            # by zero when there were no accesses and reported 100% whenever
            # there were any.  With nothing recorded at all, $opct falls back to
            # the "***" placeholder.
            if ($access) { $opct = (int(10000*$outsideaccess/$access))/100; }
            elsif ($outsideaccess) { $opct = 100; }
            else { $opct = "***"; }
            $width = 6; $dp = 2;
            printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
            printf SUMMARY "\n";
            print SUMMARY "
        \n\n"; print SUMMARY "

        Total number of users

        \n"; print SUMMARY "Each different IP address that accesses the server\n"; print SUMMARY "counts as one user.\n"; print SUMMARY "
        ";
            $length = length($homename) + 12; $length = 20 if $length < 20;
            printf SUMMARY "     %-${length}s %18s\n", "Overall total:",
                ($totcount = keys(%ip));
            printf SUMMARY "     %-${length}s %18s\n", "$homename users:",
                ($count = keys(%homeip));
            printf SUMMARY "     %-${length}s %18s\n", "Outside users:",
                ($outcount = keys(%outsideip));
            # Outside IP addresses as a percentage of all IP addresses seen,
            # truncated (int) to two decimal places.  $totcount is zero when %ip
            # is empty, and dividing by it would be a fatal error, so guard the
            # division and fall back to the "***" placeholder when there is
            # nothing to report.
            if ($totcount) {
               $opct = (int(10000*$outcount/$totcount))/100;
            } elsif ($outcount) {
               $opct = "100";
            } else {
               $opct = "***";
            }
            $width = 6; $dp = 2;
            printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
        
            print SUMMARY "
        \n\n"; if ($totalbytes) { print SUMMARY "

        Total number of bytes transferred

        \n"; print SUMMARY "
        ";
                $length = length($homename) + 12; $length = 20 if $length < 20;
                printf SUMMARY "     %-${length}s %18s\n", "Overall total:",
                    &putcommasinnumber($totalbytes);
                printf SUMMARY "     %-${length}s %18s\n", "$homename users:",
                    &putcommasinnumber($homebytes);
                printf SUMMARY "     %-${length}s %18s\n", "Outside users:",
                    &putcommasinnumber($outsidebytes);
                $opct = (int(10000*$outsidebytes/$totalbytes))/100;
                $width = 6; $dp = 2;
                printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
                print SUMMARY "
        \n\n"; } print SUMMARY "
        \n"; print SUMMARY "

        Files in order of popularity

        \n"; print SUMMARY "This is based on the number of times each individual\n"; print SUMMARY "file is accessed.\n\n"; &topfile; print SUMMARY "
        \n"; print SUMMARY "

        Sites in order of frequency of visits

        \n"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each IP address\n\n"; &topip; print SUMMARY "
        \n"; print SUMMARY "

        Domains

        "; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each domain\n\n"; &topdomain; print SUMMARY "
        \n"; print SUMMARY "

        Errors

        "; if (!$toterror) { print SUMMARY "Wonderful but frankly suspicious: no failed access attempts found!\n"; if ($e304) { print ERRORS "There were $e304 \"not modified\" \"errors\" which I don't count\n"; print ERRORS "as errors as I regard them as legitimate responses to enquiries\n"; } } else { if ($error == 1) { print SUMMARY "One error only\n"; } else { print SUMMARY "Total of $error errors\n"; } print SUMMARY "

        Error class summary

        \n
          \n"; foreach $class (sort keys(%toterrcl)) { if ($toterrcl{$class} == 1) { print SUMMARY "
        • $toterrcl{$class} error of class $class, i.e. $errc_lab{$class}\n"; } else { print SUMMARY "
        • $toterrcl{$class} errors of class $class, i.e. $errc_lab{$class}\n"; } } print SUMMARY "
        \n

        Breakdown by actual error code

        \n
          \n"; foreach $class (sort keys(%toterror)) { if ($toterror{$class} == 1) { print SUMMARY "
        • $toterror{$class} error of class $class, i.e. $err_lab{$class}\n"; } else { print SUMMARY "
        • $toterror{$class} errors of class $class, i.e. $err_lab{$class}\n"; } } print SUMMARY "
        \n"; } $report = "TOTAL"; print SUMMARY $btm_s; close SUMMARY; # print cumulative report and clear cumulative totals open (SUMMARY, ">$outputdir$report.TXT") || die "cannot open $report.TXT"; print SUMMARY "Complete $servername $servertype usage log\n\n"; print SUMMARY "$monthname $currentyear\n\n"; if (@focusonip != 0) { print SUMMARY "Focussing on accesses from the following IP addresses:-\n"; foreach $focusonip (@focusonip) { print SUMMARY " $focusonip\n"; } print SUMMARY "\n"; } if (@focusonpath != 0) { print SUMMARY "Focussing on accesses to the following paths or files:-\n"; foreach $focusonpath (@focusonpath) { print SUMMARY " $focusonpath\n"; } print SUMMARY "\n"; } if ($i_ipcomment) { print SUMMARY "
          \n"; print SUMMARY "
        • $i_ipcomment
          \n"; if ($f_pathcomment) { print SUMMARY "
        • $f_pathcomment

          \n"; } print SUMMARY "

        \n"; } print SUMMARY "Focussing on $focus\n\n" if $focus ne ''; print SUMMARY "Total number of accesses: $access\n"; print SUMMARY "Total number of users: ", ($count = keys(%ip)), "\n"; print SUMMARY "Total number of bytes: ", &putcommasinnumber($totalbytes), "\n"; print SUMMARY "\nBy file\n"; print SUMMARY "=======\n"; foreach $file (sort byfilename keys(%file)) { printf SUMMARY " %9s %-s\n", $file{$file}, $file; } print SUMMARY "\nBy ip\n"; print SUMMARY "=====\n"; foreach $ip (sort byip keys(%ip)) { printf SUMMARY " %9s %-18s %-s\n", $ip{$ip}, $ip, $dnsname{$ip}; } print SUMMARY "\nBy domain\n"; print SUMMARY "=========\n"; $uktotal = $ustotal = $total = 0; foreach $domain (sort bydnsname keys(%domain)) { printf SUMMARY " %9s %-15s %-s\n", $domain{$domain}, $domain, $country{$domain}; if ($domain eq "uk") { $uktotal = $domain{$domain}; } if (($country{$domain} =~ /\*/) || ($domain eq "us")) { $ustotal += $domain{$domain}; } $total += $domain{$domain}; } $blank = " "; $totals = "Grand total"; printf SUMMARY "\n %9s %-15s %-s\n", $total, $blank, $totals; print SUMMARY "\n (* = mainly US based)\n"; print SUMMARY " $ustotal from USA or \"mostly USA\" sites"; if ($total) { $opct = (int(10000*$ustotal/$total))/100; } elsif ($ustotal) { $opct = "100"; } else { $opct = "***"; } $width = 6; $dp = 2; printf SUMMARY " i.e. %${width}.${dp}f %\n", $opct; print SUMMARY " $uktotal from non-local British sites"; if ($total) { $opct = (int(10000*$uktotal/$total))/100; } elsif ($uktotal) { $opct = "100"; } else { $opct = "***"; } $width = 6; $dp = 2; printf SUMMARY " i.e. %${width}.${dp}f %\n", $opct; close SUMMARY; } # &totsummary sub errsummary { $report = "ERRORS.HTM"; open (ERRORS, ">$outputdir$report") || die "cannot open $outputdir$report"; print ERRORS "\n"; print ERRORS "$servername $servertype server cumulative error log\n"; print ERRORS "\n\n"; if (!$dated) { print ERRORS "

        $monthname $currentyear

        \n"; } else { print ERRORS "

        $monthname $currentyear

        \n"; print ERRORS "

        $uptonow

        \n"; } if (!$toterror) { print ERRORS "Wonderful but frankly suspicious: no failed access attempts found!\n"; if ($e304) { print ERRORS "There were $e304 \"not modified\" \"errors\" which I don't count\n"; print ERRORS "as errors as I regard them as legitimate responses to enquiries\n"; } } else { print ERRORS "

        Overall error summary

        \n"; if ($i_ipcomment) { print ERRORS "
          \n"; print ERRORS "
        • $i_ipcomment
          \n"; if ($f_pathcomment) { print ERRORS "
        • $f_pathcomment

          \n"; } print ERRORS "

        \n"; } if ($error == 1) { print ERRORS "One error only\n"; } else { print ERRORS "Total of $error errors\n"; } print ERRORS "
        \n

        Breakdown by error classes

        \n
          \n"; foreach $class (sort keys(%toterrcl)) { if ($toterrcl{$class} == 1) { print ERRORS "
        • $toterrcl{$class} error of class $class, i.e. \"$errc_lab{$class}\"\n"; } else { print ERRORS "
        • $toterrcl{$class} errors of class $class, i.e. \"$errc_lab{$class}\"\n"; } } print ERRORS "
        \n

        Breakdown by actual error code

        \n
          \n"; foreach $class (sort keys(%toterror)) { if ($toterror{$class} == 1) { print ERRORS "
        • $toterror{$class} error of class $class, i.e. \"$err_lab{$class}\"\n"; } else { print ERRORS "
        • $toterror{$class} errors of class $class, i.e. \"$err_lab{$class}\"\n"; } } } print ERRORS "
        \n"; print ERRORS $btm_e; close ERRORS; } # &errsummary # -------------------------------------------------------------------------- sub makenewmenu { # no menu (title) document found - start a new one # new menu will contain a blank list
          # leave file open for reading open (MENU, ">$menuname") || die "cannot open $menuname"; print MENU "\n"; print MENU "Monthly $servertype server usage logs\n"; print MENU "\n"; print MENU "

          $servername $servertype server

          \n"; print MENU "

          Cumulative total \nerrors and \n"; print MENU "usage

          \n"; if (!$dated) { print MENU "

          From $monthname $currentyear

          \n"; } else { print MENU "

          From $monthname $currentyear, $uptonow

          \n"; } if (@focusonip != 0) { print MENU "

          Focussing on accesses from the following IP addresses

          \n"; print MENU "
            \n"; foreach $focusonip (@focusonip) { print MENU "
          • $focusonip
          • \n"; } print MENU "
          \n"; } if (@focusonpath != 0) { print MENU "

          Focussing on accesses to the following paths or files

          \n"; print MENU "
            \n"; foreach $focusonpath (@focusonpath) { print MENU "
          • $focusonpath
          • \n"; } print MENU "
          \n"; } if ($i_ipcomment) { print MENU "
            \n"; print MENU "
          • $i_ipcomment
            \n"; if ($f_pathcomment) { print MENU "
          • $f_pathcomment

            \n"; } print MENU "

          \n"; } print MENU "

          Monthly usage logs

          \n"; print MENU "
            \n"; print MENU "
          \n"; print MENU $btm_m; close MENU; open (MENU, "$menuname") || die "cannot open $menuname"; } # &makenewmenu # -------------------------------------------------------------------------- sub addname { local($ip) = $_[0]; undef $domain; # if this is already a DNS name (as opposed to IP address) # then nothing to look up - the punter is wasting my time! if ($ip =~ /[A-Z]/i) { # domain = last word of dns name (e.g. lse.ac.uk -> uk) ($domain) = $ip =~ /(\w+)$/; } # if domain name for this ip address not yet known, look it up else { if ($lookupdnsnames && !$dnsname{$ip}) { print "\nLooking up DNS name for $ip . . . \t" if $verbosemode; # if lookup fails, set to "unknown" ($a, $b, $c, $d) = split(/\./, $ip); $address = pack('C4', $a, $b, $c, $d); ($name, $aliases, $adrtype, $length, @serveraddr) = gethostbyaddr($address, 2); $name =~ tr/A-Z/a-z/ if $filenamestolowercase; $name = " " if $name eq ''; print "$name\n" if $verbosemode; $dnsname{$ip} = $name; } # domain = last word of dns name (e.g. lse.ac.uk -> uk) ($domain) = $dnsname{$ip} =~ /(\w+)$/; } $domain; } # &addname # -------------------------------------------------------------------------- sub topfile { # print out the top $topfiles files # file accesses are in the %file associative array # *** this test was added by CE to deal with small lists if ($topfiles && ($#files > $topfiles)) { print SUMMARY "

          Top $topfiles files accessed by all users

          \n"; } else { print SUMMARY "

          Files accessed by all users in order of popularity

          \n"; } print SUMMARY "
          ";
              # List files by access count, highest first (the order comes from
              # the byfileaccess comparator), stopping once $topfiles rows have
              # been printed.  $count is local()ised here and starts at zero.
              local ($count) = 0;
              printf SUMMARY "     %10s %-s\n", "Accesses", "File";
              printf SUMMARY "     %10s %-s\n", "========", "====";
              foreach $file (sort byfileaccess keys(%file)) {
                  # NOTE(review): as seen here both branches use the same format
                  # string, and the first passes a third argument ($file) that
                  # the format has no conversion for - confirm what the
                  # $size{$file} branch was meant to print differently.
                  if ($size{$file}) {
                     printf SUMMARY "     %9s  %-s\n", $file{$file}, $file, $file;
                 } else {
                     printf SUMMARY "     %9s  %-s\n", $file{$file}, $file;
                 }
                 # $count is at least 1 here, so with $topfiles set to 0 this
                 # never matches and every file is listed.
                 last if ++$count == $topfiles;
              }
              print SUMMARY "
          \n\n";
              # Home-site table.  The heading depends on whether the list will
              # be truncated, so compare the number of distinct files in
              # %homefile with $topfiles (the old test read $#homefile, an
              # array that is not the tally being printed).
              if ($topfiles && (scalar(keys(%homefile)) > $topfiles)) {
                  print SUMMARY "

          Top $topfiles files accessed by $homename users

          \n";
              } else {
                  print SUMMARY "

          Files accessed by $homename users in order of popularity

          \n";
              }
              print SUMMARY "
          ";
              $count = 0;
              printf SUMMARY "     %10s %-s\n", "Accesses", "File";
              printf SUMMARY "     %10s %-s\n", "========", "====";
              foreach $file (sort byhomefileaccess keys(%homefile)) {
                  # one row per file; the former $size{$file} test chose
                  # between two identical printf calls
                  printf SUMMARY "     %9s  %-s\n", $homefile{$file}, $file;
                  # with $topfiles == 0 this never fires and every file is listed
                  last if ++$count == $topfiles;
              }
              print SUMMARY "
          \n\n";
              # Outside-site table.  Heading test uses the number of distinct
              # files in %outsidefile (previously $#outsidefile, an array that
              # is not the tally being printed).
              if ($topfiles && (scalar(keys(%outsidefile)) > $topfiles)) {
                  print SUMMARY "

          Top $topfiles files accessed by outside users

          \n";
              } else {
                  print SUMMARY "

          Files accessed by outside users in order of popularity

          \n";
              }
              print SUMMARY "
          ";
              # start from 0 like the other two tables; starting from 1 made
              # this table stop one row short of $topfiles
              $count = 0;
              printf SUMMARY "     %10s %-s\n", "Accesses", "File";
              printf SUMMARY "     %10s %-s\n", "========", "====";
              foreach $file (sort byoutsidefileaccess keys(%outsidefile)) {
                  # one row per file; the former $size{$file} test chose
                  # between two identical printf calls
                  printf SUMMARY "     %9s  %-s\n", $outsidefile{$file}, $file;
                  # with $topfiles == 0 this never fires and every file is listed
                  last if ++$count == $topfiles;
              }
              print SUMMARY "
          \n\n";
} # &topfile

# --------------------------------------------------------------------------
# SUB-ROUTINES - Sorting
#
# All of these are comparison routines for sort(): they compare the two
# candidates that sort() supplies in $a and $b.
# --------------------------------------------------------------------------

# Most-accessed file first (counts in %file).
sub byfileaccess {
    return $file{$b} <=> $file{$a};
}

# --------------------------------------------------------------------------
# Alphabetical by filename, ignoring case.
sub byfilename {
    local($lowera, $lowerb) = ($a, $b);
    $lowera =~ tr/A-Z/a-z/;
    $lowerb =~ tr/A-Z/a-z/;
    return $lowera cmp $lowerb;
}

# --------------------------------------------------------------------------
# Most-accessed file first, home users only (counts in %homefile).
sub byhomefileaccess {
    return $homefile{$b} <=> $homefile{$a};
}

# --------------------------------------------------------------------------
# Most-accessed file first, outside users only (counts in %outsidefile).
sub byoutsidefileaccess {
    return $outsidefile{$b} <=> $outsidefile{$a};
}

# --------------------------------------------------------------------------
# Busiest site first (counts in %ip).
sub byipaccess {
    return $ip{$b} <=> $ip{$a};
}

# --------------------------------------------------------------------------
# Busiest home site first (counts in %homeip).
sub byhomeipaccess {
    return $homeip{$b} <=> $homeip{$a};
}

# --------------------------------------------------------------------------
# Busiest outside site first (counts in %outsideip).
sub byoutsideipaccess {
    return $outsideip{$b} <=> $outsideip{$a};
}

# --------------------------------------------------------------------------
# Alphabetical by the country label held in %country for each key.
sub bydnsname {
    return $country{$a} cmp $country{$b};
}

# --------------------------------------------------------------------------
sub bydomainaccess {
    # compare by number of
# accesses in order to sort
    # into reverse order by access
    $domain{$b} <=> $domain{$a};
}

# --------------------------------------------------------------------------
# byip -- sort() comparison routine ordering sites by DNS name or IP address.
#   Each key is replaced by its cached name from %dnsname when there is one.
#   DNS names sort before bare IP addresses.  Two DNS names are compared
#   with their labels reversed (uk.ac.lse ...), so sites group by domain.
#   Two IP addresses are compared numerically, octet by octet.
sub byip {
    local($comp) = 1;
    local($dnsa) = $dnsname{$a};
    local($dnsb) = $dnsname{$b};
    # octets are localised so the comparison leaves no stray globals behind
    local($a1, $a2, $a3, $a4, $b1, $b2, $b3, $b4);
    $dnsa = $a if !$dnsa;
    $dnsb = $b if !$dnsb;
    if ($dnsa =~ /[A-Z]/i) {
        # $dnsa is a DNS name
        if ($dnsb =~ /[A-Z]/i) {
            # $dnsa and $dnsb are both DNS names
            $dnsa = join('.', reverse(split(/\./, $dnsa)));
            $dnsb = join('.', reverse(split(/\./, $dnsb)));
            $comp = $dnsa cmp $dnsb;
        } else {
            # $dnsa is a DNS name but $dnsb is an IP address
            $comp = -1;
        }
    } elsif ($dnsb =~ /[A-Z]/i) {
        # $dnsa is an IP address but $dnsb is a DNS name
        $comp = +1;
    } else {
        # $dnsa and $dnsb are both IP addresses; the raw keys $a and $b
        # are split (not $dnsa/$dnsb, which may hold a cached blank name)
        ($a1, $a2, $a3, $a4) = split(/\./, $a);
        ($b1, $b2, $b3, $b4) = split(/\./, $b);
        $comp = $a1 <=> $b1;
        $comp = $a2 <=> $b2 if $comp == 0;
        $comp = $a3 <=> $b3 if $comp == 0;
        $comp = $a4 <=> $b4 if $comp == 0;
    }
    return $comp;
}

# --------------------------------------------------------------------------
# SUB-ROUTINES - Printing out top sites
# --------------------------------------------------------------------------
sub topip {
    # print out the top $topips sites (IP addresses)
    # ip accesses are in the %ip associative array

    # The number of distinct sites is the number of keys in %ip; the
    # previous test looked at $#ip, an array this listing never uses.
    if ($topips && (scalar(keys(%ip)) > $topips)) {
        print SUMMARY "

          Overall top $topips sites

          \n";
    } else {
        print SUMMARY "

          Listing of sites in order of visiting frequency

          \n";
    }
    print SUMMARY "
          ";
              # All-sites table: two heading rows (the DNS column heading only
              # when names are being looked up), then one row per site with
              # the busiest first (ordering comes from &byipaccess).
              local ($count) = 0;
              local (@headings) = ("Accesses", "IP address", "DNS name",
                                   "========", "==========", "========");
              while (@headings) {
                  local ($hitscol, $sitecol, $dnscol) = splice(@headings, 0, 3);
                  printf SUMMARY "     %10s %-17s", $hitscol, $sitecol;
                  printf SUMMARY " %-s", $dnscol if $lookupdnsnames;
                  printf SUMMARY "\n";
              }
              foreach $ip (sort byipaccess keys(%ip)) {
                  printf SUMMARY "     %9s  %-17s %-s\n", $ip{$ip}, $ip, $dnsname{$ip};
                  $count++;
                  # with $topips == 0 this never fires and every site is listed
                  last if $count == $topips;
              }
              print SUMMARY "
          \n\n";
              # Home-site table.  The per-site tallies are the keys of
              # %homeip.  The old test used $#homeip, which measures @homeip -
              # the configured list of home address prefixes - so the
              # "top N" heading could never be chosen correctly.
              if ($topips && (scalar(keys(%homeip)) > $topips)) {
                  print SUMMARY "

          Overall top $topips $homename sites

          \n";
              } else {
                  print SUMMARY "

          Listing of $homename sites in order of visiting frequency

          \n";
              }
              print SUMMARY "
          ";
              local ($count) = 0;
              printf SUMMARY "     %10s %-17s", "Accesses", "IP address";
              printf SUMMARY " %-s", "DNS name" if $lookupdnsnames;
              printf SUMMARY "\n";
              printf SUMMARY "     %10s %-17s", "========", "==========";
              printf SUMMARY " %-s", "========" if $lookupdnsnames;
              printf SUMMARY "\n";
              foreach $ip (sort byhomeipaccess keys(%homeip)) {
                  printf SUMMARY "     %9s  %-17s %-s\n", $homeip{$ip}, $ip, $dnsname{$ip};
                  # with $topips == 0 this never fires and every site is listed
                  last if ++$count == $topips;
              }
              print SUMMARY "
          \n\n";
              # Outside-site table.  Heading test uses the number of distinct
              # sites in %outsideip (previously $#outsideip, an array that is
              # not the tally being printed).
              if ($topips && (scalar(keys(%outsideip)) > $topips)) {
                  print SUMMARY "

          Overall top $topips outside sites

          \n";
              } else {
                  print SUMMARY "

          Listing of outside sites in order of visiting frequency

          \n";
              }
              print SUMMARY "
          ";
              local ($count) = 0;
              printf SUMMARY "     %10s %-17s", "Accesses", "IP address";
              printf SUMMARY " %-s", "DNS name" if $lookupdnsnames;
              printf SUMMARY "\n";
              printf SUMMARY "     %10s %-17s", "========", "==========";
              printf SUMMARY " %-s", "========" if $lookupdnsnames;
              printf SUMMARY "\n";
              foreach $ip (sort byoutsideipaccess keys(%outsideip)) {
                  # print the tally the rows are sorted by (%outsideip), in
                  # line with the home table; this row used to print $ip{$ip}
                  printf SUMMARY "     %9s  %-17s %-s\n", $outsideip{$ip}, $ip, $dnsname{$ip};
                  # with $topips == 0 this never fires and every site is listed
                  last if ++$count == $topips;
              }
              print SUMMARY "
          \n\n";
} # &topip

# --------------------------------------------------------------------------
# topdomain
#   Writes the per-domain access table to SUMMARY, busiest domain first,
#   followed by a grand total and the share of accesses from US (or "mostly
#   US") domains and from the "uk" domain.
#   Sets the globals $uktotal, $ustotal, $total, $opct, $width, $dp, $blank
#   and $totals as the original did.
sub topdomain {
    # print out the domains
    # domain accesses are in the %domain associative array
    print SUMMARY "
          ";
    printf SUMMARY "     %10s %-15s %-s\n", "Accesses", "Domain", "Country";
    printf SUMMARY "     %10s %-15s %-s\n", "========", "======", "=======";
    $uktotal = $ustotal = $total = 0;
    foreach $domain (sort bydomainaccess keys(%domain)) {
        printf SUMMARY "     %9s  %-15s %-s\n",
            $domain{$domain}, $domain, $country{$domain};
        if ($domain eq "uk") {
            $uktotal = $domain{$domain};
        }
        # a "*" in the country label marks a mainly-US domain
        if (($country{$domain} =~ /\*/) || ($domain eq "us")) {
            $ustotal += $domain{$domain};
        }
        $total += $domain{$domain};
    }
    $blank = " ";
    $totals = "Grand total";
    printf SUMMARY "\n     %9s  %-15s %-s\n",
        $total, $blank, $totals;

    print SUMMARY "\n     (* = mainly US based)\n";
    $width = 6; $dp = 2;

    # Percentages are truncated (not rounded) to two decimal places.
    # A literal percent sign in a printf format must be written "%%".
    # With no accesses at all the placeholder "***" is printed as text;
    # it used to be pushed through %f and came out as 0.00.  The old
    # "elsif" branches for a non-zero count with a zero total could never
    # run, because both counts are part of $total.
    print SUMMARY "   $ustotal from USA or \"mostly USA\" sites";
    if ($total) {
        $opct = (int(10000*$ustotal/$total))/100;
        printf SUMMARY " i.e. %${width}.${dp}f %%\n", $opct;
    } else {
        $opct = "***";
        printf SUMMARY " i.e. %${width}s %%\n", $opct;
    }

    print SUMMARY "   $uktotal from non-local British sites";
    if ($total) {
        $opct = (int(10000*$uktotal/$total))/100;
        printf SUMMARY " i.e. %${width}.${dp}f %%\n", $opct;
    } else {
        $opct = "***";
        printf SUMMARY " i.e. %${width}s %%\n", $opct;
    }
              print SUMMARY "
          ";
} # &topdomain

# --------------------------------------------------------------------------
# SUB-ROUTINES - Focus on or ignore specific accesses
# --------------------------------------------------------------------------

# ipinlist($ip, @candidates)
#   Shared matcher for the four IP filters below.  Returns 1 when $ip
#   matches any candidate, otherwise 0.
#   $ip may contain an IP address or a DNS name:
#     - a DNS name matches when it ends with the candidate;
#     - an IP address matches when it begins with the candidate, or when
#       its cached DNS name in %dnsname ends with the candidate.
#   Matching ignores case.  Candidates are taken literally: regular
#   expression metacharacters are quoted, so the "." in "192.153" or
#   "lse.ac.uk" only matches a real dot.
sub ipinlist {
    local($ip, @candidates) = @_;
    local($candidate, $pattern);
    local($isname) = ($ip =~ /[A-Z]/i);
    foreach $candidate (@candidates) {
        ($pattern = $candidate) =~ s/(\W)/\\$1/g;
        if ($isname) {
            return 1 if $ip =~ /$pattern$/i;
        } else {
            return 1 if ($ip =~ /^$pattern/i) || ($dnsname{$ip} =~ /$pattern$/i);
        }
    }
    0;
}

# --------------------------------------------------------------------------
# true when $ip matches an entry of the @homeip list
sub ishomeip {
    local($ip) = @_;
    &ipinlist($ip, @homeip);
}

# --------------------------------------------------------------------------
# true when $ip matches an entry of @focusonip; an empty @focusonip
# means "focus on everything"
sub focusonthisip {
    local($ip) = @_;
    return 1 if @focusonip == 0;
    &ipinlist($ip, @focusonip);
}

# --------------------------------------------------------------------------
# true when $ip matches an entry of the @myownip list
sub myownip {
    local($ip) = @_;
    &ipinlist($ip, @myownip);
}

# --------------------------------------------------------------------------
# true when $ip matches an entry of the @ignoreip list
sub ignorethisip {
    local($ip) = @_;
    &ipinlist($ip, @ignoreip);
}

# --------------------------------------------------------------------------
sub focusonthispath {
    local($path) =
@_;
    # (the line above completes "local($path) = @_;")
    # true when $path begins with an entry of @focusonpath; an empty
    # @focusonpath means "focus on everything"
    return 1 if @focusonpath == 0;
    &matchesanyof($path, 'start', @focusonpath);
}

# --------------------------------------------------------------------------
# matchesanyof($text, $where, @fragments)
#   Shared matcher for the path and extension filters.  Returns 1 when
#   $text begins with ($where eq 'start') or ends with (any other $where)
#   one of @fragments, otherwise 0.  Matching ignores case.  Fragments are
#   taken literally: regular expression metacharacters are quoted, so an
#   extension such as ".gif" no longer matches a name that merely ends in
#   "gif".
sub matchesanyof {
    local($text, $where, @fragments) = @_;
    local($fragment, $pattern);
    foreach $fragment (@fragments) {
        ($pattern = $fragment) =~ s/(\W)/\\$1/g;
        if ($where eq 'start') {
            return 1 if $text =~ /^$pattern/i;
        } else {
            return 1 if $text =~ /$pattern$/i;
        }
    }
    0;
}

# --------------------------------------------------------------------------
# true when $path begins with an entry of @ignorepath
sub ignorethispath {
    local($path) = @_;
    &matchesanyof($path, 'start', @ignorepath);
}

# --------------------------------------------------------------------------
# true when $path ends with an entry of @focusonext; an empty @focusonext
# means "focus on everything"
sub focusonthisext {
    local($path) = @_;
    return 1 if @focusonext == 0;
    &matchesanyof($path, 'end', @focusonext);
}

# --------------------------------------------------------------------------
# true when $path ends with an entry of @ignoreext
sub ignorethisext {
    local($path) = @_;
    &matchesanyof($path, 'end', @ignoreext);
}

# --------------------------------------------------------------------------
# SUB-ROUTINES - General
# --------------------------------------------------------------------------

# Advances the spinner held in the global $progressmeter by one frame
# (- \ | / and round again) and prints it followed by a backspace.
sub progressmeter {
    local(%nextframe) = ('-', '\\', '\\', '|', '|', '/');
    # any other value, including '/' and the initial empty value, restarts
    # the cycle at '-'
    $progressmeter = defined($nextframe{$progressmeter})
                   ? $nextframe{$progressmeter} : '-';
    print "$progressmeter\b";
}

# --------------------------------------------------------------------------
# putcommasinnumber($n)
#   Returns $n with a comma between each group of three digits
#   (1234567 -> "1,234,567").
#   NOTE(review): zero comes back as an empty string, exactly as in the
#   original - confirm whether "0" was intended.
sub putcommasinnumber {
    local($n) = @_;
    local($group);
    local($string) = "";
    do {
        # peel off the lowest three digits
        $group = $n - (1000 * int($n / 1000));
        $n = int($n / 1000);
        if ($n) {
            # not the leading group: zero-fill to three digits
            $group = "0" . $group while length($group) < 3;
            $string = "," . $group . $string;
        } elsif ($group) {
            $string = $group . $string;
        }
    } until !$n;
    $string;
}

# --------------------------------------------------------------------------
# for padding text & numbers to right-justify in column
# arguments are string, width of column, and character to pad with
# (the pad character defaults to a space; strings already as wide as the
# column are returned unchanged)
sub rightjustifystring {
    local($string) = $_[0];
    local($width) = $_[1];
    local($leadingchar) = $_[2];
    # only fall back to a space when no pad character was given; a plain
    # truth test also threw away "0", the usual pad for numbers
    $leadingchar = ' ' if !defined($leadingchar) || $leadingchar eq '';
    # a non-positive repeat count yields the empty string
    $string = ($leadingchar x ($width - length($string))) . $string;
    return $string;
}

# three_digits($number)
#   Returns the whole-number part of $number right-justified with spaces in
#   a field three characters wide ("  0" when there are no digits).  Dies if
#   the whole-number part has more than three digits.
#   The first run of digits is used whether or not a decimal point follows;
#   previously a value without a decimal point left a stale $1 behind.
#   NOTE(review): the padding literals had been whitespace-collapsed (the
#   one- and two-digit cases added the same single space); a constant width
#   of three is assumed from the routine's name - confirm.
sub three_digits {
    local ($_) = @_;
    $_ = /(\d+)/ ? $1 : "";
    $_ = "0" if length($_) == 0;
    if (length($_) > 3) {
        die "Supplied a number of more than 3 digits to three_digits\n";
    }
    (" " x (3 - length($_))) . $_;
}

# --------------------------------------------------------------------------
# countries -- fills %country, keyed by the last word of a DNS name, with a
# display label.  Labels containing "*" are the ones &topdomain counts as
# mainly US based.
sub countries {
    $country{$homedomain} = "($homename)";
    $country{"com"} = "US Commercial*";
    $country{"edu"} = "US Educational*";
    $country{"gov"} = "US Government*";
    $country{"mil"} = "US Military*";
    $country{"net"} = "US Network*";
    $country{"org"} = "US Non-Commercial*";
    $country{"int"} = "US International*";
    $country{"arpa"} = "US Arpanet*";
    $country{"nato"} = "US Nato*";
    $country{"ad"} = "Andorra";
    $country{"ae"} = "UAE";
    $country{"af"} = "Afghanistan";
    $country{"ag"} = "Antigua and Barbuda";
    $country{"ai"} = "Anguilla";
    $country{"al"} = "Albania";
    $country{"am"} = "Armenia";
    $country{"an"} = "Antilles";
    $country{"ao"} = "Angola";
    $country{"aq"} = "Antarctica";
    $country{"ar"} = "Argentina";
    $country{"as"} = "American Samoa";
    $country{"at"} = "Austria";
    $country{"au"} = "Australia";
    $country{"aw"} = "Aruba";
    $country{"az"} = "Azerbaijan";
    $country{"ba"} = "Bosnia and Herzegovina";
    $country{"bb"} = "Barbados";
    $country{"bd"} = "Bangladesh";
    $country{"be"} = "Belgium";
    $country{"bf"} = "Burkina Faso";
    $country{"bg"} = "Bulgaria";
    $country{"bh"} = "Bahrain";
    $country{"bi"} = "Burundi";
    $country{"bj"} = "Benin";
    $country{"bm"} = "Bermuda";
    $country{"bn"} = "Brunei Darussalam";
    $country{"bo"} = "Bolivia";
    $country{"br"} = "Brazil";
    $country{"bs"} = "Bahamas";
    $country{"bt"} = "Bhutan";
    $country{"bv"} = "Bouvet Island";
    $country{"bw"} = "Botswana";
    $country{"by"} = "Belarus";
    $country{"bz"} = "Belize";
    $country{"ca"} = "Canada";
    $country{"cc"} = "Cocos (Keeling) Islands";
    $country{"cf"} = "Central African Republic";
    $country{"cg"} = "Congo";
    $country{"ch"} = "Switzerland";
    $country{"ci"} = "Cote D\'Ivoire (Ivory Coast)";
    $country{"ck"} = "Cook Islands";
    $country{"cl"} = "Chile";
    $country{"cm"} = "Cameroon";
    $country{"cn"} = "China";
    $country{"co"} = "Colombia";
    $country{"cr"} = "Costa Rica";
    $country{"cs"} = "Czechoslovakia (former)";
    $country{"cu"} = "Cuba";
    $country{"cv"} = "Cape Verde";
    $country{"cx"} = "Christmas Island";
    $country{"cy"} = "Cyprus";
    $country{"cz"} = "Czech Republic";
    $country{"de"} = "Germany";
    $country{"dj"} = "Djibouti";
    $country{"dk"} = "Denmark";
    $country{"dm"} = "Dominica";
    $country{"do"} = "Dominican Republic";
    $country{"dz"} = "Algeria";
    $country{"ec"} = "Ecuador";
    $country{"ee"} = "Estonia";
    $country{"eg"} = "Egypt";
    $country{"eh"} = "Western Sahara";
    $country{"er"} = "Eritrea";
    $country{"es"} = "Spain";
    $country{"et"} = "Ethiopia";
    $country{"fi"} = "Finland";
    $country{"fj"} = "Fiji";
    $country{"fk"} = "Falkland Islands (Malvinas)";
    $country{"fm"} = "Micronesia";
    $country{"fo"} = "Faroe Islands";
    $country{"fr"} = "France";
    $country{"fx"} = "France, Metropolitan";
    $country{"ga"} = "Gabon";
    $country{"gb"} = "Great Britain (UK)";
    $country{"gd"} = "Grenada";
    $country{"ge"} = "Georgia";
    $country{"gf"} = "French Guiana";
    $country{"gh"} = "Ghana";
    $country{"gi"} = "Gibraltar";
    $country{"gl"} = "Greenland";
    $country{"gm"} = "Gambia";
    $country{"gn"} = "Guinea";
    $country{"gp"} = "Guadeloupe";
    $country{"gq"} = "Equatorial Guinea";
    $country{"gr"} = "Greece";
    # this label had been split across two lines by a line wrap
    $country{"gs"} = "S. Georgia and S. Sandwich Islands";
    $country{"gt"} = "Guatemala";
    $country{"gu"} = "Guam";
    $country{"gw"} = "Guinea-Bissau";
    $country{"gy"} = "Guyana";
    $country{"hk"} = "Hong Kong";
    $country{"hm"} = "Heard and McDonald Islands";
    $country{"hn"} = "Honduras";
    $country{"hr"} = "Croatia (Hrvatska)";
    $country{"ht"} = "Haiti";
    $country{"hu"} = "Hungary";
    $country{"id"} = "Indonesia";
    $country{"ie"} = "Ireland";
    $country{"il"} = "Israel";
    $country{"in"} = "India";
    $country{"io"} = "British Indian Ocean Territory";
    $country{"iq"} = "Iraq";
    $country{"ir"} = "Iran";
    $country{"is"} = "Iceland";
    $country{"it"} = "Italy";
    $country{"jm"} = "Jamaica";
    $country{"jo"} = "Jordan";
    $country{"jp"} = "Japan";
    $country{"ke"} = "Kenya";
    $country{"kg"} = "Kyrgyzstan";
    $country{"kh"} = "Cambodia";
    $country{"ki"} = "Kiribati";
    $country{"km"} = "Comoros";
    $country{"kn"} = "Saint Kitts and Nevis";
    $country{"kp"} = "Korea (North)";
    $country{"kr"} = "Korea (South)";
    $country{"kw"} = "Kuwait";
    $country{"ky"} = "Cayman Islands";
    $country{"kz"} = "Kazakhstan";
    $country{"la"} = "Laos";
    $country{"lb"} = "Lebanon";
    $country{"lc"} = "Saint Lucia";
    $country{"li"} = "Liechtenstein";
    $country{"lk"} = "Sri Lanka";
    $country{"lr"} = "Liberia";
    $country{"ls"} = "Lesotho";
    $country{"lt"} = "Lithuania";
    $country{"lu"} = "Luxembourg";
    $country{"lv"} = "Latvia";
    $country{"ly"} = "Libya";
    $country{"ma"} = "Morocco";
    $country{"mc"} = "Monaco";
    $country{"md"} = "Moldova";
    $country{"mg"} = "Madagascar";
    $country{"mh"} = "Marshall Islands";
    $country{"mk"} = "Macedonia";
    $country{"ml"} = "Mali";
    $country{"mm"} = "Myanmar";
    $country{"mn"} = "Mongolia";
    $country{"mo"} = "Macau";
    $country{"mp"} = "Northern Mariana Islands";
    $country{"mq"} = "Martinique";
    $country{"mr"} = "Mauritania";
    $country{"ms"} = "Montserrat";
    $country{"mt"} = "Malta";
    $country{"mu"} = "Mauritius";
    $country{"mv"} = "Maldives";
    $country{"mw"} = "Malawi";
    $country{"mx"} = "Mexico";
    $country{"my"} = "Malaysia";
    $country{"mz"} = "Mozambique";
    $country{"na"} = "Namibia";
$country{"nc"} = "New Caledonia";
    # (continuation of the %country table filled in by sub countries)
    $country{"ne"} = "Niger";
    $country{"nf"} = "Norfolk Island";
    $country{"ng"} = "Nigeria";
    $country{"ni"} = "Nicaragua";
    $country{"nl"} = "Netherlands";
    $country{"no"} = "Norway";
    $country{"np"} = "Nepal";
    $country{"nr"} = "Nauru";
    $country{"nt"} = "Neutral Zone";
    $country{"nu"} = "Niue";
    $country{"nz"} = "New Zealand";
    $country{"om"} = "Oman";
    $country{"pa"} = "Panama";
    $country{"pe"} = "Peru";
    $country{"pf"} = "French Polynesia";
    $country{"pg"} = "Papua New Guinea";
    $country{"ph"} = "Philippines";
    $country{"pk"} = "Pakistan";
    $country{"pl"} = "Poland";
    $country{"pm"} = "St. Pierre and Miquelon";
    $country{"pn"} = "Pitcairn";
    $country{"pr"} = "Puerto Rico";
    $country{"pt"} = "Portugal";
    $country{"pw"} = "Palau";
    $country{"py"} = "Paraguay";
    $country{"qa"} = "Qatar";
    $country{"re"} = "Reunion";
    $country{"ro"} = "Romania";
    $country{"ru"} = "Russian Federation";
    $country{"rw"} = "Rwanda";
    $country{"sa"} = "Saudi Arabia";
    $country{"sb"} = "Solomon Islands";
    $country{"sc"} = "Seychelles";
    $country{"sd"} = "Sudan";
    $country{"se"} = "Sweden";
    $country{"sg"} = "Singapore";
    # this label had been split across two lines by a line wrap
    $country{"sh"} = "St. Helena";
    $country{"si"} = "Slovenia";
    $country{"sj"} = "Svalbard and Jan Mayen Islands";
    $country{"sk"} = "Slovak Republic";
    $country{"sl"} = "Sierra Leone";
    $country{"sm"} = "San Marino";
    $country{"sn"} = "Senegal";
    $country{"so"} = "Somalia";
    $country{"sr"} = "Suriname";
    $country{"st"} = "Sao Tome and Principe";
    $country{"su"} = "USSR (former)";
    $country{"sv"} = "El Salvador";
    $country{"sy"} = "Syria";
    $country{"sz"} = "Swaziland";
    $country{"tc"} = "Turks and Caicos Islands";
    $country{"td"} = "Chad";
    $country{"tf"} = "French Southern Territories";
    $country{"tg"} = "Togo";
    $country{"th"} = "Thailand";
    $country{"tj"} = "Tajikistan";
    $country{"tk"} = "Tokelau";
    $country{"tm"} = "Turkmenistan";
    $country{"tn"} = "Tunisia";
    $country{"to"} = "Tonga";
    $country{"tp"} = "East Timor";
    $country{"tr"} = "Turkey";
    $country{"tt"} = "Trinidad and Tobago";
    $country{"tv"} = "Tuvalu";
    $country{"tw"} = "Taiwan";
    $country{"tz"} = "Tanzania";
    $country{"ua"} = "Ukraine";
    $country{"ug"} = "Uganda";
    $country{"uk"} = "United Kingdom";
    $country{"um"} = "US Minor Outlying Islands";
    $country{"us"} = "USA";
    $country{"uy"} = "Uruguay";
    $country{"uz"} = "Uzbekistan";
    $country{"va"} = "Vatican City State";
    $country{"vc"} = "Saint Vincent and the Grenadines";
    $country{"ve"} = "Venezuela";
    $country{"vg"} = "Virgin Islands (British)";
    $country{"vi"} = "Virgin Islands (U.S.)";
    $country{"vn"} = "Viet Nam";
    $country{"vu"} = "Vanuatu";
    $country{"wf"} = "Wallis and Futuna Islands";
    $country{"ws"} = "Samoa";
    $country{"ye"} = "Yemen";
    $country{"yt"} = "Mayotte";
    $country{"yu"} = "Yugoslavia";
    $country{"za"} = "South Africa";
    $country{"zm"} = "Zambia";
    $country{"zr"} = "Zaire";
    $country{"zw"} = "Zimbabwe";
}

# --------- *** added by CE to provide date information on stats -----------
sub uptonow {
    # *** added by CE, sort out today's date & time
    ($sec,$min,$hour,$mday,$mon,$year,$wday, $yday, $isdat) = localtime(time);
    # $wday runs from 0 (Sunday) to 6 (Saturday)
    $day_now = ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday')[$wday];
$mon_now = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec')[$mon];
    # (continuation of sub uptonow: builds the "last checked" text in $uptonow)
    $thismon = $mon + 1;

    # ordinal suffix for the day of the month; 11th, 12th and 13th are the
    # exceptions to the last-digit rule
    if (($mday >= 11) && ($mday <= 13)) {
        $ord = "th";
    } elsif (($mday % 10) == 1) {
        $ord = "st";
    } elsif (($mday % 10) == 2) {
        $ord = "nd";
    } elsif (($mday % 10) == 3) {
        $ord = "rd";
    } else {
        $ord = "th";
    }
    $dat_now = $mday . $ord;

    # localtime() returns the year as an offset from 1900 and unpadded
    # minutes and seconds, so format both explicitly
    $uptonow = "log files last checked: "
             . sprintf("%d:%02d:%02d", $hour, $min, $sec)
             . ", ".$day_now.", the ".$dat_now." of ".$mon_now." ".(1900 + $year);
} # sub uptonow

# --------- *** added by CE to deal with error classification --------------
# error_lab -- fills %err_lab (label per status code) and %errc_lab (label
# per status class, i.e. the first digit of the code).
sub error_lab {
    $err_lab{"300"} = "Multiple choices";
    $err_lab{"301"} = "Moved permanently";
    $err_lab{"302"} = "Moved temporarily";
    $err_lab{"304"} = "Not modified (I contend this isn't an error and don't add it to error total)";
    $err_lab{"400"} = "Bad request";
    $err_lab{"401"} = "Unauthorized";
    $err_lab{"403"} = "Forbidden";
    $err_lab{"404"} = "Not found";
    $err_lab{"500"} = "Internal server error";
    $err_lab{"501"} = "Not implemented";
    $err_lab{"502"} = "Bad gateway";
    $err_lab{"503"} = "Service unavailable";
    $errc_lab{"3"} = "Redirection";
    $errc_lab{"4"} = "Client error";
    $errc_lab{"5"} = "Server error";
}

# --------- *** moved here by CE to make easier to read -------------------
sub getdomains {
    # read in all known domain names to save looking them up again
    # (one "ip-address<TAB>dns-name" record per line of the file $dnsnames)
    if (open (DOMAINS, "$dnsnames")) {
        print "\nReading domain names from:\n $dnsnames\n\n";
        # the <DOMAINS> read had been lost, leaving an endless "while ()"
        while (<DOMAINS>) {
            chop;
            ($ipaddress, $dnsname) = split(/\t/);
            $dnsname{$ipaddress} = $dnsname;
        }
        close DOMAINS;
    } else {
        print "\nCannot read from DNS names file $dnsnames!";
    }
} # sub getdomains

# --------- *** moved here by CE to make easier to read -------------------
sub writedomains {
    # write out all the domain names to save looking them up again
    print "\nWriting domain names back to:\n $dnsnames\n";
    if (open (DOMAINS, ">$dnsnames")) {
        foreach $ipaddress (keys(%dnsname)) {
            print DOMAINS "$ipaddress\t$dnsname{$ipaddress}\n";
        }
        close DOMAINS;
    } else {
        print "\nCannot write to DNS names file $dnsnames\n";
    }
} # sub writedomains

# getdata -- runs "find" from the current directory and records a short
# size label ("<1k", "NNkb" or "NNMb") for every file found in %size, keyed
# by the file name with its leading "." replaced by $startdir.
sub getdata {
    # stores all size data in one associative array
    open(FIND, "find . -name \"*.*\" -print |") || die "Couldn't run find: $!\n";
    # the <FIND> read had been lost from this loop condition
    while ($filename = <FIND>) {
        undef $size;
        chop $filename;
        # get data on the file
        ($nsize,$time) = (stat($filename))[7,9];   # elements 7 & 9 of stat: size and mtime
        # reformat the size
        if ($nsize <= 1000) {
            $size = "<1k";
        } elsif ($nsize < 1000000) {
            $size = int($nsize/1000);
            $size = $size."kb";
        } else {
            $size = int($nsize/1000000);
            $size = $size."Mb";
        }
        # reformat the filename to the http server form: swap the leading
        # "." that find prints for $startdir
        $href = $filename;
        $href =~ s|^\.|$startdir|o;
        # put this associative array for future use
        $size{$href} = $size;
    }
    # close the handle that was actually opened (this used to close TIME)
    close FIND;
} # &getdata

# two_digits($number) -- returns $number zero-filled to two characters
# ("00" for an empty value); dies if it is longer than two characters.
sub two_digits {
    local ($_) = @_;
    if (length($_) == 0) {
        $_ = "00";
    } elsif (length($_) == 1) {
        $_ = "0".$_;
    } elsif (length($_) > 2) {
        die "Supplied a number of more than 2 digits to two_digits\n";
    }
    $_;
}

# --------- *** moved here by CE to make easier to duplicate --------------
# crunchlog -- feeds every line of the debug log (when $debug is set) or of
# each file in @logfiles to &doentry, then writes out the final month and,
# unless $extractonly is set, the overall totals.
sub crunchlog {
    # debug or real data?
    if ($debug) {
        print "\n**** DEBUG MODE RUN ****\n";
        open (INPUT,$debuglog) || die "cannot open debug.log";
        print " \nInputting from:\n $debuglog\n";
        $logtype = "p";
        $logtype =~ tr/a-z/A-Z/;
        # process log file line by line
        # (the <INPUT> read had been lost, leaving an endless "while ()")
        while (<INPUT>) {
            if ($verbosemode) { print ".";}
            # *** I'm commenting this out as I don't think I need it
            # *** else { &progressmeter; }
            $lastentry = 0;
            &doentry;
        }
    } else {
        # process each log file one at a time
        foreach $logfile (@logfiles) {
            open (INPUT, "$inputdir$logfile") || die "cannot open $logfile";
            print " \nInputting from:\n $inputdir$logfile";
            # the first letter of the file name identifies the kind of log
            $logtype = substr($logfile, 0, 1);
            $logtype =~ tr/a-z/A-Z/;
            # process log file line by line
            # (the <INPUT> read had been lost here as well)
            while (<INPUT>) {
                if ($verbosemode) { print ".";}
                # *** I'm commenting this out as I don't think I need it
                # *** else { &progressmeter; }
                $lastentry = 0;
                &doentry;
            } # get next entry
        } # process next log file
    }
    # write details from current month to file
    print " \nFinished reading log files from:\n $inputdir$logfile\n";
    if (!$extractonly) {
        $lastentry = 1;
        &newmonth;
        # ***
# added by CE, now print out the totals, before quitting
        # $topfiles = 0 switches off the "top N" cut-off in &topfile
        $topfiles = 0;
        &total;
        &totsummary;
        &errsummary;
    }
    print "\n";
    close ERRLOG;
    close PSYLOG;
} # sub crunchlog

# --------- *** moved here by CE to make easier to duplicate --------------
# doentry -- processes the log line currently in $_.
#   Parses the line (common log format when $commonlogformat is set,
#   otherwise the space-separated EMWAC layout), applies the ignore/focus
#   filters, copies the line to PSYLOG or ERRLOG and, unless $extractonly
#   is set, updates the error and access tallies.  Calls &newmonth when the
#   month or the log type changes.
sub doentry {
    chop;
    # split fields separated by spaces
    $bytes = 0;
    if ($commonlogformat) {
        # host ident user [date:time zone] "method file version" status bytes
        ($bit1, $bit2, $bit3) = split(/[\[\]]/);
        ($datetime, $zone) = split(/[ ]+/,$bit2);
        ($date, $hr, $min, $sec) = split(/:/,$datetime);
        ($mday, $month, $year) = split (/\//,$date);
        ($ip, $logonid, $authname) = split(/[ ]+/,$bit1);
        ($tmp,$bit3a,$bit3b) = split(/"/,$bit3);
        ($method, $file, $version) = split(/[ ]+/,$bit3a);
        ($tmp, $status, $bytes) = split(/[ ]+/,$bit3b);
        $ip =~ tr/A-Z/a-z/; # convert dns name to lowercase
    } else {
        ($wday, $month, $mday, $time, $year, $myip, $ip, $method, $file) = split;
        # This layout carries no status field.  Without a value here
        # $status stayed unset, so every record was classed as an error
        # and never counted as an access.
        # NOTE(review): assumes such logs only record successful requests - confirm.
        $status = 200;
    }
    $file =~ tr/A-Z/a-z/ if $filenamestolowercase;
    # *** added by CE to see if this will get rid of odd things that turn up in the log
    # may inflate the figures but I suspect that a lot of these things are handled
    # successfully by the browser/server
    $file =~ tr/"//d;
    $file =~ s/\.html/\.htm/;
    $file =~ s/>.*//;

    # if this entry is not one that we are ignoring
    # and if it is one that we are focussing on
    # then process it
    if ( !&ignorethisip($ip) && !&ignorethispath($file) && !&ignorethisext($file) &&
         &focusonthisip($ip) && &focusonthispath($file) && &focusonthisext($file) ) {

        # gopher log has one less field than http log
        $file = $method if $logtype =~ /^g/i;

        # look up domain for this ip address
        $domain = &addname($ip);

        if ($extractonly) {
            # log the record regardless of error status
            print PSYLOG "$_\n";
        } else {
            # if it was a failed access, put it in the error log
            # ($stat_class is the first digit of the status code)
            $stat_class = int($status/100);
            if ($stat_class != 2) {
                print ERRLOG "$_\n";
            } else {
                print PSYLOG "$_\n";
            }
            # also compile the statistics if not just extracting the data
            if ($stat_class != 2) {
                # sort out the error counting
                if ($status != 304) {
                    $error++;
                    # @@@@@@@ this is where to add compilation of errlog.htm
                } else {
                    $e304++;
                }
                $errcl{$stat_class}++;
                $error{$status}++;
                $toterrcl{$stat_class}++;
                $toterror{$status}++;
                if (!$errc_lab{$stat_class}) {
                    $errc_lab{$stat_class} = "Unclassified error type";
                }
                if (!$err_lab{$status}) {
                    $err_lab{$status} = "Unclassified error type";
                }
            }
            # NOTE(review): $stat_class is a single digit, so the "== 304"
            # half of this test can never be true; left as found because
            # 304s are tallied separately in $e304 above.
            if (($stat_class == 2) || ($stat_class == 304)) {
                # not an error according to me
                # so add this access to cumulative counts
                # 3 separate counts are kept - overall, home and outside
                $access++; # note that e304 are added separately, see above
                $totalbytes += $bytes;
                $file{$file}++;
                $ip{$ip}++;
                if (&ishomeip($ip)) {
                    $homeaccess++;
                    $homebytes += $bytes;
                    $homefile{$file}++;
                    $homeip{$ip}++;
                    # home accesses are all tallied under $homedomain
                    $domain = $homedomain;
                } else {
                    $outsideaccess++;
                    $outsidebytes += $bytes;
                    $outsidefile{$file}++;
                    $outsideip{$ip}++;
                }
                $domain{$domain}++;
            } # end of
            # if month in current log entry not equal to month
            # being processed then it is the start of a new month
            if (($currentmonth ne $month) || ($currentlogtype ne $logtype)) {
                &newmonth;
                $currentmonth = $month;
                $currentyear = $year;
                $currentlogtype = $logtype;
            }
        } # end of if (!&extractonly)
    } # end of processing entry
} # sub doentry

# --------------------------------------------------------------------------
# --------- *** moved here by CE to make easier to read -------------------
sub config {
# --------------------------------------------------------------------------
# CONFIGURE THESE VARIABLES
#
# File and path names should be absolute (e.g. C:/LOGS/)
# or relative to current directory (e.g. ./LOGS/)
# In Perl for Windows NT use forward slash (not backslash)
# in pathnames (e.g. C:/LOGS/)
# Directory names must always end with trailing slash
# (e.g. ./ for current directory - don't just leave blank)
#
# Lists of items consist of a list surrounded by brackets, with
# each item in the list separated by commas
# e.g. @ignoreip = ('158.143.104.174', '158.143.104.209');
# Lists of IP addresses can also include DNS names
# e.g.
@homeip = ('158.143', 'lse.ac.uk'); # # Flags can be set to 1 (meaning 'true' or 'enabled') or # set to 0 or not defined (meaning 'false' or 'disabled') # @homeip -- list of ip addresses of local users (e.g. ('158.143.') ) # $homedomain -- domain of local users (e.g. lse.ac.uk) # # $homename -- name of home site (e.g. LSE, London School of Economics) # $servername -- name of server (not always the same as the name of # the home site - may be name of PART of overall server # e.g. Law Department) # # $topfiles -- present statistics for this many most popular files (e.g. 20) # $topips -- present statistics for this many most attentive ip addresses # # $dnsnames -- file containing DNS names (e.g. ./dnsnames.lst) # # $inputdir -- directory containing https & gophers log files (e.g. /logs/) # $outputdir -- directory to write usage statistics reports (e.g. ./) # # @ignoreip -- list of IP addresses to ignore (e.g. ('158.143.104.174') ) # @myownip -- list of IP addresses which are administrative accesses # $i_ipcomment -- comment about excluded IP addresses for reports # @ignorepath -- list of paths/files to ignore (e.g. () ) # @ignoreext -- list of file extensions to ignore (e.g. ('.gif') # @focusonip -- list of IP addresses to focus on (e.g. () ) # @focusonpath -- list of paths/files to focus on (e.g. () ) # $f_pathcomment -- comment abut path focus for reports # @focusonext -- list of file extensions to focus on (e.g. ('.htm') # # $verbosemode -- flag - disable if you don't want the detailed output # musage gives to indicate what it's doing # $lookupdnsnames -- flag - disable if you don't want to look up dnsname of # each ip address accessing your server # $commonlogformat -- flag - disable for use with EMWAC's free server, enable # to use MUSAGE with logs from an http server that uses # the common log format (e.g. 
the professional version # of EMWAC's server) # $filenamestolowercase # -- flag - enable to convert all filenames (URLs) to lower # case before processing them # $logfilepattern -- the pattern used to recognise log files (e.g. EMWAC's # server's log files match *.log - translate this into # a Perl regular expression and you get .+\.log) # # %servertype -- PERL associative array - MUSAGE looks at the # first letter of the log file to deduce what kind # of server it comes from (e.g. EMWAC gopher, WWW # & WAIS logs start with g, h & w respectively; # CERN3 httpd proxy, access & cache logs start # with p, a & c respectively) # $debug -- invokes a debugging run on a small file # $debuglog -- name of a debugging file $debug = 1; $debuglog = "/usr1/www/pages/mhs/psychotherapy/wwwstats/debug.log"; $dnsnames = "/usr1/www/pages/mhs/psychotherapy/wwwstats/dnsnames.lst"; @homeip = ("192.153", "sghms.ac.uk"); $homedomain = ("sghms.ac.uk"); $homename = "SGHMS"; $servername = "SGHMS Psychotherapy"; @ignorepath = (); @ignoreext = (); @focusonip = (); @myownip = ("194.80.201.27","cevanpc.sghms.ac.uk"); @focusonext = (); $verbosemode = 0; $lookupdnsnames = 1; $commonlogformat = 1; $filenamestolowercase = 0; # this block are default settings usually overridden by musage.set data $topfiles = 50; $topips = 30; $inputdir = "/usr1/www/pages/psychotherapy/wwwstats/"; $outputdir = "/dev/null/"; @focusonpath = ("/mhs/psychotherapy/"); $f_pathcomment = "files in any Psychotherapy Section directories considered"; @ignoreip = ("194.80.201.27","cevanpc.sghms.ac.uk","infoserv.sghms.ac.uk","192.153.12.1","192.153.12.30"); $i_ipcomment = "does not include my own accesses as administrator"; $servertype{"g"} = "Gopher"; $servertype{"h"} = "WWW"; $servertype{"w"} = "WAIS"; $servertype{"p"} = "Proxy"; $servertype{"a"} = "Access"; $servertype{"c"} = "Cache"; $logfilepattern = "ptotal.log"; # CE shifted this around to deal with local arrangements $psylog = "psylog.log"; $errlog = "errlog.log"; # *** 
added by CE to get date of statistics run into the output $dated = 1; # *** the next three variables contain text to go at the end of the menu, summary and error files # *** respectively $btm_m = < File created using Perl script: musage3.prl adapted by Chris Evans C.Evans@sghms.ac.uk from MUSAGE.TXT by Nick Philips END $btm_s = < A complete usage is also available.
          File created using Perl script: musage3.prl adapted by Chris Evans C.Evans@sghms.ac.uk from MUSAGE.TXT by Nick Philips END $btm_e = < An error log is available.
          File created using Perl script: musage3.prl adapted by Chris Evans C.Evans@sghms.ac.uk from MUSAGE.TXT by Nick Philips END } # sub config __END__ :endofperl