@rem = '-*- Perl -*-'; @rem = ' @echo off perl -S %0.cmd %1 %2 %3 %4 %5 %6 %7 %8 %9 goto endofperl '; # (Last update 1st May 1995 - Nick Phillips) # # musage.txt --- script to calculate monthly usage statistics from # log files generated by the EMWAC Windows NT World # Wide Web and gopher servers (https and gophers). # # Copyright (c) 1995 Nick Phillips (N.R.Phillips@lse.ac.uk) # # NB/. I'm leaving the LSE at the end of May so won't be contactable # at this email address from then onwards. Don't yet have an alternative # address. # # Configuration variables are hard-coded in # (see the section "CONFIGURE THESE VARIABLES"). # # File and path names should be absolute (e.g. C:/LOGS/) # or relative to current directory (e.g. ./LOGS/) # # Directory names must always end with trailing slash # (e.g. ./ for current directory - don't just leave blank) # # In Perl for Windows NT use forward slash (not backslash) # in pathnames (e.g. C:/LOGS/) # # Uses the gethostbyaddr() function to look up the DNS name # corresponding to the IP addresses of users recorded in the logs - # this will only work if your version of Perl supports this function. # It is best to run this at a time when the Internet is quiet as it # could end up doing lots of DNS lookups. # # If you rename the script to MUSAGE.CMD then on an NT system you will # be able to run it from the command line (i.e. you will be able to just # type MUSAGE, rather than PERL MUSAGE.TXT) - note that MUSAGE.CMD is # a batch file (with Perl script embedded in it), so if you invoke it # from another batch file you will have to do CALL MUSAGE in order for # control to return to the original batch file. # # -------------------------------------------------------------------------- # CONFIGURE THESE VARIABLES # # File and path names should be absolute (e.g. C:/LOGS/) # or relative to current directory (e.g. ./LOGS/) # # In Perl for Windows NT use forward slash (not backslash) # in pathnames (e.g. 
C:/LOGS/) # # Directory names must always end with trailing slash # (e.g. ./ for current directory - don't just leave blank) # # Lists of items consist of a list surrounded by brackets, with # each item in the list separated by commas # e.g. @ignoreip = ('158.143.104.174', '158.143.104.209'); # # Lists of IP addresses can also include DNS names # e.g. @homeip = ('158.143', 'lse.ac.uk'); # # Flags can be set to 1 (meaning 'true' or 'enabled') or # set to 0 or not defined (meaning 'false' or 'disabled') # @homeip -- list of ip addresses of local users (e.g. ('158.143.') ) # $homedomain -- domain of local users (e.g. lse.ac.uk) # # $homename -- name of home site (e.g. LSE, London School of Economics) # $servername -- name of server (not always the same as the name of # the home site - may be name of PART of overall server # e.g. Law Department) # # $top -- analyse top nn statistics (e.g. 20) # # $dnsnames -- file containing DNS names (e.g. ./dnsnames.lst) # # $inputdir -- directory containing https & gophers log files (e.g. /logs/) # $outputdir -- directory to write usage statistics reports (e.g. ./) # # @ignoreip -- list of IP addresses to ignore (e.g. ('158.143.104.174') ) # @ignorepath -- list of paths/files to ignore (e.g. () ) # @ignoreext -- list of file extensions to ignore (e.g. ('.gif') ) # @focusonip -- list of IP addresses to focus on (e.g. () ) # @focusonpath -- list of paths/files to focus on (e.g. () ) # @focusonext -- list of file extensions to focus on (e.g. ('.htm') ) # # $verbosemode -- flag - disable if you don't want the detailed output # musage gives to indicate what it's doing # $lookupdnsnames -- flag - disable if you don't want to look up dnsname of # each ip address accessing your server # $commonlogformat -- flag - disable for use with EMWAC's free server, enable # to use MUSAGE with logs from an http server that uses # the common log format (e.g. 
#                     the professional version of EMWAC's server)
# $filenamestolowercase
#                  -- flag - enable to convert all filenames (URLs) to lower
#                     case before processing them
# $logfilepattern  -- the pattern used to recognise log files (e.g. EMWAC's
#                     server's log files match *.log - translate this into
#                     a Perl regular expression and you get .+\.log)
#
# %servertype      -- PERL associative array - MUSAGE looks at the
#                     first letter of the log file to deduce what kind
#                     of server it comes from (e.g. EMWAC gopher, WWW
#                     & WAIS logs start with g, h & w respectively;
#                     CERN3 httpd proxy, access & cache logs start
#                     with p, a & c respectively)

@homeip = ("192.153", "sghms.ac.uk");
$homedomain = ("sghms.ac.uk");

$homename = "SGHMS";
$servername = "Infoserv";

$top = 20;

$dnsnames = "./dnsnames.lst";

$inputdir = "/usr1/www/log/";
$outputdir = "/usr1/www/pages/mhs/psychotherapy/";

@ignoreip = ("194.80.201.27");
@ignorepath = ();
@ignoreext = (".gif");
@focusonip = ();
@focusonpath = ("/mhs/psychotherapy/");
@focusonext = ();

$verbosemode = 0;
$lookupdnsnames = 1;
$commonlogformat = 1;
$filenamestolowercase = 1;

# regular expression matched (case-insensitively) against the file names
# found in $inputdir
$logfilepattern = "proxy-log.log";

$servertype{"g"} = "Gopher";
$servertype{"h"} = "WWW";
$servertype{"w"} = "WAIS";
$servertype{"p"} = "Proxy";
$servertype{"a"} = "Access";
$servertype{"c"} = "Cache";

# The first letter of each log file name is converted to upper case before
# it is stored in $logtype (see the main loop below), and the report writer
# looks the server type up under that upper-case letter.  Register every
# entry under its upper-case letter too, so that the lookup finds it.
foreach $typeletter (keys(%servertype)) {
    ($uppertypeletter = $typeletter) =~ tr/a-z/A-Z/;
    $servertype{$uppertypeletter} = $servertype{$typeletter}
        unless defined($servertype{$uppertypeletter});
}

# --------------------------------------------------------------------------

print "\nMUSAGE --- script to calculate monthly usage statistics from\n";
print " v1.4 log files generated by the EMWAC Windows NT World\n";
print " Wide Web and gopher servers (https and gophers).\n";
print "\nCopyright (c) 1995 Nick Phillips\n";

# initialise list of names of countries (e.g. uk = United Kingdom etc.)
&countries;

# read in all known domain names to save looking them up again
# (one "ipaddress<TAB>dnsname" record per line)
if ($lookupdnsnames) {
    if (open (DOMAINS, "$dnsnames")) {
        print "\nReading domain names from $dnsnames . . .";
        while (<DOMAINS>) {
            # chomp (not chop) so that a final record without a trailing
            # newline does not lose its last character
            chomp;
            ($ipaddress, $dnsname) = split(/\t/);
            $dnsname{$ipaddress} = $dnsname;
        }
        close DOMAINS;
    }
    else {
        # not fatal - the file is (re)written at the end of the run
        print "\nCannot read from DNS names file $dnsnames!";
    }
}

# get all log files from the input directory sorted by filename
# (extract only those files that match $logfilepattern)
opendir(LOGDIR, "$inputdir") ||
    die "\nCannot open input directory $inputdir";
print "\nReading log files from directory $inputdir . . .";
@logfiles = sort(grep(/$logfilepattern/i, readdir(LOGDIR)));
closedir(LOGDIR);

print "\nReading files . . . ";

# process each log file one at a time
foreach $logfile (@logfiles) {

    open (INPUT, "$inputdir$logfile") || die "cannot open $logfile";
    print " \n$inputdir$logfile ";

    # the first letter of the file name identifies the kind of log
    $logtype = substr($logfile, 0, 1);
    $logtype =~ tr/a-z/A-Z/;

    # process log file line by line
    while (<INPUT>) {

        if ($verbosemode) { print "."; }
        else { &progressmeter; }

        chomp;

        # split fields separated by spaces
        $bytes = 0;
        if ($commonlogformat) {
            ($ip, $logonid, $authname, $datetime, $zone, $method, $file,
             $version, $status, $bytes) = split;
            # $datetime looks like [dd/Mon/yyyy:... - pick out the date parts
            ($mday, $month, $year) = ($datetime =~ /\[(\d*)\/(\w*)\/(\d*)/);
            $ip =~ tr/A-Z/a-z/;    # convert dns name to lowercase
        }
        else {
            ($wday, $month, $mday, $time, $year, $myip, $ip, $method,
             $file) = split;
        }

        $file =~ tr/A-Z/a-z/ if $filenamestolowercase;

        # if this entry is not one that we are ignoring
        # and if it is one that we are focussing on
        # then process it
        if (   !&ignorethisip($ip)
            && !&ignorethispath($file)
            && !&ignorethisext($file)
            && &focusonthisip($ip)
            && &focusonthispath($file)
            && &focusonthisext($file) ) {

            # gopher log has one less field than http log
            # NOTE(review): the ignore/focus tests above (and the lower-case
            # conversion) have already run on the pre-fix-up $file by this
            # point - confirm that is intended for gopher logs.
            $file = $method if $logtype =~ /^g/i;

            # if month in current log entry not equal to month
            # being processed then it is the start of a new month
            # (a change of log type also starts a new report)
            if (($currentmonth ne $month) || ($currentlogtype ne $logtype)) {
                &newmonth;
                $currentmonth = $month;
                $currentyear = $year;
                $currentlogtype = $logtype;
            }

            # look up domain for this ip address
            $domain = &addname($ip);

            # add this access to cumulative counts
            # 3 separate counts are kept - overall, home and outside
            $access++;
            $totalbytes += $bytes;
            $file{$file}++;
            $ip{$ip}++;

            if (&ishomeip($ip)) {
                $homeaccess++;
                $homebytes += $bytes;
                $homefile{$file}++;
                $homeip{$ip}++;
                # home users are always counted under the home domain
                $domain = $homedomain;
            }
            else {
                $outsideaccess++;
                $outsidebytes += $bytes;
                $outsidefile{$file}++;
                $outsideip{$ip}++;
            }

            $domain{$domain}++;

        } # end of processing entry

    } # get next entry

    close INPUT;

} # process next log file

# write details from current month to file, before quitting
print " \nFinishing . . .";
&newmonth;
print "\n";

# write out all the domain names to save looking them up again
if ($lookupdnsnames) {
    if (open (DOMAINS, ">$dnsnames")) {
        foreach $ipaddress (keys(%dnsname)) {
            print DOMAINS "$ipaddress\t$dnsname{$ipaddress}\n";
        }
        close DOMAINS;
    }
    else {
        print "\nCannot write to DNS names file $dnsnames\n";
    }
}

# --------------------------------------------------------------------------
# SUB-ROUTINES
# --------------------------------------------------------------------------

# newmonth - close off the reporting period that has been accumulating.
# Calls &summaryofmonth if a month is in progress ($currentmonth is set),
# then undefines every cumulative counter and the current month / year /
# log type markers so that the next entry starts from a clean slate.
# Takes no arguments; works entirely on the script's global variables.
sub newmonth {

    # write report then undefine all the cumulative total variables
    &summaryofmonth if $currentmonth;

    undef %domain;

    undef $access;
    undef $totalbytes;
    undef %file;
    undef %ip;

    undef $homeaccess;
    undef $homebytes;
    undef %homefile;
    undef %homeip;

    undef $outsideaccess;
    undef $outsidebytes;
    undef %outsidefile;
    undef %outsideip;

    undef $currentmonth;
    undef $currentyear;
    undef $currentlogtype;

} # &newmonth

# --------------------------------------------------------------------------

sub summaryofmonth {

    # make a new report file for that month
    # filename e.g.
1994-11.HTM ($_) = grep(/^$currentmonth/i, January01, February02, March03, April04, May05, June06, July07, August08, September09, October10, November11, December12); ($monthname, $mon) = /(\w+)(\d\d)$/; $report = "$currentlogtype$currentyear-$mon"; $servertype = $servertype{$currentlogtype}; open (SUMMARY, ">$outputdir$report.HTM") || die "cannot open $outputdir$report.HTM"; print SUMMARY "\n"; print SUMMARY "$servername $servertype server usage log $monthname $currentyear\n"; print SUMMARY "\n\n"; print SUMMARY "

$servername $servertype server usage log

\n"; print SUMMARY "

$monthname $currentyear

\n"; print SUMMARY "[Top $top files, "; print SUMMARY "sites, "; print SUMMARY "domains]\n"; if (@focusonip != 0) { print SUMMARY "

Focussing on accesses from the following IP addresses

\n"; print SUMMARY "\n"; } if (@focusonpath != 0) { print SUMMARY "

Focussing on accesses to the following paths or files

\n"; print SUMMARY "\n"; } print SUMMARY "

Total number of accesses

\n"; print SUMMARY "Every time a document is retrieved counts as one access.\n"; print SUMMARY "
";

    $length = length($homename) + 12; $length = 20 if $length < 20;

    printf SUMMARY "     %-${length}s %18s\n", "Overall total:", $access;

    printf SUMMARY "     %-${length}s %18s\n", "$homename users:", $homeaccess;

    printf SUMMARY "     %-${length}s %18s\n", "Outside users:", $outsideaccess;

    printf SUMMARY "\n";

    print SUMMARY "
\n\n"; print SUMMARY "

Total number of users

\n"; print SUMMARY "Each different IP address that accesses the server\n"; print SUMMARY "counts as one user.\n"; print SUMMARY "
";

    $length = length($homename) + 12; $length = 20 if $length < 20;

    printf SUMMARY "     %-${length}s %18s\n", "Overall total:",

        ($count = keys(%ip));

    printf SUMMARY "     %-${length}s %18s\n", "$homename users:",

        ($count = keys(%homeip));

    printf SUMMARY "     %-${length}s %18s\n", "Outside users:",

        ($count = keys(%outsideip));

    print SUMMARY "
\n\n"; if ($totalbytes) { print SUMMARY "

Total number of bytes transferred

\n"; print SUMMARY "
";

        $length = length($homename) + 12; $length = 20 if $length < 20;

        printf SUMMARY "     %-${length}s %18s\n", "Overall total:",

            &putcommasinnumber($totalbytes);

        printf SUMMARY "     %-${length}s %18s\n", "$homename users:",

            &putcommasinnumber($homebytes);

        printf SUMMARY "     %-${length}s %18s\n", "Outside users:",

            &putcommasinnumber($outsidebytes);

        print SUMMARY "
\n\n"; } print SUMMARY "
\n"; print SUMMARY "

Top $top files

\n"; print SUMMARY "This is based on the number of times each individual\n"; print SUMMARY "file is accessed.\n\n"; &topfile; print SUMMARY "
\n"; print SUMMARY "

Top $top sites

\n"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each IP address\n\n"; &topip; print SUMMARY "
\n"; print SUMMARY "

Top $top domains

"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each domain\n\n"; &topdomain; print SUMMARY "

A complete usage log for\n"; print SUMMARY "this month is also available.

\n"; print SUMMARY "
\n"; print SUMMARY "Statistics generated using \n"; print SUMMARY "MUSAGE Perl script.\n"; print SUMMARY "\n"; close SUMMARY; # print culmulative report and clear culmulative totals open (SUMMARY, ">$outputdir$report.TXT") || die "cannot open $report.TXT"; print SUMMARY "Complete $servername $servertype usage log\n\n"; print SUMMARY "$monthname $currentyear\n\n"; if (@focusonip != 0) { print SUMMARY "Focussing on accesses from the following IP addresses:-\n"; foreach $focusonip (@focusonip) { print SUMMARY " $focusonip\n"; } print SUMMARY "\n"; } if (@focusonpath != 0) { print SUMMARY "Focussing on accesses to the following paths or files:-\n"; foreach $focusonpath (@focusonpath) { print SUMMARY " $focusonpath\n"; } print SUMMARY "\n"; } print SUMMARY "Focussing on $focus\n\n" if $focus ne ''; print SUMMARY "Total number of accesses: $access\n"; print SUMMARY "Total number of users: ", ($count = keys(%ip)), "\n"; print SUMMARY "Total number of bytes: ", &putcommasinnumber($totalbytes), "\n"; print SUMMARY "\nBy file\n"; print SUMMARY "=======\n"; foreach $file (sort byfilename keys(%file)) { printf SUMMARY " %9s %-s\n", $file{$file}, $file; } print SUMMARY "\nBy ip\n"; print SUMMARY "=====\n"; foreach $ip (sort byip keys(%ip)) { printf SUMMARY " %9s %-18s %-s\n", $ip{$ip}, $ip, $dnsname{$ip}; } print SUMMARY "\nBy domain\n"; print SUMMARY "=========\n"; foreach $domain (sort bydnsname keys(%domain)) { printf SUMMARY " %9s %-15s %-s\n", $domain{$domain}, $domain, $country{$domain}; } print SUMMARY "\n (* = mainly US based)"; close SUMMARY; # if menu document does not exist, starts a new one # looks to see if this report file has already been added to menu # looks for