@rem = '-*- Perl -*-';
@rem = '
@echo off
perl -S %0.cmd %1 %2 %3 %4 %5 %6 %7 %8 %9
goto endofperl
';
# The two @rem assignments above are the batch-file wrapper described below:
# the command interpreter sees "@rem" remarks followed by the commands that
# start Perl, while Perl sees two harmless assignments to the array @rem.
# The commands must therefore stay on separate lines.

# (Last update 1st May 1995 - Nick Phillips)
#
# musage.txt --- script to calculate monthly usage statistics from
#                log files generated by the EMWAC Windows NT World
#                Wide Web and gopher servers (https and gophers).
#
# Copyright (c) 1995 Nick Phillips (N.R.Phillips@lse.ac.uk)
# NB/. I'm leaving the LSE at the end of May so won't be contactable
# at this email address from then onwards. Don't yet have an alternative
# address.
#
# Configuration variables are hard-coded in
# (see the section "CONFIGURE THESE VARIABLES").
#
# File and path names should be absolute (e.g. C:/LOGS/)
# or relative to current directory (e.g. ./LOGS/)
# Directory names must always end with a trailing slash
# (e.g. ./ for current directory - don't just leave blank)
# In Perl for Windows NT use forward slash (not backslash)
# in pathnames (e.g. C:/LOGS/)
#
# Uses the gethostbyaddr() function to look up the DNS name
# corresponding to the IP addresses of users recorded in the logs -
# this will only work if your version of Perl supports this function.
# It is best to run this at a time when the Internet is quiet as it
# could end up doing lots of DNS lookups.
#
# If you rename the script to MUSAGE.CMD then on an NT system you will
# be able to run it from the command line (i.e. you will be able to just
# type MUSAGE, rather than PERL MUSAGE.TXT) - note that MUSAGE.CMD is
# a batch file (with Perl script embedded in it), so if you invoke it
# from another batch file you will have to do CALL MUSAGE in order for
# control to return to the original batch file.
#
# --------------------------------------------------------------------------
# CONFIGURE THESE VARIABLES
#
# File and path names should be absolute (e.g. C:/LOGS/)
# or relative to current directory (e.g. ./LOGS/)
# In Perl for Windows NT use forward slash (not backslash)
# in pathnames (e.g. C:/LOGS/)
# Directory names must always end with a trailing slash
# (e.g. ./ for current directory - don't just leave blank)
#
# Lists of items consist of a list surrounded by brackets, with
# each item in the list separated by commas
#     e.g. @ignoreip = ('158.143.104.174', '158.143.104.209');
# Lists of IP addresses can also include DNS names
#     e.g. @homeip = ('158.143', 'lse.ac.uk');
#
# Flags can be set to 1 (meaning 'true' or 'enabled') or
# set to 0 or not defined (meaning 'false' or 'disabled')
#
# @homeip        -- list of ip addresses of local users (e.g. ('158.143.') )
# $homedomain    -- domain of local users (e.g. lse.ac.uk)
#
# $homename      -- name of home site (e.g. LSE, London School of Economics)
# $servername    -- name of server (not always the same as the name of
#                   the home site - may be name of PART of overall server
#                   e.g. Law Department)
#
# $topfiles      -- present statistics for this many most popular files (e.g. 20)
# $topips        -- present statistics for this many most attentive ip addresses
#
# $dnsnames      -- file containing DNS names (e.g. ./dnsnames.lst)
#
# $inputdir      -- directory containing https & gophers log files (e.g. /logs/)
# $outputdir     -- directory to write usage statistics reports (e.g. ./)
#
# @ignoreip      -- list of IP addresses to ignore (e.g. ('158.143.104.174') )
# $i_ipcomment   -- comment about excluded IP addresses for reports
# @ignorepath    -- list of paths/files to ignore (e.g. () )
# @ignoreext     -- list of file extensions to ignore (e.g. ('.gif') )
# @focusonip     -- list of IP addresses to focus on (e.g. () )
# @focusonpath   -- list of paths/files to focus on (e.g. () )
# $f_pathcomment -- comment about path focus for reports
# @focusonext    -- list of file extensions to focus on (e.g. ('.htm') )
#
# $verbosemode   -- flag - disable if you don't want the detailed output
#                   musage gives to indicate what it's doing
# $lookupdnsnames -- flag - disable if you don't want to look up dnsname of
#                   each ip address accessing your server
# $commonlogformat -- flag - disable for use with EMWAC's free server, enable
#                   to use MUSAGE with logs from an http server that uses
#                   the common log format (e.g. the professional version
#                   of EMWAC's server)
# $filenamestolowercase
#                -- flag - enable to convert all filenames (URLs) to lower
#                   case before processing them
# $logfilepattern -- the pattern used to recognise log files (e.g. EMWAC's
#                   server's log files match *.log - translate this into
#                   a Perl regular expression and you get .+\.log)
#
# %servertype    -- PERL associative array - MUSAGE looks at the
#                   first letter of the log file to deduce what kind
#                   of server it comes from (e.g. EMWAC gopher, WWW
#                   & WAIS logs start with g, h & w respectively;
#                   CERN3 httpd proxy, access & cache logs start
#                   with p, a & c respectively)

@homeip      = ("192.153", "sghms.ac.uk");
$homedomain  = "sghms.ac.uk";

$homename    = "SGHMS";
$servername  = "SGHMS Psychotherapy Section";

$topfiles    = 50;
$topips      = 30;

$dnsnames    = "./dnsnames.lst";

$inputdir    = "/usr1/www/pages/mhs/psychotherapy/wwwstats/";
$outputdir   = "/usr1/www/pages/mhs/psychotherapy/info/";

@ignoreip      = ("194.80.201.27", "cevanpc.sghms.ac.uk");
$i_ipcomment   = "does not include my own accesses as administrator";
@ignorepath    = ();
@ignoreext     = (".gif");
@focusonip     = ();
@focusonpath   = ("/mhs/psychotherapy/info/cassel2.htm");
$f_pathcomment = "only files in INFO directory considered";
@focusonext    = ();

$verbosemode          = 0;
$lookupdnsnames       = 0;
$commonlogformat      = 1;
$filenamestolowercase = 0;

# Used as a regular expression (case-insensitive, unanchored) against every
# file name found in $inputdir; note the "." is a regex wildcard here.
$logfilepattern = "psylog.log";

$servertype{"g"} = "Gopher";
$servertype{"h"} = "WWW";
$servertype{"w"} = "WAIS";
$servertype{"p"} = "Proxy";
$servertype{"a"} = "Access";
$servertype{"c"} = "Cache";

# Name of the local log (written to $outputdir) that receives a copy of every
# log entry accepted by the ignore/focus filters.
$mylog = "psylog.log";

# The argument list of this open() call follows on the next line.
open
(MYLOG, ">$outputdir$mylog") || die "cannot open $outputdir$mylog";

# --------------------------------------------------------------------------
# MAIN SCRIPT
#
# Prints a banner, optionally loads the cached DNS names, then reads every
# matching log file in $inputdir line by line and accumulates per-month
# counts (overall, home and outside) in global variables.  &newmonth is
# called whenever the month or the log type changes.
# --------------------------------------------------------------------------

# *** added by CE to get date of statistics run into the output
# ($uptonow stays 0 while the log files are being read)
$uptonow = 0;

print "\nMUSAGE --- script to calculate monthly usage statistics from\n";
print " v1.4 log files generated by the EMWAC Windows NT World\n";
print " Wide Web and gopher servers (https and gophers).\n";
print "\nCopyright (c) 1995 Nick Phillips\n";

# initialise list of names of countries (e.g. uk = United Kingdom etc.)
&countries;

# read in all known domain names to save looking them up again
if ($lookupdnsnames) {
    if (open (DOMAINS, "$dnsnames")) {
        print "\nReading domain names from $dnsnames . . .";
        # each line holds "ipaddress<TAB>dnsname"
        while (<DOMAINS>) {
            chop;
            ($ipaddress, $dnsname) = split(/\t/);
            $dnsname{$ipaddress} = $dnsname;
        }
        close DOMAINS;
    }
    else {
        print "\nCannot read from DNS names file $dnsnames!";
    }
}

# get all log files from current directory sorted by filename
# (extract only those files that match $logfilepattern)
opendir(LOGDIR, "$inputdir") || die "\nCannot open input directory $inputdir";
print "\nReading log files from directory $inputdir . . .";
@logfiles = sort(grep(/$logfilepattern/i, readdir(LOGDIR)));
closedir(LOGDIR);

print "\nReading files . . . \n";

# process each log file one at a time
foreach $logfile (@logfiles) {
    open (INPUT, "$inputdir$logfile") || die "cannot open $logfile";
    print " \n$inputdir$logfile ";

    # the first letter of the file name identifies the kind of log
    # NOTE(review): $logtype is upper-cased here and later used as the key
    # into %servertype, whose keys are lower-case in the configuration
    # section - confirm the lookup finds the intended entry.
    $logtype = substr($logfile, 0, 1);
    $logtype =~ tr/a-z/A-Z/;

    # process log file line by line
    while (<INPUT>) {
        if ($verbosemode) { print "."; }
        # ***
        # *** I'm commenting this out as I don't think I need it
        # *** else { &progressmeter; }
        # ***
        chop;

        # split fields separated by spaces
        $bytes = 0;
        if ($commonlogformat) {
            ($ip, $logonid, $authname, $datetime, $zone, $method, $file, $version, $status, $bytes) = split;
            # $datetime looks like "[dd/Mon/yyyy:..." - pick out day, month, year
            ($mday, $month, $year) = ($datetime =~ /\[(\d*)\/(\w*)\/(\d*)/);
            $ip =~ tr/A-Z/a-z/;    # convert dns name to lowercase
        }
        else {
            ($wday, $month, $mday, $time, $year, $myip, $ip, $method, $file) = split;
        }
        $file =~ tr/A-Z/a-z/ if $filenamestolowercase;

        # *** added by CE to see if this will get rid of odd things that turn up in the log
        # may inflate the figures but I suspect that a lot of these things are handled
        # successfully by the browser/server
        $file =~ tr/"//d;             # drop double quotes
        $file =~ s/\.html/\.htm/;     # count .html and .htm as the same file
        $file =~ s/>.*//;             # cut everything from the first ">"

        # if this entry is not one that we are ignoring
        # and if it is one that we are focussing on
        # then process it
        if ( !&ignorethisip($ip) && !&ignorethispath($file) && !&ignorethisext($file) && &focusonthisip($ip) && &focusonthispath($file) && &focusonthisext($file) ) {

            # gopher log has one less field than http log
            # NOTE(review): this runs after the path/extension filters above,
            # so for gopher logs the filters saw the pre-correction $file.
            $file = $method if $logtype =~ /^g/i;

            # *** added by CE to create a local log
            print MYLOG "$_ \n";

            # if month in current log entry not equal to month
            # being processed then it is the start of a new month
            if (($currentmonth ne $month) || ($currentlogtype ne $logtype)) {
                &newmonth;
                $currentmonth   = $month;
                $currentyear    = $year;
                $currentlogtype = $logtype;
            }

            # look up domain for this ip address
            $domain = &addname($ip);

            # add this access to cumulative counts
            # 3 separate counts are kept - overall, home and outside
            $access++;
            $totalbytes += $bytes;
            $file{$file}++;
            $ip{$ip}++;
            if (&ishomeip($ip))
{
                # access from a home address: count it under the home domain
                $homeaccess++;
                $homebytes += $bytes;
                $homefile{$file}++;
                $homeip{$ip}++;
                $domain = $homedomain;
            }
            else {
                $outsideaccess++;
                $outsidebytes += $bytes;
                $outsidefile{$file}++;
                $outsideip{$ip}++;
            }
            $domain{$domain}++;
        } # end of processing entry
    } # get next entry
    close INPUT;
} # process next log file

# write details from current month to file
print " \nFinishing . . .";

# *** added by CE, sort out today's date & time
($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdat) = localtime(time);
$day   = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
          'Friday', 'Saturday')[$wday];
$month = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
          'August', 'September', 'October', 'November', 'December')[$mon];

# ordinal suffix for the day of the month; 11, 12 and 13 take "th"
if    ($mday >= 11 && $mday <= 13) { $ord = "th"; }
elsif (($mday % 10) == 1)          { $ord = "st"; }
elsif (($mday % 10) == 2)          { $ord = "nd"; }
elsif (($mday % 10) == 3)          { $ord = "rd"; }
else                               { $ord = "th"; }

# localtime() returns the year as an offset from 1900, and the time
# components as plain numbers, so pad them to two digits here
$uptonow = sprintf("%02d:%02d:%02d, %s, the %d%s of %s %d",
                   $hour, $min, $sec, $day, $mday, $ord, $month, $year + 1900);

# report on the month still being accumulated; $uptonow is now set
&newmonth;

# *** added by CE, now print out the totals, before quitting
$topfiles = 0;
&total;
&totsummary;

print "\n";

# write out all the domain names to save looking them up again
if ($lookupdnsnames) {
    if (open (DOMAINS, ">$dnsnames")) {
        foreach $ipaddress (keys(%dnsname)) {
            print DOMAINS "$ipaddress\t$dnsname{$ipaddress}\n";
        }
        close DOMAINS;
    }
    else {
        print "\nCannot write to DNS names file $dnsnames\n";
    }
}

# --------------------------------------------------------------------------
# SUB-ROUTINES
# --------------------------------------------------------------------------

# newmonth -- close off the month that has been accumulated so far.
# Takes no arguments and works entirely on global variables: writes the
# monthly report (only if $currentmonth is set), adds every monthly counter
# to its running "tot..." counterpart, then clears the monthly counters and
# the current month / year / log type markers.
sub newmonth {
    # write report then undefine all the cumulative total variables
    &summaryofmonth if $currentmonth;

    # *** the transfers to totals were added by CE

    # overall counts
    foreach (keys %domain) { $totdomain{$_} += $domain{$_}; }
    undef %domain;
    $totaccess += $access;
    undef $access;
    $totbytes += $totalbytes;
    undef $totalbytes;
    foreach (keys %file) { $totfile{$_} += $file{$_}; }
    undef %file;
    foreach (keys %ip) { $totip{$_} += $ip{$_}; }
    undef %ip;

    # home counts
    $tothomeacc += $homeaccess;
    undef $homeaccess;
    $tothomebyt += $homebytes;
    undef $homebytes;
    foreach (keys %homefile) { $tothomefile{$_} += $homefile{$_}; }
    undef %homefile;
    foreach (keys %homeip) { $tothomeip{$_} += $homeip{$_}; }
    undef %homeip;

    # outside counts
    $totoutacc += $outsideaccess;
    undef $outsideaccess;
    $totoutbyt += $outsidebytes;
    undef $outsidebytes;
    foreach (keys %outsidefile) { $totoutfile{$_} += $outsidefile{$_}; }
    undef %outsidefile;
    foreach (keys %outsideip) { $totoutip{$_} += $outsideip{$_}; }
    undef %outsideip;

    undef $currentmonth;
    undef $currentyear;
    undef $currentlogtype;
} # &newmonth

# total -- copy the running "tot..." totals back into the monthly variables.
# Takes no arguments and works entirely on global variables.
sub total {
    # *** added by CE, transfers all accumulated totals to usual variables
    # used to print monthly summaries
    %domain        = %totdomain;
    $access        = $totaccess;
    $totalbytes    = $totbytes;
    %file          = %totfile;
    %ip            = %totip;
    $homeaccess    = $tothomeacc;
    $homebytes     = $tothomebyt;
    %homefile      = %tothomefile;
    %homeip        = %tothomeip;
    $outsideaccess = $totoutacc;
    $outsidebytes  = $totoutbyt;
    %outsidefile   = %totoutfile;
    %outsideip     = %totoutip;
} # &total

# --------------------------------------------------------------------------

sub summaryofmonth {
    # make a new report file for that month
    # filename e.g. 1994-11.HTM
    ($_) = grep(/^$currentmonth/i, January01, February02, March03, April04, May05, June06, July07, August08, September09, October10, November11, December12);
    ($monthname, $mon) = /(\w+)(\d\d)$/;
    $report = "$currentlogtype$currentyear-$mon";
    $servertype = $servertype{$currentlogtype};
    open (SUMMARY, ">$outputdir$report.HTM") || die "cannot open $outputdir$report.HTM";
    print SUMMARY "\n";
    print SUMMARY "$servername $servertype server usage log $monthname $currentyear\n";
    print SUMMARY "\n\n";
    print SUMMARY "

$servername $servertype server usage log

\n"; if (!$uptonow) { print SUMMARY "

$monthname $currentyear

\n"; } else { print SUMMARY "

$monthname $currentyear

\n"; print SUMMARY "

$uptonow

\n"; } print SUMMARY "[Files, "; print SUMMARY "sites, "; print SUMMARY "domains]\n"; if (@focusonip != 0) { print SUMMARY "

Focussing on accesses from the following IP addresses

\n"; print SUMMARY "\n"; } if (@focusonpath != 0) { print SUMMARY "

Focussing on accesses to the following paths or files

\n"; print SUMMARY "\n"; } if ($i_ipcomment) { print SUMMARY "\n"; } print SUMMARY "

Total number of accesses

\n"; print SUMMARY "Every time a document is retrieved counts as one access.\n"; print SUMMARY "
";
    $length = length($homename) + 12; $length = 20 if $length < 20;
    printf SUMMARY "     %-${length}s %18s\n", "Overall total:", $access;
    printf SUMMARY "     %-${length}s %18s\n", "$homename users:", $homeaccess;
    printf SUMMARY "     %-${length}s %18s\n", "Outside users:", $outsideaccess;
    $opct = (int(10000*$outsideaccess/$access))/100;
    $width = 6; $dp = 2;
    printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
    print SUMMARY "
\n\n"; print SUMMARY "

Total number of users

\n"; print SUMMARY "Each different IP address that accesses the server\n"; print SUMMARY "counts as one user. N.B. This may represent a considerably\n"; print SUMMARY "greater number of actual people\n"; print SUMMARY "
";
    $length = length($homename) + 12; $length = 20 if $length < 20;
    printf SUMMARY "     %-${length}s %18s\n", "Overall total:",
        ($totcount = keys(%ip));
    printf SUMMARY "     %-${length}s %18s\n", "$homename users:",
        ($count = keys(%homeip));
    printf SUMMARY "     %-${length}s %18s\n", "Outside users:",
        ($outcount = keys(%outsideip));
    $opct = (int(10000*$outcount/$totcount))/100;
    printf SUMMARY "\nI.e. outside addresses represent %${width}.${dp}f %\n", $opct;
    print SUMMARY "
\n\n"; if ($totalbytes) { print SUMMARY "

Total number of bytes transferred

\n"; print SUMMARY "
";
        $length = length($homename) + 12; $length = 20 if $length < 20;
        printf SUMMARY "     %-${length}s %18s\n", "Overall total:",
            &putcommasinnumber($totalbytes);
        printf SUMMARY "     %-${length}s %18s\n", "$homename users:",
            &putcommasinnumber($homebytes);
        printf SUMMARY "     %-${length}s %18s\n", "Outside users:",
            &putcommasinnumber($outsidebytes);
        $opct = (int(10000*$outsidebytes/$totalbytes))/100;
        printf SUMMARY "\nI.e. outside use is %${width}.${dp}f %\n", $opct;
        print SUMMARY "
\n\n"; } print SUMMARY "
\n"; print SUMMARY "

Files

\n"; print SUMMARY "This is based on the number of times each individual\n"; print SUMMARY "file is accessed.\n\n"; &topfile; print SUMMARY "
\n"; print SUMMARY "

Sites

\n"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each IP address.\n\n"; &topip; print SUMMARY "
\n"; print SUMMARY "

Domains

"; print SUMMARY "This is based on the numbers of documents accessed\n"; print SUMMARY "by users calling from each domain\n\n"; &topdomain; print SUMMARY "

A complete usage log for\n"; print SUMMARY "this month is also available.

\n"; print SUMMARY "
\n"; print SUMMARY "Statistics generated using \n"; print SUMMARY "MUSAGE Perl script.\n"; print SUMMARY "\n"; close SUMMARY; # print cumulative report and clear cumulative totals open (SUMMARY, ">$outputdir$report.TXT") || die "cannot open $report.TXT"; print SUMMARY "Complete $servername $servertype usage log\n\n"; print SUMMARY "$monthname $currentyear\n\n"; if (@focusonip != 0) { print SUMMARY "Focussing on accesses from the following IP addresses:-\n"; foreach $focusonip (@focusonip) { print SUMMARY " $focusonip\n"; } print SUMMARY "\n"; } if (@focusonpath != 0) { print SUMMARY "Focussing on accesses to the following paths or files:-\n"; foreach $focusonpath (@focusonpath) { print SUMMARY " $focusonpath\n"; } if ($f_pathcomment) { print SUMMARY "$f_pathcomment\n"; } print SUMMARY "\n"; } if ($i_ipcomment) { print SUMMARY "$i_ipcomment\n"; if ($f_pathcomment) { print SUMMARY "$f_pathcomment\n\n"; } } print SUMMARY "Focussing on $focus\n\n" if $focus ne ''; print SUMMARY "Total number of accesses: $access\n"; print SUMMARY "Total number of users: ", ($count = keys(%ip)), "\n"; print SUMMARY "Total number of bytes: ", &putcommasinnumber($totalbytes), "\n"; print SUMMARY "\nBy file\n"; print SUMMARY "=======\n"; foreach $file (sort byfilename keys(%file)) { printf SUMMARY " %9s %-s\n", $file{$file}, $file; } print SUMMARY "\nBy ip\n"; print SUMMARY "=====\n"; foreach $ip (sort byip keys(%ip)) { printf SUMMARY " %9s %-18s %-s\n", $ip{$ip}, $ip, $dnsname{$ip}; } print SUMMARY "\nBy domain\n"; print SUMMARY "=========\n"; $uktotal = $ustotal = $total = 0; foreach $domain (sort bydnsname keys(%domain)) { printf SUMMARY " %9s %-15s %-s\n", $domain{$domain}, $domain, $country{$domain}; if ($domain eq "uk") { $uktotal = $domain{$domain}; } if (($country{$domain} =~ /\*/) || ($domain eq "us")) { $ustotal += $domain{$domain}; } $total += $domain{$domain}; } $blank = " "; $totals = "Grand total"; printf SUMMARY "\n %9s %-15s %-s\n", $total, $blank, $totals; print SUMMARY 
"\n (* = mainly US based)\n"; print SUMMARY " $ustotal from USA or \"mostly USA\" sites,"; $opct = (int(10000*$ustotal/$total))/100; $width = 6; $dp = 2; printf SUMMARY " i.e. %${width}.${dp}f%%\n", $opct; print SUMMARY " $uktotal from non-local British sites"; $opct = (int(10000*$uktotal/$total))/100; $width = 6; $dp = 2; printf SUMMARY " i.e. %${width}.${dp}f%%\n", $opct; close SUMMARY; # if menu document does not exist, starts a new one # looks to see if this report file has already been added to menu # looks for