#!/usr/bin/perl # ************************************************************************** # # keywords.prl written by Chris Evans (C.Evans@sghms.ac.uk) # # program collects all the keywords in the site.idx file created from # # file headers by site_ind.pl2 # # # # The program takes as input in the variables declared immediately below # # $filemask -- file mask (appropriate to the OS) for the find command # # $outputdir -- the directory into which to put these files # # $menu -- filename for the menu to all other reports # # $report -- filename for the directory order output # # $time_rep -- filename for the modification time sorted output # # $other_rep -- filename for the files other than *.htm? # # $startdir -- directory from which to start the recursive search # # $www_offset -- any additional directory reference that accounts for # # a possible difference between the root used by unix for the # # search versus that shown to the world by the HTTP server in use # # This last variable may seem odd but on our local machine the root # # shown to the outside world by the server is at: # # /usr1/www/pages/ # # but my pages (from which I want the search for *.htm files to # # start is at: # # /usr1/www/pages/mhs/psychotherapy/ # # giving the value for $www_offset of /usr1/www/pages # # # # file written by Chris Evans (C.Evans@sghms.ac.uk) 20.iii.96 # # copyright Chris Evans _BUT_ feel free to distribute this subject to the # # following requirements: # # 1) you do not make a profit on the distribution # # 2) you retain this header information in full # # I would also very much appreciate feedback (I know the programming is # # awful, it's the first piece of PERL I've written and I'm a psychodynamic # # psychotherapist with no intention of giving up my day job so no flames if # # you can resist the temptation) and a copy of enhanced versions if you do # # hack this into something better # # # # Chris Evans (C.Evans@sghms.ac.uk) Section of Psychotherapy, # # St. George's Hospital Medical School, Cranmer Terrace, # # London, SW17 0RE, Britain # # ************************************************************************** # $othkeyw = 0; # set to 1 if you want the other keyword info. given against each file $head = < Index of keywords in Psychotherapy Section pages: $letter

Use [find] button on your browser to find keyword of interest to you

As of today (23.iii.96) I am afraid the files on the site are still only very incompletely broken up and keyworded so most of the stuff I mounted in 1995 is not keyworded and won't appear here. I'll work on it when I've got time!

END $btm = <


File created using Perl script: keywords.prl by Chris Evans C.Evans@sghms.ac.uk END # open the file with all the info. in it: site.idx open(IDX,"site.idx") || die "Couldn't open site.idx\n"; $num_entry = 0; # ### now read through it while () { $len = length($_); if ($len == 1) { # blank line, get last field in $thisline &field; undef $thisline; # now use fields (and wipe them) &use_fields; next; } chop; if (/^ /) { # line must be continuation line s/^ *//; # strip the initial spaces for tidyness $thisline .= $_; # and append it to current line } else { # as line doesn't start with spaces it MUST be a new field # so process accumulated lines in $thisline and start anew &field; $thisline = $_; } } # end of reading in from site.idx if ($url) { # last entry still to process (not followed by a blank line) &use_fields; } # debugging # foreach $word (sort bylwrcase keys %refs) { # print "keyw = $word\n"; # hrefs = $refs{$word}\n"; # @files = split(/,/,$refs{$word}); # foreach $filenum (@files) { # print "$href{$filenum}\n"; # print "
$title{$filenum}\n"; # print "
$desc{$filenum}\n"; # } # } # sleep(2); #foreach $filenum (keys %keyw) { #print "href = $href{$filenum}, keywords:\n$keyw{$filenum}"; #} print "\n\n\n\nNOW creating the index itself\n"; ################ initialisation ############## $htm = "htm"; $first = 1; # this is needed to get proper handling of the #
      nesting # now set things up for anything starting with a $letter = "a"; print "$letter\n"; open(LET,">keyw/$letter.$htm") || die "Can't open keyw/$letter.$htm\n"; print LET "$head"; # and for anything starting with non-letter open(OTH,">keyw/other.htm") || die "Can't open keyw/other.htm\n"; print OTH "$head"; $oth_first = 1; ##### now for the loop through the keywords ####### foreach $word (sort bylwrcase keys %refs) { $w_let = substr($word,0,1); $w_let =~ tr/A-Z/a-z/; $refs{$word} =~ s/,\s*$//; # chop off trailing comma # check whether first letter is a letter, if not, stick in "other.htm" if ($w_let =~ /[^a-z]/) { if ($oth_first) { print LET "
        \n

        \n

      • $word

        \n
        \n"; $oth_first = 0; } else { print LET "
        \n

        \n

      • $word

        \n
        \n"; } undef @files; undef %n_refs; @files = split(/,/,$refs{$word}); foreach $filenum (@files) { print OTH "
        "; if ($title{$filenum}) { print OTH "$title{$filenum}\n"; } else { print OTH "$href{$filenum}\n"; } if ($desc{$filenum}) { print OTH "
        $desc{$filenum}
        \n"; } else { print OTH "
        no description available
        \n"; } if (($othkeyw) && $keyw{$filenum}) { print OTH " Other keywords are:"; @keyw = split(/,/,$keyw{$filenum}); foreach $keyw (sort bylwrcase @keyw) { if (($keyw =~ /\S/) && ($keyw ne $word)) { # contains non-whitespace print OTH "$keyw, "; } } print OTH "\n"; } } } else { # initial character must be a letter if you get through the above if ($w_let ne $letter) { # initial letter is not the current one so need to # put this keyword under the right letter of the alphabet # first find the right letter while (($w_let ne $letter) && ($letter ne "z")) { $letter++; $first = 1; } # now close current letter file and open new one print "$letter\n"; print LET $btm; close(LET); open(LET,">keyw/$letter.$htm") || die "Can't open keyw/$letter.$htm\n"; print LET "$head"; } if ($first) { print LET "
          \n

          \n

        • $word

          \n
          \n"; $first = 0; } else { print LET "
          \n

          \n

        • $word

          \n
          \n"; } undef @files; undef %n_refs; @files = split(/,/,$refs{$word}); foreach $filenum (@files) { print LET "
          "; if ($title{$filenum}) { print LET "$title{$filenum}\n"; } else { print LET "$href{$filenum}\n"; } if ($desc{$filenum}) { print LET "
          $desc{$filenum}
          \n"; } else { print LET "
          no description available
          \n"; } if (($othkeyw) && $keyw{$filenum}) { print LET " Other keywords are:"; @keyw = split(/,/,$keyw{$filenum}); foreach $keyw (sort bylwrcase @keyw) { if (($keyw =~ /\S/) && ($keyw ne $word)) { # contains non-whitespace print LET "$keyw, "; } } print LET "\n"; } } } } # now tail and close the open files print LET $btm; close(LET); print OTH $btm; close(OTH); open(KEYW,">keyw/alpha.lst") || die "Couldn't open to write to keyw/alpha.lst\n"; print "Writing out keyword usage in alphabetical order for keyw/alpha.lst\n"; foreach $keyw (sort bylwrcase keys %allkeyw) { printf KEYW "%40s %-s\n", $keyw, $allkeyw{$keyw}; } close(KEYW); open(KEYW,">keyw/keyw.lst") || die "Couldn't open to write to keyw/keyw.lst\n"; print "Writing out keyword usage in frequency order for keyw/keyw.lst\n"; foreach $keyw (sort byfreq keys %allkeyw) { printf KEYW "%40s %-s\n", $keyw, $allkeyw{$keyw}; } close(KEYW); # --- subroutines ------------------------------------------------------------------ sub field { #print "\$thisline = $thisline\n"; if ($thisline =~ /^Template-Type: DOCUMENT/) { $doc = 1; } if ($thisline =~ /^Title: (.*)$/) { $title = $1; } if ($thisline =~ /^URI: (.*)$/) { #print "URL\n"; $url = $1; } if ($thisline =~ /^Description: (.*)/) { $desc = $1; #print "Description: = $desc\n"; } if ($thisline =~ /^Keywords: (.*)/) { @keyw = split(/,/, $1); foreach $keyw (@keyw) { $correct = $keyw; # need to strip out: $correct =~ s/^\s*//; # any leading spaces $correct =~ s/\s*$//; # any trailing spaces if ($correct =~ /\S/) { # contains non-whitespace $clean_keyw .= $correct; $clean_keyw .= ","; } } @keyw = split(/,/,$clean_keyw); } } sub use_fields { if (($doc) && ($url =~ /\/psychotherapy\//)) { $txt_entry = $num_entry.","; #print "\$txt_entry = $txt_entry\n"; #print "\$doc = $doc\n"; #print "\$title = $title\n"; #print "\$desc = $desc\n"; #print "\$url = $url\n"; #print "\$correct = $correct\n"; #print "\$clean_keyw = $clean_keyw\n"; #print "\@keyw = @keyw\n\n\n"; #sleep(20); $href{$num_entry} = $url; if ($title ne "Error Message") { $title{$new_entry} = $title; } $desc{$num_entry} = $desc; $keyw{$num_entry} = $clean_keyw; foreach $keyw (@keyw) { $refs{$keyw} .= $txt_entry; $allkeyw{$keyw}++; } $num_entry++; } undef $doc; undef $title; undef $desc; undef $url; undef $correct; undef $clean_keyw; undef @keyw; } sub bylwrcase { # compare by lowercase of the strings local($stringa) = $a; local($stringb) = $b; $stringa =~ tr/A-Z/a-z/; $stringb =~ tr/A-Z/a-z/; $stringa cmp $stringb; } sub byfreq { # compare by number of uses in order to sort # into reverse order by access $allkeyw{$b} <=> $allkeyw{$a}; }