#!/usr/bin/perl -w
# textbooks.pl
# Generates a list of O'Reilly books that are used as textbooks
# at the top 20 universities.
# Usage: perl textbooks.pl

use strict;
use SOAP::Lite;

# Everything needed to talk to Google: the API key (placeholder text to be
# replaced by the user), the WSDL file name, and the SOAP::Lite client that
# is built from that WSDL file.
my $google_key  = 'tutaj Twj klucz google';
my $google_wdsl = 'GoogleSearch.wsdl';
my $gsrch       = SOAP::Lite->service('file:' . $google_wdsl);

# "site:" search restrictions for the twenty university domains, kept in
# the order in which they are later grouped into queries.
my @toptwenty = map { "site:$_" } qw(
    cmu.edu        mit.edu         stanford.edu
    berkeley.edu   uiuc.edu        cornell.edu
    utexas.edu     washington.edu  caltech.edu
    princeton.edu  wisc.edu        gatech.edu
    umd.edu        brown.edu       ucla.edu
    umich.edu      rice.edu        upenn.edu
    unc.edu        columbia.edu
);

# Main search loop.
#
# The entries of @toptwenty are OR-ed together in groups of $group_size and
# each group is combined with the fixed search phrase.  Every query is paged
# through $page_size results at a time, up to $max_results.  Hits whose URL
# or title contains a three-digit number are written to top20.txt as
# title, URL and cleaned-up snippet, followed by a blank line.
my $group_size  = 5;     # site: restrictions combined into one query
my $page_size   = 10;    # results requested per doGoogleSearch call
my $max_results = 50;    # stop paging a query at this result offset

# Lexical handle and three-argument open: the mode can never be taken from
# the file name, and the handle is not a package-wide bareword.
open(my $out, '>', 'top20.txt')
 or die "Niemoliwe otwarcie: $!";

# Step through the list by slices so the loop follows the real length of
# @toptwenty; a shorter final group is handled instead of reading past the
# end of the array.
for (my $first = 0; $first < @toptwenty; $first += $group_size) {

    my $last = $first + $group_size - 1;
    $last = $#toptwenty if $last > $#toptwenty;

    # One group of universities, e.g. "( site:a | site:b | ... )".
    my $arrayquery = "( " . join(" | ", @toptwenty[$first .. $last]) . " )";

    # The search terms.
    my $googlequery = "\"o'reilly * associates\" syllabus $arrayquery";
    print "Szukanie: $googlequery\n";

    # Run the search, one page of results per call.
    PAGE:
    for (my $offset = 0; $offset < $max_results; $offset += $page_size) {
        my $result = $gsrch->doGoogleSearch($google_key, $googlequery,
                             $offset, $page_size, "false", "",  "false",
                             "lang_en", "", "");

        # No usable response (presumably a SOAP fault -- confirm against
        # the SOAP::Lite fault handling in use): report it and stop paging
        # this query.  The message reads "No response from Google
        # (position N): <query>".
        unless (ref $result) {
            warn "Brak odpowiedzi Google (pozycja $offset): $googlequery\n";
            last PAGE;
        }

        # An empty page means there is nothing further to fetch, so do not
        # spend more API calls on this query.
        my $hits = $result->{'resultElements'};
        last PAGE unless (ref($hits) && @{$hits});

        # Examine every hit on this page.
        foreach my $hit (@{$hits}) {
            # Missing fields become empty strings so that matching and
            # printing do not raise "uninitialized" warnings under -w.
            my $url   = defined $hit->{'URL'}     ? $hit->{'URL'}     : '';
            my $title = defined $hit->{'title'}   ? $hit->{'title'}   : '';
            my $snip  = defined $hit->{'snippet'} ? $hit->{'snippet'} : '';

            # Keep only hits whose URL or title contains a three-digit
            # number.
            # NOTE(review): the URL pattern accepts three digits anywhere
            # after the first "/" following "http:", which includes the
            # host name -- confirm this is intended.
            next unless ($url =~ /http:.*?\/.*?\d{3}.*?/
                         || $title =~ /\d{3}/);

            # Strip the markup from the snippet and decode the entities;
            # "&amp;" is decoded last so nothing is decoded twice.
            $snip =~ s/<b>/ /g;
            $snip =~ s/<\/b>/ /g;
            $snip =~ s/&#39;/'/g;
            $snip =~ s/&quot;/"/g;
            $snip =~ s/&amp;/&/g;
            $snip =~ s/<br>/ /g;

            print {$out} "$title\n";
            print {$out} "$url\n";
            print {$out} "$snip\n\n";
        }
    }
}

# Buffered write errors (for example a full disk) only surface at close, so
# the return value is checked.  The message reads "Writing to top20.txt
# failed".
close($out)
    or die "Zapis do top20.txt nieudany: $!";

