# Pluralize "message" by hand: append "s" unless the count is exactly one.
printf "You have %s message%s.\n", $messages, ($messages == 1 ? "" : "s");
---------------------------------
% perl -MLingua::EN::Inflect=PL -le 'print "There are 2 ",PL("aide-de-camp")'
There are 2 aides-de-camp
---------------------------------
# Fix: the module name had a single colon ("Lingua::EN:Inflect"), which
# would fail to load; package separators are always "::".
use Lingua::EN::Inflect qw(PL);

# PL() with a count pluralizes the noun only when the count calls for it,
# as the sample output below shows.
for my $catcount (0..2) {
    print "I saw $catcount ",  PL("cat", $catcount), "\n";
}

# I saw 0 cats
# I saw 1 cat
# I saw 2 cats
---------------------------------
# Fix: the original list contained a stray " " element before " in",
# which printed a double space between the noun and "in".
print  "You have $message ", PL("message", $message),
       " in $mailbox ", PL("mailbox", $mailbox), "\n";
---------------------------------
use Lingua::EN::Inflect qw(NO);
my $message = 0; my $mailbox = 4;

# NO() renders a zero count as the word "no" ("no messages") and
# otherwise prints the number with the correctly pluralized noun,
# as the sample output below shows.
print "You have ", NO("message", $message), " in ",
      NO("mailbox", $mailbox), "\n";

# You have no messages in 4 mailboxes
---------------------------------
# Describe a quantity of $thing: counts from one to ten are spelled out
# via NUMWORDS ("three cats"); anything else falls back to NO(), which
# also handles the zero case.
sub pl {
    my ($thing, $number) = @_;
    if ($number >= 1 && $number <= 10) {
        return NUMWORDS($number) . " " . PL($thing, $number);
    }
    return NO($thing, $number);
}
---------------------------------
% perl -MLingua::EN::Words2Nums -e 'print words2nums("twenty-five")'
25
---------------------------------
% perl -MLingua::EN::Words2Nums -e 'print words2nums("fourty-two")'
42
---------------------------------
# Prompt until words2nums() can parse the user's answer into a number.
# Fix: `last` is not legal inside a do {} while block -- a do-BLOCK is
# not a real loop, so `last` there dies at runtime with "Can't 'last'
# outside a loop block".  Use an explicit while (1) loop instead.
my $times;
while (1) {
    print "How many times should we repeat the process? ";
    $times = words2nums(scalar <STDIN>);
    last if defined $times;
    print "Sorry, I didn't understand that number.\n";
}
---------------------------------
# A sample order record: a hashref with a quantity and a book title.
$order = {
    title    => "Advanced Perl Programming",
    quantity => 45,
};
---------------------------------
# Dispatch table mapping each number word to [handler, value].
our %nametosub = (
    naught =>   [ \&num, 0 ],  # Cardinal numbers, skipping those
    nought =>   [ \&num, 0 ],
    zero =>     [ \&num, 0 ],  # to which "th" is appended.
    one =>      [ \&num, 1 ],  first =>    [ \&num, 1 ],
...
);
# Mind the ordering: e.g. "ninety" must be tried before "nine", which is
# why the alternation is built from the reverse-sorted key list.
my $numregexp = join("|", reverse sort keys %nametosub);
$numregexp=qr/($numregexp)/;
---------------------------------
 (?-xism:((?:b(?:akers?dozen|illi(?:ard|on))|centillion|d(?:ecilli(?:ard|on)|
ozen|u(?:o(?:decilli(?:ard|on)|vigintillion)|vigintillion))|e(?:ight(?:een|
ieth|[yh])?|leven(?:ty(?:first|one))?|s)|f(?:i(?:ft(?:een|ieth|[yh])|rst|ve)|
o(?:rt(?:ieth|y)|ur(?:t(?:ieth|[yh]))?))|g(?:oogol(?:plex)?|ross)|hundred|mi
(?:l(?:ion|li(?:ard|on))|nus)|n(?:aught|egative|in(?:et(?:ieth|y)|t(?:een|
[yh])|e)|o(?:nilli(?:ard|on)|ught|vem(?:dec|vigint)illion))|o(?:ct(?:illi
(?:ard|on)|o(?:dec|vigint)illion)|ne)|qu(?:a(?:drilli(?:ard|on)|ttuor
(?:decilli(?:ard|on)|vigintillion))|in(?:decilli(?:ard|on)|tilli(?:ard|on)|
vigintillion))|s(?:core|e(?:cond|pt(?:en(?:dec|vigint)illion|illi(?:ard|on))|
ven(?:t(?:ieth|y))?|x(?:decillion|tilli(?:ard|on)|vigintillion))|ix(?:t(?:ieth|
y))?)|t(?:ee?n|h(?:ir(?:t(?:een|ieth|y)|d|ousand|ree)|r(?:e(?:decilli(?:ard|
on)|vigintillion)|i(?:gintillion|lli(?:ard|on)))|w(?:w(?:e(?:l(?:fth|ve)|nt(?:ieth|
y))|o)|h)|un(?:decilli(?:ard|on)|vigintillion)|vigintillion|zero|s)))
---------------------------------
s/\b(and|a|of)\b//g; # strip selected common filler words
s/[^A-Za-z0-9.]//g;  # strip spaces and punctuation, except the period.
---------------------------------
# Filler words and characters that may appear between number words
# without breaking a spelled-out number.
my $ok_words = qr/\b(and|a|of)\b/;
my $ok_things = qr/[^A-Za-z0-9.]/;
# A full spelled-out number: one or more number words, optionally
# separated by the fillers above.
my $number = qr/\b(($numbers($ok_words|$ok_things)*)+)\b/i;
# Where $numbers is the big mad expression above.
---------------------------------
use Lingua::EN::FindNumber;
# numify() rewrites spelled-out numbers in the text as digits
# (presumably "Fourscore and seven" => "87" -- see the module docs).
print numify("Fourscore and seven years ago, our four fathers...");
---------------------------------
use Text::Sentence qw( split_sentences );
my $text = <<EOF;
This is the first sentence. Is this the second? This is the third sentence, with an additional clause!
EOF
# Print each sentence on its own, prefixed with '#'.
for my $sentence (split_sentences($text)) {
    print "#$sentence\n\n";
}
---------------------------------
use Lingua::EN::Sentence qw( get_sentences add_acronyms );
my $test = <<EOF;
This punctuation-based assumption is generally good enough, but screws up messily on sentences containing abbreviations followed by capital letters, e.g., This one. Shlomo Yona's Lingua::EN::Sentence does a considerably better job:
EOF
# Fix: the heredoc is stored in $test, but the original passed the
# undeclared $text to get_sentences().
my $sentences = get_sentences($test);
foreach my $sentence (@$sentences) {
     print "#", $sentence, "\n\n";
}
---------------------------------
use Lingua::EN::Splitter qw(words);

# Fix: "Christopher" was misspelled "Christofer"; the Dumper output
# below ('christopher' => 1) shows the intended spelling.
my $text = "Here is Edward Bear, coming downstairs now, bump, bump, bump, on the back of his head, behind Christopher Robin.";

# Count how often each (lowercased) word occurs in the text.
my %histogram;
$histogram{lc $_}++ for @{ words($text) };
use Data::Dumper; print Dumper(\%histogram);
---------------------------------
$VAR1 = {
          'robin' => 1,
          'here' => 1,
          'edward' => 1,
          'now' => 1,
          'bear' => 1,
          'coming' => 1,
          'head' => 1,
          'his' => 1,
          'downstairs' => 1,
          'of' => 1,
          'bump' => 3,
          'on' => 1,
          'the' => 1,
          'behind' => 1,
          'back' => 1,
          'is' => 1,
          'christopher' => 1,
        };
---------------------------------
# Return the stem of a single word (e.g. "programming" => "program").
sub stem {
    # Fix: the original was missing the semicolon after require, and
    # wrote "Lingua::Stem:En" with a single colon.
    require Lingua::Stem::En;
    my ($stemmed) = @{ Lingua::Stem::En::stem({ -words => [shift] }) };
    return $stemmed;
}

# Build a lookup table of the stems of every lowercase dictionary word.
while (<DICT>) {
    chomp;
    next if /[A-Z]/;            # skip capitalized entries (proper nouns etc.)
    $wordlist{stem($_)} = 1;
}
---------------------------------
use Lingua::EN::StopWords qw(%StopWords);

my @words = qw(the second problem that arises is that there are a large number of English words that don't carry semantic content);

# Keep only the words that do not appear in the stopword list, and
# print them space-separated.
my @content_words = grep { !$StopWords{$_} } @words;
print join(" ", @content_words);

second problem arises large number English words don't carry semantic content
---------------------------------
use Lingua::EN::StopWords qw(%StopWords);
use Lingua::Stem::En;
use Lingua::EN::Splitter qw(words);
use List::Util qw(sum);
# Score the similarity of the two sentences by comparing their stemmed,
# stopword-free word sets (see compare() below).
print compare(
    "The AD 79 volcanic eruption of Mount Vesuvius",
    "The volcano, Mount Vesuvius, erupted in 79AD"
);

# Turn a sentence into a set (hashref) of its stemmed, non-stopword terms.
sub sentence2hash {
    my $words = words(lc(shift));
    my @kept = grep { !$StopWords{$_} } @$words;
    my $stemmed = Lingua::Stem::En::stem({ -words => \@kept });
    my %set;
    $set{$_} = 1 for grep { $_ } @$stemmed;   # drop empty stems
    return \%set;
}

# Similarity score, 50-100: terms common to both sentences count 2 in
# %composite, unique terms count 1, so the mean value ranges from 1
# (disjoint) to 2 (identical); halving and scaling by 100 gives the
# final percentage.
sub compare {
    # Fix: the helper is named sentence2hash, not sentences2hash.
    my ($h1, $h2) = map { sentence2hash($_) } @_;
    my %composite = %$h1;
    $composite{$_}++ for keys %$h2;
    return 100 * (sum(values %composite) / keys %composite) / 2;
}
---------------------------------
# Word-frequency vector for one news item, used as classifier input.
my $news = {
    calls        => 1,
    join         => 1,
    now          => 1,
    open         => 1,
    party        => 1,
    registration => 1,
    taipei       => 2,
    us           => 1,
    website      => 1,
    welcome      => 1,
    yapc         => 1,
};
---------------------------------
# Create a Naive Bayes classifier and feed it one training document
# (the word-frequency hash above) labeled "interesting".
my $categorizer = Algorithm::NaiveBayes->new;
$categorizer->add_instance( attributes => $news,
                            label => "interesting" );
---------------------------------
# Train on the accumulated instances, then score a new document;
# $probs maps each label to a probability.
$categorizer->train;
my $probs = $categorizer->predict(  attributes => $new_news );
---------------------------------
# Fix: "0,5" used a decimal comma, which Perl parses as the comma
# operator (the condition became "> 0" followed by a discarded 5).
if ( $probs->{interesting} > 0.5 ) {
    # Probably interesting
}
---------------------------------
# Add $weight to $hash->{word} for every non-stopword in $string.
sub invert_string {
    my ($string, $weight, $hash) = @_;
    for my $word (grep { !$StopWords{$_} } @{ words(lc($string)) }) {
        $hash->{$word} += $weight;
    }
}
---------------------------------
# Build a weighted word-frequency hash for one RSS item.
sub invert_item {
    my $item = shift;
    my %bag;
    # Title words count double relative to description words.
    invert_string($item->{title},       2, \%bag);
    invert_string($item->{description}, 1, \%bag);
    return \%bag;
}
---------------------------------
#!/usr/bin/perl

use XML::RSS;
use Algorithm::NaiveBayes;
use Lingua::EN::Splitter qw(words);
use Lingua::EN::StopWords qw(%StopWords);

my $nb = Algorithm::NaiveBayes->new();

# Train on both feeds: every item from interesting.rdf is labeled
# "interesting", every item from boring.rdf "boring".
for my $category (qw(interesting boring)) {
    my $rss = XML::RSS->new;    # was indirect syntax "new XML::RSS"
    $rss->parsefile("$category.rdf");
    $nb->add_instance(attributes => invert_item($_),
                      label      => $category) for @{$rss->{'items'}};
}

$nb->train; # Compute all the probabilities
---------------------------------
my $target = XML::RSS->new;     # was indirect syntax "new XML::RSS"
$target->parsefile("incoming.rdf");
# Score every incoming item against the trained classifier.
for my $item (@{$target->{'items'}}) {
    print "$item->{title}: ";

    # Fix: "my predict" was missing the $ sigil and would not compile.
    my $predict = $nb->predict(attributes => invert_item($item));
    print int($predict->{interesting}*100)."% interesting\n";
}
---------------------------------
# Fix 1: the alternation ended in "|", creating an empty branch that
# matches every string, so the grep filtered nothing.
# Fix 2: assigning grep's list result to a scalar only stores the match
# count; collect the clauses in an array instead.
# (Note: the capturing parens in split keep the delimiters in the list,
# as in the original.)
my $keywords = "(is|are|was|were|will|have)";
my @clauses = grep { /\b$keywords\b/i }
               map { split /(,|;|--)/ } split_sentences( $text );
---------------------------------
use Lingua::EN::Summarize;
# Summarize the chapter, limiting the summary length (maxlength) and
# word-wrapping the output at column 70.
print summarize($chapter5, maxlength => 300, wrap => 70);
---------------------------------
use Lingua::EN::Splitter qw(words);
use Lingua::EN::Sentence qw(get_sentences);
use File::Slurp;
use Lingua::Stem::En;
use Lingua::EN::StopWords qw(%StopWords);
use List::Util qw(sum);
use strict;
my %base;       # corpus-wide stem frequencies
my %per_file;   # per-file stem frequencies

my $amount = shift;
# Count every stemmed, non-stopword term per file and over all files.
for my $file (<*.txt>) {
    my $sentences = get_sentences ( scalar read_file($file) );
    for my $sentence (@$sentences) {
        # Fix: "@words(lc $sentence) };" was a syntax error; words()
        # returns an arrayref that must be dereferenced with @{ ... }.
        my @words = grep { !$StopWords{$_} } @{ words(lc $sentence) };
        for my $word (@{ Lingua::Stem::En::stem({ -words => \@words }) }) {
            $base{$word}++;
            $per_file{$file}{$word}++;
        }
    }
}
---------------------------------
# Normalize the raw counts into relative frequencies, corpus-wide and
# then per file.
my $sum = sum values %base; $base{$_} /= $sum for keys %base;
my %totals;
for my $file (keys %per_file) {     # fix: "key" -> "keys"
    $sum = sum values %{$per_file{$file}};
    $per_file{$file}{$_} /= $sum for keys %{$per_file{$file}};
}
---------------------------------
# Score each sentence of each file.  (This loop continues in the next
# fragment; its closing brace appears there.)
for my $file (<*.txt>) {
    print $file,":\n";
    my $sentences = get_sentences (scalar read_file($file) );
    my %markings;   # per-sentence record: { order => ..., score => ... }
    my $order = 0;
    for my $sentence (@$sentences) {
        my @words = grep { !$StopWords{$_} } @{words(lc $sentence) };
---------------------------------
        my @words = grep { !$StopWords{$_} } @{words(lc $sentence) };
        $markings{$sentence}->{order} = $order++;  # remember original position
        if (!@words) {
          # All-stopword sentence: score 0 and skip the scoring below.
          $markings{$sentence}->{score} = 0;
          next;
        }
        # A stem scores highly when it is relatively more frequent in
        # this file than in the corpus as a whole.
        for my $word (@{ Lingua::Stem::En::stem({ -words => \@words }) }) {
            my $score = $per_file{$file}{$word} / $base{$word};
            $markings{$sentence}->{score} += $score;
        }
---------------------------------
        # Average the per-stem scores over the sentence length.
        $markings{$sentence}->{score} /= @words;
    }
---------------------------------
    # Rank sentences by score, take the top ten, then restore their
    # original document order so the excerpt reads naturally.
    my @sorted = sort
                 { $markings{$b}->{score} <=> $markings{$a}->{score} }
                 keys %markings;
    my @selected = sort
                 { $markings{$a}->{order} <=> $markings{$b}->{order} }
                 @sorted[0..9];
    # NOTE(review): @sorted[0..9] yields undef entries when a file has
    # fewer than ten sentences -- confirm that is acceptable here.
    print "@selected\n\n";
}
---------------------------------
use Lingua::EN::Tagger;
# Part-of-speech tagger configured to extract noun phrases of up to
# five words, without extra weighting for phrases.
my $tag = Lingua::EN::Tagger->new(longest_noun_phrase => 5,
                                  weight_noun_phrases => 0);

# get_words returns a hash of the significant words/phrases it found
# in the text (presumably word => count; see the Tagger docs).
my %wordlist = $tag->get_words("This is a test of the emergency warning system. This is only a test. If this had been an actual emergency, you would have been instructed to tune to either Cable Channel 3 or local emergency radio stations.");
---------------------------------
# Subclass of Lingua::EN::Tagger that memoizes stemming so repeated
# words are only stemmed once, and remembers the word => stem mapping.
package My::Tagger;
use base 'Lingua::EN::Tagger';
my %known_stems;

sub stem {
    my ( $self, $word ) = @_;
    return $word unless $self->{'stem'};        # stemming disabled
    return $known_stems{ $word } if exists $known_stems{$word};
    # Fix: Lingua::Stem::En::stem takes a hashref of options, as every
    # other call in this file does ({ -words => [...] }); the original
    # passed a flat list.
    my $stemref = Lingua::Stem::En::stem({ -words => [ $word ] });

    # Cache the stem; fall back to the word itself when stemming
    # produced nothing (the original returned undef in that case).
    $known_stems{ $word } = defined $stemref->[0] ? $stemref->[0] : $word;
    return $known_stems{ $word };
}

# Invert the cache: stem => original word.
sub stems { reverse %known_stems; }
---------------------------------
use My::Tagger;
my $tag = My::Tagger->new(longest_noun_phrase => 5,
                          weight_noun_phrases => 0);

# Map a stem back to one of the original words that produced it;
# unknown stems are returned unchanged.
sub unstem {
    my $stem = shift;
    my %stem_to_word = $tag->stems;
    return $stem_to_word{$stem} || $stem;
}
---------------------------------
# Return the five most frequent keywords of the text, with stems mapped
# back to real words.
sub keywords {
    my %wordlist = $tag->get_words(shift);
    my %newwordlist;
    $newwordlist{unstem($_)} += $wordlist{$_} for keys %wordlist;
    my @keywords = sort { $newwordlist{$b} <=> $newwordlist{$a} } keys %newwordlist;
    # Fix: "$keywords[0..4]" indexes with the range in scalar context
    # and yields a single element; an array slice returns the top five.
    return @keywords[0..4];
}
---------------------------------
use GATE::ANNIE::Simple;
$text = <<EOF;
The United States is to ask the United Nations to approve the creation of a multinational force in Iraq in return for ceding some political authority, US officials say.
EOF
# Extract named entities from the text via the GATE/ANNIE toolkit;
# presumably %entities maps entity => class -- see the module docs.
%entities = annie_extract($text);
---------------------------------
use File::Slurp;
use Lingua::EN::NamedEntity;

# Print every entity found in the file together with its class
# (e.g. person, place, organization).
my $file = read_file("summary.txt");
foreach my $ent (extract_entities($file)) {
    print "$ent->{entity}: $ent->{class}\n";
}
