Sample XML Event Script

The following example event script monitors a specified log file.

The IDOL Server installer provides additional example scripts.

#!/usr/bin/perl

use strict;
use warnings;
use File::Tail;
use Encode;
use URI::Escape;

#--------------------------------------------------- 

# Script to monitor a specified log file ($ARGV[0]) generated by any IDOL 
# servers, constantly tailing it. When a query appears in the logs it
# gathers the appropriate information and sends it to the Stats server
# at the specified host ($ARGV[1]) and port ($ARGV[2]).
# 
# The log file has the following format: 
# <date> <time> [<thread number>] <log level> <information> 
# 
# For example: 
# (...) 
# 12/12/2007 08:22:21 [7] 30-Normal: 
#/action=suggest&maxresults=6&reference=doc_123123 (127.0.0.1) 
# 12/12/2007 08:22:23 [7] 30-Normal: Returning 6 matches 
# 12/12/2007 08:22:23 [7] 30-Normal: Suggest complete 
# (...) 
# 
# Parameters: Four main parameters are required to run the script. 
# 
# - logfile: The full path and name of the log file.
# - stathost: Host name of the stats server. 
# - statport: Event port of the Stats server to which the script can 
#send XML events. 
# - idolname: IDOL Server name generating the log file that you want to
# monitor. The value must be the same one specified in the Stats server
# configuration file when setting the "IDOLName" stat parameter.
# 
# For example: idolname = "myIDOLServer" 
# If only one IDOL Server is used, you do not have to use idolname.
# 
# Run the script as follows: 
# 
# perl stats.pl <logfile> <host> <port> <idolname>
# 
#----------------------------------------------------------------

use constant XML_HEADER => "<?xml version='1.0' encoding='ISO-8859-1'?>\n<events>\n";
use constant START_FROM_LOGFILE_END => 0;
use constant READ_ENTIRE_LOGFILE => -1;

if (!defined $ARGV[2]) {
 print "Syntax: $0 logfile host port [idolname]\n";
 exit(0);
}

my $name = $ARGV[0]; # The log file to monitor
my $statshost = $ARGV[1]; # Host of the Stats Server
my $statsport = $ARGV[2]; # Event port of the Stats Server
my $idolname = $ARGV[3]; # IDOL Server name. Set an IDOL Server name if Stats Server is running in Multiple IDOL Server mode.

my $BATCHSIZE=1; # Wait until we have this many events and send them all at once
my $tailn = START_FROM_LOGFILE_END; # Start tailing from the end
my $resetTail=0; # Start tailing after the file has been automatically closed and reopened
my $tail; # Structure returned while tailing a file
my %threadqueries; # The 'current query' on each thread
my %threadips; # The 'current ip' on each thread
my %threadtime; # The 'current time' on each thread
my $total = 0; # Number of XML events generated by the script
my %escapes = (); # Hash mapping characters to hex equivalent
my $data = ""; # Data being sent to the StatsServer
my $events = 0;
my $bIsWindows = 0;

sub buildQueryXML($$$$$@); # return event XML for a particular query with matches and terms
sub postEventsData($$$); # Send XML events to the Stats server
sub processLine($); #Processes a line returned from the log file
sub releaseAndReopenHandle($);

#Are we running on Windows?
eval
{
 require Win32::Process;
};

if ($@) {
 $bIsWindows = 0;
} else {
 $bIsWindows = 1;
}

#-------------------------------- 
# Main loop 
#--------------------------------
for (;;) {
    eval {
        # Tail the specified log file
        $tail = new File::Tail(name => $name, maxinterval => 2, interval => 1, tail => $tailn, adjustafter => 1);
        $tailn = START_FROM_LOGFILE_END;

        # Event Creating loop
        for (;;) {
            $data = XML_HEADER;
            $events = 0;

            # Event Writing loop
            while ($events < $BATCHSIZE) {
                # Wait for new input in the log file.
                my ($nfound, $timeleft, @pending) = File::Tail::select(undef, undef, undef, 1, $tail);


                # Exit if no new line is found
                last if $events > 0 && !$nfound;

                # Returns one line from the log file
                my $line = $tail->read();
                processLine($line);
            }

            # End of the XML event
            $data .= "</events>\n";

            print STDERR "DATA:\n$data\n";

            # End of the current batch. Post the response to the statsserver.
            if ($events) {
                $total += $events;

                eval {
                    postEventsData($statshost, $statsport, $data);
                };

                print "Posting events failed: $@" if $@;

                if ($total % 1000 == 0) {
                    print STDERR "Total events: $total\n";
                }

                $data = "";

                #The file handle used by File::Tail is used permanently until it
                #($tail) goes out of scope. On Windows, this means it keeps a lock
                #on the log file we are tailing: therefore, the log file has no
                #opportunity to roll over after it exceeds the maximum permitted
                #size. The following call closes the handle, sleeps, and reopens
                #the handle. This allows the rollover to occur correctly if logging
                #is made to the log file during the sleep, and the log file size 
                #has overstepped the limit.

                if ($bIsWindows) {
                    releaseAndReopenHandle($tail);
                }
            }
        }
    };

    warn unless $tailn;
    $tailn = READ_ENTIRE_LOGFILE;
    sleep 1;
} #end of the Main loop

sub processLine($) {
    my $line = shift;

    # Check whether the line contains the following format (that is, 16/09/2008 14:20:00 [1] 30-Normal: ....)
    if ($line =~ m,^(\d\d/\d\d/\d{4} \d\d:\d\d:\d\d) \[(\d+)\] \d\d-(Full|Normal|Always|Warning|Error): (.*),) {
        # A log line (beginning with a time)
        my $timeLog = $1; # Get the time when the IDOL Server received the query
        my $thread = $2; # The thread number the query is on
        my $message = $4; # The rest of the log line; $3 captures the log level (Full, Normal, and so on)

        # Check whether the message contains a query (action=termgetbest&...)
        if ($message =~ m,^/?(a.*?=.+) \(([\d\.]+)\)$,) {
            # The initial 'action received'
            $threadqueries{$thread} = $1; # Get the query
            $threadips{$thread} = $2; # Get IP address of the IDOL Server that sent the query
            $threadtime{$thread} = $timeLog; # Get the query time
        }

        # Check whether the message contains the number of hits, returned matches or completed query
        elsif ($threadqueries{$thread} && ($message =~ /^Completed Action, returning (\d+) hits$/ || $message =~ /^Returning (\d+) match/ || $message =~ /^.* complete$/)) {
            # The end of the query. Form the event xml and save
            my $matches = $1 || 0;
            my $query = $threadqueries{$thread};
            my $ip = $threadips{$thread};

            # If the query format is correct, generate the data for the XML event
            if (defined $query && defined $ip && $query =~ m!/?a.*?=(\w+)([?&].*(?<=[?&])text=([^?&]*))?!) {
                $events++;
                my $terms = $3 || "";
                $data .= buildQueryXML($idolname, $query, $1, $matches, $ip, split(/ /,uri_unescape($terms)));
            }

            print STDERR "$threadtime{$thread} $threadqueries{$thread}\n";

            # Reset
            $threadqueries{$thread} = "";
            $threadips{$thread} = "";
            $threadtime{$thread} = "";
        } # end of the action
    } # end of the line
}
#Windows-only: close and reopen the log file handle, with a suitable pause in 
#between, to allow the log file to be rolled over.
sub releaseAndReopenHandle($) {
    my $tail = shift;
    my $logfile = $tail->input();
    close($tail->{'handle'});
    sleep(5);
    open($tail->{'handle'}, "<$logfile") or die "Cannot reopen logfile handle: $!\n";
}

sub stripControlChars($) {
    my $x = shift;
    $x =~ tr/\x00-\x1F//d;
    return $x;
}

#-------------------------------------------------------------------- 
# Extract information from a query and build the XML to send 
#--------------------------------------------------------------------
sub buildQueryXML($$$$$@) {
    my $idolname = shift;
    my $query = shift;
    my $action = shift;
    my $matches = shift;
    my $ip = shift;
    my @terms = @_;
    my $xml = "<queryinfo>\n<ver>0.1</ver>\n<url><![CDATA[$query]]></url>\n";
    $xml .= "<action>$action</action>\n";
    $xml .= "<terms><term>" . uri_escape(stripControlChars($_)) . "</term></terms>\n" for @terms;
    $xml .= "<numhits>$matches</numhits>\n";
    $xml .= "<ip>$ip</ip>\n";

    if ($idolname) {
        $xml .= "<idolname>$idolname</idolname>\n";
    }

    $xml .= "</queryinfo>\n";
    return $xml;
}

#-------------------------------------------------------------------- 
# Post a batch of events data to the specified host and port 
#--------------------------------------------------------------------
sub postEventsData($$$) {
    my $host = shift;
    my $port = shift;
    my $data = shift;
    my $nConnectTry = 3;

    use Socket;
    socket(INDEXSOCK, Socket::PF_INET, Socket::SOCK_STREAM, getprotobyname('tcp')) || print STDERR "Socket Failure: $!";
    my $inet_addr = Socket::inet_aton($host) || print STDERR "Internet_addr Failure: $1\n";
    my $paddr= Socket::sockaddr_in($port, $inet_addr) || print STDERR "Sockaddr_in Failure: $!\n";

    while (!connect(INDEXSOCK, $paddr) && $nConnectTry > 0) {
        $nConnectTry--;
    }

    $nConnectTry > 0 or die "Connect problem: $|\n";
    select INDEXSOCK; $| =1;
    select STDOUT;
    print INDEXSOCK "POST ";
    print INDEXSOCK "/stats";
    print INDEXSOCK " HTTP/1.0\r\n";
    print INDEXSOCK "Content-Length: ".length($data)."\r\n\r\n";
    print INDEXSOCK "$data";
    my $buffer;
    read(INDEXSOCK, $buffer, 100);
    close INDEXSOCK; return(1);
}

_HP_HTML5_bannerTitle.htm