Pbsmysql

From In The Wings
Jump to navigation Jump to search

This script is used to enter PBS / Torque logs (in the server_priv/accounting directory) into a MySQL database. It should really be revamped so that everything goes into one table instead of the dozen or so tables that are there now, but I haven't gotten around to this. One of the problems of doing a revamping like that is that I would have to rebuild the database schema and then rebuild the database with all of the old data. I may still do this, but only at a later period when I have some free time.

Another issue that makes it problematic to do this change is that the web pages that reference the mysql database currently do not look for a record type. As such, all of the web pages that access this database would have to be changed in order to reflect the new searching methods.

Script

#!/usr/bin/perl

## pbsmysql: Script to push the contents of PBS log files into a MySQL
##           database.


use Switch;
use DBI();

# Database DSN and access credentials

$dsn="DBI:mysql:database=pbslogs;host=hpc.ufhpc";
$dbuser='pbslogs';
$dbpwd='';

# Where the log files are located:

$ACCT="/var/spool/torque/server_priv/accounting";

# Set this to either "Torque" or "PBSPro", depending on which scheduler
# you are using.

$scheduler = "Torque";

## Main program
if (!@ARGV) {
    print "No input files\n";
    exit;
}

for ($i=0; $i<=$#ARGV; $i++) {
    open(FILE, "$ACCT/@ARGV[$i]");
    while ($line = <FILE>) {
        chop($line);
        @fields = split(/ /,$line);
# Convert the date field for MySQL
        $date=&date_conv(@fields[0]);
        @record_type=&record_find(@fields[1]);
        switch (@record_type[1]) {
            case "A" { &job_aborted; }
            case "B" { &job_reserved; }
            case "C" { &job_checkpointed; }
            case "D" { &job_deleted; }
            case "E" { &job_ended; }
            case "F" { &job_finished_reserved; }
            case "K" { &job_reservation_deleted; }
            case "k" { &job_reservation_term; }
            case "Q" { &job_queued; }
            case "R" { &job_rerun; }
            case "S" { &job_started; }
            case "T" { &job_started_checkpoint; }
            case "U" { &job_reserve_unconfirmed; }
            case "Y" { &job_reserve_confirmed; }
        }
    }
    close(FILE);
}

## Subroutines

sub job_started {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    @USER=split(/=/,@record_type[3]);
    @GROUP=split(/=/,@fields[2]);
    @JOBNAME=split(/=/,@fields[3]);
    @QUEUE=split(/=/,@fields[4]);
    @CTIME=split(/=/,@fields[5]);
    @QTIME=split(/=/,@fields[6]);
    @ETIME=split(/=/,@fields[7]);
    @START=split(/=/,@fields[8]);
# Have to split apart the Execution hosts and put them together to look
# nice. Darned spaces.
    @EXEC_HOST=split(/=/,@fields[9]);
    $EXEC_HOST=join("=",@EXEC_HOST[1..$#EXEC_HOST]);
# For the resource list, since it is the last thing left in the line, we
# just concatenate everything into a joining via colons.
    $counter=10;
    while (@fields[$counter] =~ /^Resource_List/) {
        @RSC_TEMP2=split(/\./,@fields[$counter]);
        @RSC_TEMP=split(/:/,@RSC_TEMP2[1]);
        $counter2=0;
        while (@RSC_TEMP[$counter2]) {
            @RSC=split(/=/,@RSC_TEMP[$counter2]);
            switch (@RSC[0]) {
                case "arch" { $REQ_ARCH = @RSC[1]; }
                case "ncpus" { $REQ_NCPUS = @RSC[1]; }
                case "nodect" { $REQ_NODECT = @RSC[1]; }
                case "place" { $REQ_PLACE = @RSC[1]; }
                case "group" { $REQ_GROUP = @RSC[1]; }
                case "select" { $REQ_SELECT = @RSC[1]; }
# For the next two, it depends on whether this is PBSPro or Torque. PBSPro
# uses cput, while Torque uses walltime. As far as we are concerned in this
# case, either one will do.
                case "cput" { 
                    $REQ_CPUT = @RSC[1]; 
                    @REQ_CPUT_2 = split(/:/,$REQ_CPUT);
                    $REQ_CPUT = @REQ_CPUT_2[0]*3600 + @REQ_CPUT_2[1]*60 + @REQ_CPUT_2[2];
                }
                case "walltime" {
                    $REQ_CPUT = @RSC[1]; 
                    @REQ_CPUT_2 = split(/:/,$REQ_CPUT);
                    $REQ_CPUT = @REQ_CPUT_2[0]*3600 + @REQ_CPUT_2[1]*60 + @REQ_CPUT_2[2];
                }
                case "mem" { $REQ_MEM = @RSC[1]; }
                case "nodes" { $REQ_NODES = @RSC[1]; }
                case "ppn" { $REQ_PPN = @RSC[1]; }
                case "host" { 
                    if ($REQ_HOST) { 
                        $REQ_HOST = $REQ_HOST . ":" . @RSC[1];
                    }
                    else {
                        $REQ_HOST = @RSC[1];
                    }
                }
#               else { $REQ_OTHER = $REQ_OTHER . @RSC[0] . "=" . @RSC[1] 
#                          . ":"; }
            }
            my @RSC = ();
            $counter2++;
        }
        my @RSC_TEMP = ();
        my @RSC_TEMP2 = ();
        $counter++;
    }
#    $RESOURCE_LIST=join("^",@fields[10..$#fields]);
    $dbh->do("INSERT INTO jobs_started VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
             undef, undef, $date, @record_type[0], @record_type[2], @USER[1],
             @GROUP[1], @JOBNAME[1], @QUEUE[1], @CTIME[1], @QTIME[1],
             @ETIME[1], @START[1], $EXEC_HOST, $REQ_ARCH, $REQ_NCPUS,
             $REQ_NODECT, $REQ_PLACE, $REQ_GROUP, $REQ_SELECT, $REQ_CPUT,
             $REQ_MEM, $REQ_NODES, $REQ_PPN, $REQ_HOST, $REQ_OTHER);
    $dbh->disconnect();
    $REQ_HOST="";
}

sub job_ended {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    @USER=split(/=/,@record_type[3]);
    @GROUP=split(/=/,@fields[2]);
    @JOBNAME=split(/=/,@fields[3]);
    @QUEUE=split(/=/,@fields[4]);
    @CTIME=split(/=/,@fields[5]);
    @QTIME=split(/=/,@fields[6]);
    @ETIME=split(/=/,@fields[7]);
    @START=split(/=/,@fields[8]);
    @EXEC_HOST=split(/=/,@fields[9]);
    $EXEC_HOST=join("=",@EXEC_HOST[1..$#EXEC_HOST]);
# Now things get hairy. See, we have to go through the rest of the fields
# and check to see that they start with "Resource_List". Eventually they
# will no longer start with this and we will have to continue with the end
# time and other things. The problem is that the resource list can vary
# depending on the job.
    $counter=10;
    $RESOURCE_LIST="";
    while (@fields[$counter] =~ /^Resource_List/) {
        @RSC_TEMP2=split(/\./,@fields[$counter]);
        @RSC_TEMP=split(/:/,@RSC_TEMP2[1]);
        $counter2=0;
        while (@RSC_TEMP[$counter2]) {
            @RSC=split(/=/,@RSC_TEMP[$counter2]);
            switch (@RSC[0]) {
                case "arch" { $REQ_ARCH = @RSC[1]; }
                case "ncpus" { $REQ_NCPUS = @RSC[1]; }
                case "nodect" { $REQ_NODECT = @RSC[1]; }
                case "place" { $REQ_PLACE = @RSC[1]; }
                case "group" { $REQ_GROUP = @RSC[1]; }
                case "select" { $REQ_SELECT = @RSC[1]; }
# Again, here is the case to handle PBSPro versus Torque
                case "cput" { 
                    $REQ_CPUT = @RSC[1]; 
                    @REQ_CPUT_2 = split(/:/,$REQ_CPUT);
                    $REQ_CPUT = @REQ_CPUT_2[0]*3600 + @REQ_CPUT_2[1]*60 + @REQ_CPUT_2[2];
                }
                case "walltime" {
                    $REQ_CPUT = @RSC[1]; 
                    @REQ_CPUT_2 = split(/:/,$REQ_CPUT);
                    $REQ_CPUT = @REQ_CPUT_2[0]*3600 + @REQ_CPUT_2[1]*60 + @REQ_CPUT_2[2];
                }
                case "mem" { $REQ_MEM = @RSC[1]; }
                case "nodes" { $REQ_NODES = @RSC[1]; }
                case "ppn" { $REQ_PPN = @RSC[1]; }
                case "host" { 
                    if ($REQ_HOST) { 
                        $REQ_HOST = $REQ_HOST . ":" . @RSC[1];
                    }
                    else {
                        $REQ_HOST = @RSC[1];
                    }
                }
#               else { $REQ_OTHER = $REQ_OTHER . @RSC[0] . "=" . @RSC[1] 
#                          . ":"; }
            }
            my @RSC = ();
            $counter2++;
        }
        my @RSC_TEMP = ();
        my @RSC_TEMP2 = ();
        $counter++;
    }

# Now that is done, we also know that $counter happens to represent the
# token for the first field after all of this that does NOT have 
# "Resource_List" in it.
    @SESSION_ID=split(/=/,@fields[$counter++]);
    @ENDTIME=split(/=/,@fields[$counter++]);
    @EXIT_STATUS=split(/=/,@fields[$counter++]);
# Resources used come out in a specific order, so we can use this to our
# advantage. Unfortunately, different schedulers have slightly different
# things in them.
    if($scheduler eq "PBSPro") {
        @CPU_PERCENT=split(/=/,@fields[$counter++]);
        @CPU_TIME=split(/=/,@fields[$counter++]);
        @CPU_TIME_2=split(/:/,$CPU_TIME[1]);
        $CPU_TIME_3=@CPU_TIME_2[0]*3600 + @CPU_TIME_2[1]*60 + @CPU_TIME_2[2];
        @MEM_USED=split(/=/,@fields[$counter++]);
        @MEM_USED[1] =~ s/kb//g;
        @NUM_CPUS_USED=split(/=/,@fields[$counter++]);
        @VIRT_MEM_USED=split(/=/,@fields[$counter++]);
        @VIRT_MEM_USED[1] =~ s/kb//g;
        @WALLTIME_USED=split(/=/,@fields[$counter]);
        @WALLTIME_USED_2=split(/:/,$WALLTIME_USED[1]);
        $WALLTIME_USED_3=@WALLTIME_USED_2[0]*3600 + @WALLTIME_USED_2[0]*60 + @WALLTIME_USED_2[0];
    }
# The torque scheduler does not send out CPU Percent or the number of CPUs
# used, so we have to eliminate them here.
    if($scheduler eq "Torque") {
        @CPU_TIME=split(/=/,@fields[$counter++]);
        @CPU_TIME_2=split(/:/,$CPU_TIME[1]);
        $CPU_TIME_3=@CPU_TIME_2[0]*3600 + @CPU_TIME_2[1]*60 + @CPU_TIME_2[2];
        @MEM_USED=split(/=/,@fields[$counter++]);
        @MEM_USED[1] =~ s/kb//g;
        @VIRT_MEM_USED=split(/=/,@fields[$counter++]);
        @VIRT_MEM_USED[1] =~ s/kb//g;
        @WALLTIME_USED=split(/=/,@fields[$counter]);
        @WALLTIME_USED_2=split(/:/,$WALLTIME_USED[1]);
        $WALLTIME_USED_3=@WALLTIME_USED_2[0]*3600 + @WALLTIME_USED_2[0]*60 + @WALLTIME_USED_2[0];
    }

# OK, now we can push all of this into a line item in the database. We have to
# check to see that there were actually some CPU's used, then make sure that
# the CPU percentage is reasonable.
    $dbh->do("INSERT INTO jobs_ended VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
             undef, undef, $date, @record_type[0], @record_type[2], 
             @USER[1],
             @GROUP[1], @JOBNAME[1], @QUEUE[1], @CTIME[1], @QTIME[1],
             @ETIME[1], @START[1], $EXEC_HOST, $REQ_ARCH, $REQ_NCPUS,
             $REQ_NODECT, $REQ_PLACE, $REQ_GROUP, $REQ_SELECT, 
             $REQ_CPUT,
             $REQ_MEM, $REQ_NODES, $REQ_PPN, $REQ_HOST, $REQ_OTHER, 
             @SESSION_ID[1], @ENDTIME[1], @EXIT_STATUS[1], 
             @CPU_PERCENT[1], 
             $CPU_TIME_3, @MEM_USED[1], @NUM_CPUS_USED[1], 
             @VIRT_MEM_USED[1], 
             $WALLTIME_USED_3);

    
    $dbh->disconnect();
    $REQ_HOST="";
}

# From here on, all of the job based mysql adds are pretty generic:

sub job_reserve_confirmed {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_confirmed_resources_reservation VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_reserve_unconfirmed {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_unconfirmed_resources_reservation VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_started_checkpoint {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_restarted_checkpoint VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_rerun {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_rerun VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_queued {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_queued VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_reservation_term {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_reservation_terminated VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_reservation_deleted {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_requested_removal_reservation VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_finished_reserved {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_reserved_ended VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_deleted {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_deleted VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub job_checkpointed {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_checkpointed VALUES (?, ?, ?, ?)", 
             undef, $date, @record_type[0], @record_type[2], @record_type[3]);
    $dbh->disconnect();
}

sub job_aborted {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_aborted VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

# This one actually has a schema in the manual, but because I have no bloody
# clue what one actually looks like, this is how it is for now. It is very
# possible that if we do get one of these, this script will break.

sub job_reserved {
    my $dbh = DBI->connect($dsn, $dbuser, $dbpwd, {'RaiseError' => 1});
    $dbh->do("INSERT INTO jobs_reserved VALUES (?, ?, ?, ?, ?)", 
             undef, undef, $date, @record_type[0], @record_type[2], 
             @record_type[3]);
    $dbh->disconnect();
}

sub record_find {
# This breaks apart the second field into its' individual parts. The reason
# for this is that the second field actually holds four different pieces
# of data: time, record_type, id_string, and the first message text. If there
# are any more message texts, they were space delimited and thus in higher
# number fields in the @fields array.

    local($log_record) = @_;
    @record_type_local = split(/\;/,$log_record);
    @record_type_local;
}

sub date_conv {
# Convert the date string in the log file to something that MySQL
# will understand. MySQL wants it in the form of YYYY-MM-DD, and
# it is presented in the file as MM/DD/YYYY.
    local($log_date) = @_;
    @date_fields = split(/\//,$log_date);
    $new_date = @date_fields[2] . "-" . @date_fields[0] . "-" .
        @date_fields[1];
    $new_date;
}