#!/usr/bin/perl
#
# Example Perl script to extract job info from a log in SWF.
# Once the jobs are parsed you can tabulate various statistics about
# something of interest (see the "COLLECT DATA" marker below).
#
# Input is read with <>, i.e. from the files named on the command line,
# or from standard input when no file is given.
#
# The data format is one line per job, with 18 fields:
#  0 - Job Number
#  1 - Submit Time
#  2 - Wait Time
#  3 - Run Time
#  4 - Number of Processors
#  5 - Average CPU Time Used
#  6 - Used Memory
#  7 - Requested Number of Processors
#  8 - Requested Time
#  9 - Requested Memory
# 10 - status (1=completed, 0=killed)
# 11 - User ID
# 12 - Group ID
# 13 - Executable (Application) Number
# 14 - Queue Number
# 15 - Partition Number
# 16 - Preceding Job Number
# 17 - Think Time from Preceding Job
#
# Lines that are empty or start with ';' are header/comment lines; a few
# recognised headers (UnixStartTime, TimeZoneString, MaxJobs, MaxProcs,
# MaxNodes) are harvested from them.
#

use warnings;
use strict;
use Time::Local;
use POSIX;

# Enable autoflush on STDERR so the "\r"-style progress indicator is
# shown immediately, then restore the previously selected handle.
my $oldhandle = select(STDERR);
$| = 1;
select($oldhandle);

# number of whitespace-separated fields in a well-formed job line
my $NUM_FIELDS = 18;

# count bad things
my $cnt_fmt  = 0;    # lines that are not well-formed job lines
my $cnt_t0   = 0;    # jobs with a runtime of 0 (counted, but still processed)
my $cnt_p0   = 0;    # jobs that used 0 processors
my $cnt_stat = 0;    # jobs whose status is not 1
my $cnt_bad  = 0;    # jobs with -1 as submit time, runtime, or processors

# some useful globals, filled in from the header comments when present
my $start = 0;       # UnixStartTime; 0 if the header is missing, so that
                     # adding it to a submit time is a harmless no-op
my $jobs;            # MaxJobs
my $procs;           # MaxProcs
my $nodes;           # MaxNodes

#
# scan trace and collect job info
#
while (<>) {

    #
    # empty or comment line
    #
    # such lines are skipped, but note that some header comments
    # may include useful data
    #
    if (/^\s*$|^;/) {
        # maintain data about log start time
        if (/^;\s*UnixStartTime:\s*(\d+)\s*$/) {
            $start = $1;
        }
        # zone names may contain '+' and '-' (e.g. Etc/GMT+2) in addition
        # to word characters and '/'
        if (/^;\s*TimeZoneString:\s*([\w\/+\-]+)\s*$/) {
            $ENV{TZ} = $1;
            POSIX::tzset();    # make localtime()/strftime() honour the new TZ
        }
        # about jobs
        if (/^;\s*MaxJobs:\s*(\d+)\s*$/) {
            $jobs = $1;
            print STDERR "there are $jobs jobs\n";
        }
        # and about system size
        if (/^;\s*MaxProcs:\s*(\d+)\s*$/) {
            $procs = $1;
        }
        if (/^;\s*MaxNodes:\s*(\d+)\s*$/) {
            $nodes = $1;
        }
        next;
    }

    #
    # parse job line
    #
    # strip surrounding whitespace (including the line terminator) so that
    # split() does not produce a leading empty field
    my $line = $_;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    my @fields = split(/\s+/, $line);
    if (@fields != $NUM_FIELDS) {
        # wrong number of fields: report it, count it once, and skip the
        # line so that no undefined field is ever compared below
        warn "bad format at $line";
        $cnt_fmt++;
        next;
    }

    my ($job, $sub, $wait, $t, $p, $cpu, $mem, $preq, $treq, $mreq,
        $status, $u, $gr, $app, $q, $part, $prec, $think) = @fields;

    #
    # skip if this job is not meaningful
    #
    if ($job !~ /\A\d+\z/) {
        # not a job at all -- line does not start with job ID.
        $cnt_fmt++;
        next;
    }

    # show progress... (only after $job is known to be numeric)
    if ($job % 1000 == 0) {
        print STDERR "\rdid job $job...";
    }

    if ($t == 0) {
        # something potentially fishy, as job took 0 time.
        # but this can also be a resolution problem, so the job is
        # counted and deliberately NOT skipped.
        $cnt_t0++;
        #next;
    }
    if ($p == 0) {
        # something really fishy: job did not use any processors.
        # could mean job was cancelled before running.
        $cnt_p0++;
        next;
    }
    if (($sub == -1) || ($t == -1) || ($p == -1)) {
        # something very fishy: job arrival, runtime, or processors undefined.
        $cnt_bad++;
        next;
    }
    if ($status != 1) {
        # another fishy:
        # job failed (status 0)
        # job was cancelled (status 5)
        # or job is only part of a whole job (status 2, 3, 4)
        $cnt_stat++;
        next;
    }

    # example of parsing submit time: make it absolute by adding the log
    # start time, then break it down into calendar components
    $sub += $start;
    my ($sec, $min, $hr, $mday, $mon, $year, $wday, $yday, $isdst) = localtime($sub);
    $year += 1900;
    $mon  += 1;    # $mon is 0-based for use as index to array; add 1 to get month number
    $wday += 1;    # also 0-based, add 1 to get day number

    # example of printing submit, processors, and runtime about first 10 jobs
    if ($job <= 10) {
        printf("%s %3d %5d\n",
               strftime("%d/%m/%y-%H:%M:%S", localtime($sub)),
               $p,
               $t);
    }

    #
    # COLLECT DATA
    #
    # your code here...

}

# final summary of everything that was skipped or looked suspicious
print STDERR "\n";
print STDERR "$cnt_fmt lines had a bad format\n";
print STDERR "$cnt_t0 jobs had 0 time\n";
print STDERR "$cnt_p0 jobs had 0 processors\n";
print STDERR "$cnt_stat jobs had non-1 status\n";
print STDERR "$cnt_bad jobs had bad data (undefined arrival, runtime, or processors)\n";