#!/usr/bin/perl
#
# Scan job execution log files from the SuperCluster repository (the OSC
# Linux cluster), and use convert2swf.pm to convert the data into the
# standard workload format.
#

use warnings;
use strict;
use Time::Local;
use POSIX;

use convert2swf_v4;

############################
#
# DO THE CONVERSION:
#

# Site parameters; get_jobs() and generate_header() further down in this
# file read these file-scoped variables directly.
my $tz_str    = "US/Eastern";
my $max_nodes = 57;
my $max_procs = 178;

# Data exchanged with the SWF module.
my @queues;
my $format_bugs;    # get_jobs() appends one line per suspicious record

SWF::init( $tz_str, $max_procs, \@queues );

# Parse the trace first, then let the SWF module convert it.
my $jobs_data = get_jobs();
my ( $zero, $maxtime ) = SWF::convert( $jobs_data, $format_bugs );

# The header needs the time range computed by the conversion.
my $header = generate_header( $zero, $maxtime, $max_procs );
SWF::print( $header );


#####################################
# SPECIFICS for Maui workloads
# the format of each entry includes the following space-separated fields:
#  0 JobID 1                          (unique)
#  1 Nodes Requested                  (0 = no node request count specified)
#  2 Tasks Requested                  (default 1)
#  3 User Name
#  4 Group Name
#  5 Wallclock Limit                  in seconds
#  6 Job Completion State             One of Completed, Removed, NotRun
#  7 Required Class                   Class/queue required by job (e.g. [batch:1])
#  8 Submission Time                  seconds from epoch
#  9 Dispatch Time                    seconds from epoch when scheduler requested that the job start
# 10 Start Time                       seconds from epoch when job began executing
#                                     (NOTE: usually identical to 'Dispatch Time')
# 11 Completion Time                  seconds from epoch
# 12 Required Network Adapter
# 13 Required Node Architecture
# 14 Required Node Operating System
# 15 Required Node Memory Comparison  one of >, >=, =, <=, <
# 16 Required Node Memory             MB per node, using field 15
# 17 Required Node Disk Comparison    one of >, >=, =, <=, <
# 18 Required Node Disk               MB per node, using field 17
# 19 Required Node Attributes         (e.g. '[fast][ethernet]')
# 20 System Queue Time                seconds from epoch when job met all fairness policies
# 21 Tasks Allocated                  (NOTE: in most cases identical to field 2)
# 22 Required Tasks Per Node          ('-1' if not specified)
# 23 QOS                              QOS requested:delivered (e.g.
#                                     'hipriority:bottomfeeder')
# 24 JobFlags                         list of job attributes (e.g. [BACKFILL][BENCHMARK][PREEMPTEE])
# 25 Account Name
# 26 Executable
# 27 Comment                          Resource manager specific list of job attributes
# 28 Bypass Count                     Number of times job was bypassed by lower priority jobs via backfill
# 29 ProcSeconds Utilized
# 30 Partition Name
# 31 Dedicated Processors per Task
# 32 Dedicated Memory per Task        in MB
# 33 Dedicated Disk per Task          in MB
# 34 Dedicated Swap per Task          virtual memory in MB
# 35 Start Date                       seconds from epoch indicating earliest time job can start
# 36 End Date                         seconds from epoch indicating latest time by which job must complete
# 37 Allocated Host List              colon delimited list of hosts (e.g. node001:node004)
# 38 Resource Manager Name
# 39 Required Host Mask               list of hosts required by job
# 40 Reservation                      name of reservation required by job
# 41 Set Description                  constraints required by node (e.g. 'ONEOF:PROCSPEED:350:450:500')
# 42 Application Simulator Data       (e.g. 'HSM:IN=infile.txt:140000;OUT=outfile.txt:500000')
# 43 RESERVED FIELD 1

# Number of job records collected by get_jobs(); reported in the SWF header.
# Declared without an initializer on purpose: the main code earlier in the
# file calls get_jobs() before execution reaches this statement, so an
# initializer here would only run after the count has already been used.
# get_jobs() sets the value explicitly instead.
my $job_cnt;

sub get_jobs {
    ##############
    # Parse the input format and create a hash for each job.
    #
    # Input is read with <>, i.e. from the files named on the command line
    # or from STDIN.  Blank lines are skipped; records that do not have
    # exactly 44 fields are reported with warn() and skipped.
    #
    # Each job hash may contain the keys: trace, status, submit, start, end,
    # procs, cpu, req_procs, req_mem, user, group, app, queue, partition.
    #
    # Returns: reference to the list of job hashes.
    # Side effects: appends diagnostics to the file-scoped $format_bugs and
    # stores the number of collected jobs in the file-scoped $job_cnt.
    #
    # note: the original format provides times in human-readable format
    # (hh:mm:ss), but these seem to be wrong, and specifically the result of
    # using gmtime instead of localtime. so here we also need to use gmtime.
    # NOTE(review): this routine only reads the epoch-second fields and never
    # calls gmtime itself; the note above may be carried over from another
    # converter -- confirm.

    my @list_of_jobs;

    #
    # scan trace and collect job info
    #
    while ( my $raw = <> ) {
        #
        # parse line and extract info
        #
        next if $raw =~ /^\s*$/;    # empty line

        my %nj;
        $nj{trace} = $raw;          # used for error messages

        # get rid of white space at both ends (the previous greedy pattern
        # left trailing blanks and carriage returns in place)
        my $rec = $raw;
        $rec =~ s/^\s+//;
        $rec =~ s/\s+$//;

        # normalize representation of void
        $rec =~ s/\[NONE\]/-1/g;
        $rec =~ s/DEFAULT/-1/g;

        my @line = split ' ', $rec;    # split into fields
        if ( @line != 44 ) {
            # report the real field count, not the last index
            warn( ">>>Format problem on $rec\ngot "
                  . scalar(@line)
                  . " fields (should be 44)" );
            next;
        }

        #
        # set the desired fields as specified in convert2swf.pm
        #

        # map status first
        my $status = 0;
        if ( $line[6] eq "Completed" ) {
            $status = 1;
        }
        elsif ( $line[6] eq "Removed" || $line[6] eq "NotRun" ) {
            $status = 5;
        }
        else {
            $format_bugs .= "unknown_status_problem [$line[6]] $rec\n";
        }
        # an unrecognized state leaves the status key unset
        $nj{status} = $status if $status != 0;

        # get submit, start, and end times
        $nj{submit} = $line[8];

        my $start = $line[9];
        if ( $start == 0 ) {
            if ( $status == 1 ) {
                # job is OK -- a real problem
                $format_bugs .= "start_zero $rec\n";
            }
            # removed before started
            $start = -1;
        }
        $nj{start} = $start;

        $nj{end} = $line[11];

        # number of processors
        # regrettably $line[37] only contains one node, not the list.
        # also, $line[1] is always 0.
        # so need to use allocated tasks and hope for the best.
        my $procs = $line[21];
        $nj{procs} = $procs;

        # CPU time: convert to average per processor (-1 when unknown)
        my $cpu = -1;
        if ( $procs > 0 ) {
            $cpu = $line[29] / $procs;
        }
        $nj{cpu} = $cpu;

        # requested number of processors
        # again, use requested tasks instead
        $nj{req_procs} = $line[2];

        # requested runtime
        # should be $line[5], but empirically always less than 3 min...
        #my $req_time = $line[5];
        #$nj{req_time} = $req_time;

        # no memory usage data ?
        # but have requested memory; only recorded when the comparison
        # field is "=" or contains ">"
        if ( $line[15] eq "=" || $line[15] =~ />/ ) {
            $nj{req_mem} = $line[16];
        }

        $nj{user}      = $line[3];     # user
        $nj{group}     = $line[4];     # group
        $nj{app}       = $line[26];    # application
        $nj{queue}     = $line[7];     # queue
        $nj{partition} = $line[30];    # partition

        push @list_of_jobs, \%nj;
    }

    # Always leave a defined count behind, even for empty input.
    $job_cnt = scalar @list_of_jobs;

    return \@list_of_jobs;
}

sub generate_header {
    #####################
    # Build the SWF header comment block.
    #
    # Parameters:
    #   $zero      - epoch seconds written as UnixStartTime / StartTime
    #   $maxtime   - epoch seconds written as EndTime
    #   $max_procs - value written as MaxProcs
    # Also reads the file-scoped $job_cnt, $tz_str and $max_nodes.
    #
    # Returns: the header as one string of newline-terminated "; ..." lines.
    my ( $zero, $maxtime, $max_procs ) = @_;

    # Guard against being called before get_jobs() has set the counter.
    my $n_jobs = defined $job_cnt ? $job_cnt : 0;

    my $date_fmt = "%a %b %2d %H:%M:%S %Z %Y";

    # Local variable: do not write into the caller's file-scoped $header.
    my $header = join '', map { "$_\n" } (
        "; Version: 2.2",
        "; Computer: Linux cluster",
        "; Installation: OSC",
        "; Acknowledge: SuperCluster HPC Workload/Resource Trace Repository",
        "; Information: http://www.supercluster.org/research/traces/",
        ";              http://www.cs.huji.ac.il/labs/parallel/workload/",
        "; Note: the cluster is composed of 32 quad-processor nodes + 25 dual-processor nodes",
        "; Note: Scheduler was Maui 3.0.7",
    );

    $header .= sprintf( "; Conversion: David Talby and Dror Feitelson (feit\@cs.huji.ac.il) %s\n",
                        strftime( "%d %b %Y", localtime() ) );
    $header .= sprintf( "; MaxJobs: %d\n",    $n_jobs );
    $header .= sprintf( "; MaxRecords: %d\n", $n_jobs );
    $header .= "; Preemption: No\n";
    $header .= sprintf( "; UnixStartTime: %d\n", $zero );
    $header .= "; TimeZone: -18000\n";
    $header .= sprintf( "; TimeZoneString: %s\n", $tz_str );
    $header .= sprintf( "; StartTime: %s\n", strftime( $date_fmt, localtime($zero) ) );
    $header .= sprintf( "; EndTime:   %s\n", strftime( $date_fmt, localtime($maxtime) ) );
    $header .= "; MaxNodes: $max_nodes\n";
    $header .= "; MaxProcs: $max_procs\n";

    return $header;
}