/*
 * generate workload based on statistical model
 *
 * for each job, decide the size (number of nodes used), the runtime,
 * and the repetition count (how many times it will be run).
 * the output is one line per job, with these 3 values.
 *
 * the size is based on a complex discrete distribution.
 * the runtime is hyperexponential, based on the size.
 * the repetition is generalized Zipf (harmonic)
 *
 * the arrival process was not modeled in the original version; it was
 * part of the simulation that uses this workload as input. The original
 * papers used a simple Poisson process, and varied the mean of the
 * interarrival times to modify the load. this version has the option
 * to include such arrival times.
 *
 * author: Dror Feitelson, 1995
 * modifications:
 * 1996: improved model (see below)
 * 18.11.99: bug fix in adding weight at multiples of 10 (thanks to Kento Aida)
 * 27.6.00: bug fix in extending the runtime at powers of two
 * 6.7.00: add option of arrival times, change floats to doubles
 */

#include	<math.h>
#include	<stdio.h>

/*
 * two versions of the model are available.
 * the original version, from the 1996 paper on "packing schemes for
 * gang scheduling", is obtained by defining ORIGINAL_MODEL below.
 * the somewhat improved model from the 1997 paper on "improved utilization
 * and responsiveness with gang acheduling" is the default.
 */
#if 0
#define ORIGINAL_MODEL
#endif

/*
 * the program can include arrival times, or leave them undefined.
 * no arrival times is the default.
 */
#if 0
#define WITH_ARRIVALS
#endif

/*
 * the program will create gnuplot data files of the model distributions
 * if the following definition is made effective. the files are created in
 * a subdirectory "plt".
 */
#if 0
#define MAKE_PLOTS
#endif

/*
 * The following causes output in the standard format.
 * The alternative is a line with 3 numbers for each job:
 * the number of processors, the runtime, and the number of repetitions.
 */
#define STD_FORMAT

/*
 * the following constants may be changed to fit your needs, but there are
 * no guarantees that everything will work as it should... so check the code!
 */
#define	LEN		300000	/* jobs generated */

#define	MAX_SIZE	128	/* machine size >= 8 */

#define	TIME_LIM	64800	/* longest job allowed - 18 hr */
#define	FACTOR		9	/* about MAX_TIME / log(TIME_LIM) */

#define	MAX_REP		1000	/* far upper bound on number of repetitions */
#define	MAX_TIME	100	/* for distribution of runtimes */

#define	ARR_FACTOR	1500	/* mean of interarrival times distribution */

#define	MAXINT		0x7fffffff

double	size_dist[ MAX_SIZE ];	/* cummulative distribution of sizes */
int 	time_dist[ MAX_TIME ];	/* cummulative distribution of runtimes */
double	rep_dist[ MAX_REP ];	/* cummulative distribution of repetitions */

#ifdef MAKE_PLOTS
char	filename[ 20 ];

int	size_hist[ MAX_SIZE ];
double	time_hist[ MAX_SIZE ];
int	rep_hist[ MAX_REP ];

struct {
    double	rtime;
    int		rep;
    int		next;
} jobs[ LEN ];
int	head[ MAX_SIZE ];
#endif


main()
{
    int		i, j, k, n, ntot, size, c, rep, tot, rtbkt;
    double	sum, p, runtime, m, t;
    FILE	*hist, *times, *reps;
#ifdef WITH_ARRIVALS
    double	next_arr;
#endif

    /* initialize */
#ifdef MAKE_PLOTS
    tot = 0;
    for (i=1 ; i<MAX_SIZE ; i++)
      head[i] = -1;
#endif

    /* PHASE 1: create cummulative distributions of sizes and repetitions */

#ifdef WITH_ARRIVALS
    next_arr = 0;
#endif

    /* first create a semi-harmonic distribution */
    size_dist[0] = 1;
    for (i=1 ; i<MAX_SIZE ; i++) {
	size_dist[i] = (double)1/sqrt((double)i);
    }

    /* add weight at powers of 2 */
    for (i=1 ; i<=MAX_SIZE ; i<<=1) {
#ifdef ORIGINAL_MODEL
	size_dist[i-1] += 30 + i;
#else
	size_dist[i-1] += 35 + 1.5*i;
#endif
    }

    /* add weight at squares */
    for (i=2, j=i*i ; j<=MAX_SIZE ; i++, j=i*i) {
#ifdef ORIGINAL_MODEL
	size_dist[j-1] += 4;
#else
	size_dist[j-1] += 5;
#endif
    }

    /* add weight at multiples of 10 */
    for (i=10 ; i<=MAX_SIZE ; i+=10) {
#ifdef ORIGINAL_MODEL
	size_dist[i-1] += 3;
#else
	size_dist[i-1] += 5;
#endif
    }

#ifdef ORIGINAL_MODEL
    /* reduce weight at 1,2 */
    size_dist[0] /= 2;
    size_dist[1] /= 2;
    /* add weight at 3,5-7 */
    size_dist[2] += 15;
    size_dist[4] += 8;
    size_dist[5] += 4;
    size_dist[6] += 3;
#else
    /* reduce weight at 1,2,4 */
    size_dist[0] /= 4;
    size_dist[1] /= 4;
    size_dist[3] /= 3;
    /* add weight at 3,5-7 */
    size_dist[2] += 5;
    size_dist[4] += 7;
    size_dist[5] += 5;
    size_dist[6] += 3;
#endif

    /* impose a harmonic envelope to emphasize small sizes  */
    /* for non-special sizes, this becomes harmonic of order 1.5 */
    for (i=1 ; i<MAX_SIZE ; i++) {
/*	size_dist[i] /= sqrt((double)i); semi-harmonic - no good */
	size_dist[i] /= (double)i;
/*	size_dist[i] /= exp(i); exponential decreasing - no good*/
    }

    /* normalize */
    sum = 0;
    for (i=0 ; i<MAX_SIZE ; i++) {
	sum += size_dist[i];
    }
    for (i=0 ; i<MAX_SIZE ; i++) {
	size_dist[i] /= sum;
    }

    /* accumulate */
    for (i=1 ; i<MAX_SIZE ; i++) {
	size_dist[i] += size_dist[i-1];
    }

    /* create harmonic repetition distribution of order 2.5 */
    sum = 0;
    for (i=0 ; i<MAX_REP ; i++) {
	rep_dist[i] = ((double)1)/pow((double)(i+1),(double)2.5);
	sum += rep_dist[i];
    }

    /* normalize */
    for (i=0 ; i<MAX_REP ; i++) {
	rep_dist[i] /= sum;
    }

    /* accumulate */
    for (i=1 ; i<MAX_REP ; i++) {
	rep_dist[i] += rep_dist[i-1];
    }


    /* PHASE 2: create workload file */

#ifdef STD_FORMAT
    /* print header comments */
    printf("; Version: 2\n");
    printf("; Information: Feitelson'96 Workload Model\n");
    printf(";              From the Parallel Workloads Archive\n");
    printf(";              http://www.cs.huji.ac.il/labs/parallel/workload\n");
    printf("; Acknowledge: Dror Feitelson, feit@cs.huji.ac.il\n");
    printf("; MaxNodes: %d\n", MAX_SIZE);
    printf("; MaxRuntime: %d\n\n", TIME_LIM);
    printf("; MaxJobs: %d\n", LEN);
    printf("; MaxRecords: %d\n", LEN);
#endif

    for (n=0, ntot=0 ; ntot<LEN ; n++) {

	/* choose size from distribution */
	p = ((double)random())/MAXINT;
	i = 0;
	while (p > size_dist[i])
	  i++;
	size = i + 1;

	/* assign runtime */
#ifdef ORIGINAL_MODEL
	/*
	 * in the original model, runtimes were from a two-stage
	 * hyperexponential distribution, and the probability
	 * of choosing each branch depended on the size
	 */
	p = ((double)random())/MAXINT;
	m = (p < (0.95 - 0.20*(((double)size)/MAX_SIZE))) ? 1 : 7;
	runtime = -m * (float)log( ((double)random()) / MAXINT );
#else
	/*
	 * in the improved model, runtimes are from a three-stage
	 * hyperexponential distribution, and the probability of
	 * choosing each branch depends on the size. in addition,
	 * there is an upper bound on the runtime.
	 */
	do {
	    p = ((double)random())/MAXINT;
	    if (p < (0.90 - 0.65*( sqrt(((double)size)/MAX_SIZE) )))
	      m = 50;
	    else if (p < (0.97 - 0.37*( sqrt(((double)size)/MAX_SIZE) )))
	      m = 900;
	    else
	      m = 20000;
	    if ((size == 2) || (size == 4) || (size == 8) || (size == 16) || 
		(size == 32) || (size == 64) || (size == 128))
	      m *= 2;
	    runtime = -m * log( ((double)random()) / MAXINT );
	}
	while (runtime > TIME_LIM);
#endif

	/* assign repetition */
	p = ((double)random())/MAXINT;
	i = 0;
	while (p > rep_dist[i])
	  i++;
	rep = i;

#ifdef STD_FORMAT
      /* Now output the data for this job, using the standard format.
       * the fields are:
       * job number = ntot [start from 1, not from 0, and count repetitions]
       * submit time = -1 [if not modeled] or next_arr
       * wait time = -1 [queueing is a scheduler's problem, not
       *		       part of the workload model]
       * run time = runtime
       * used processors = p_needed
       * average CPU time = (cpu_time_used / p_needed) [same as runtime]
       * used memory = -1 [not modeled]
       * requested processors = -1 [not modeled]
       * requested time = -1 [not modeled]
       * requested memory = -1 [not modeled]
       * completed = 1 [all jobs complete OK in the model]
       * user ID = -1 [not modeled]
       * group ID = -1 [not modeled]
       * application = -1  [not modeled]
       * queue = -1  [not modeled]
       * partition = -1  [not modeled]
       * preceding job = -1 [first in sequence does not depend on anything]
       * think time = -1 [not modeled]
       */
#ifndef WITH_ARRIVALS
      printf("%6d -1 -1 %12.4f %3d %12.4f -1 -1 -1 -1 1 -1 -1 -1 -1 -1  -1 -1\n",
	     ++ntot,
	     runtime,
	     size,
	     runtime );
#else
      printf("%6d %12.4f -1 %12.4f %3d %12.4f -1 -1 -1 -1 1 -1 -1 -1 -1 -1  -1 -1\n",
	     ++ntot,
	     next_arr,
	     runtime,
	     size,
	     runtime );

      next_arr += -ARR_FACTOR * log( ((double)random()) / MAXINT );
#endif
      /*
       * if the number of repetitions is >1, generate additional jobs
       * that depend on each other
       */
      for (i=0 ; i<rep ; i++) {
      printf("%6d -1 -1 %12.4f %3d %12.4f -1 -1 -1 -1 1 -1 -1 -1 -1 -1 %6d -1\n",
	     ++ntot,
	     runtime,
	     size,
	     runtime,
	     ntot );
      }
#else
	printf("%d	%f	%d\n", size, runtime, rep+1);
#endif

#ifdef MAKE_PLOTS
	size--;		/* make zero-based */
	size_hist[ size ] += rep + 1;
	time_hist[ size ] += runtime * (rep+1);
	rep_hist[rep]++;

	tot += rep + 1;
	jobs[n].rtime = runtime;
	jobs[n].rep   = rep + 1;
	jobs[n].next  = head[size];
	head[size] = n;
#endif
    }

#ifdef MAKE_PLOTS
    /* print histograms */
    hist  = fopen("../plt/modhist.dat",  "w");
    times = fopen("../plt/modtimes.dat", "w");

    for (i=0 ; i<MAX_SIZE ; i++) {
	if (size_hist[i] != 0)
	  time_hist[i] /= (double)size_hist[i];
    }

    for (i=0 ; i<MAX_SIZE ; i++) {
	fprintf(hist,  "%d	%d\n", i+1, size_hist[i]);
	fprintf(times, "%d	%f\n", i+1, time_hist[i]);
    }

    fclose( hist );
    fclose( times );

    /* print mean runtimes and distributions for 4 buckets */

    tot = tot - size_hist[0];	/* use only parallel jobs here */

    hist = fopen("../plt/modbkt.dat", "w");
    j = 1;
    for (i=0 ; i<4 ; i++) {

	n = 0;
	size = 0;
	runtime = 0;
	for (k=0 ; k<MAX_TIME ; k++)
	  time_dist[k] = 0;
	sprintf( filename, "../plt/modrt%d.dat", i+1 );
	times = fopen(filename, "w");

	while ((n < tot/4) && (j < MAX_SIZE)) {
	    if (head[j] == -1) {
		j++;
		continue;
	    }

	    if (n + jobs[head[j]].rep > tot/4) {
		rep = tot/4 - n;
	    }
	    else {
		rep = jobs[head[j]].rep;
	    }

	    n += rep;
	    size += (j+1) * rep;
	    runtime += jobs[head[j]].rtime * rep;
	    rtbkt = (int)(log((double)jobs[head[j]].rtime)*FACTOR);
	    if (rtbkt < 0) rtbkt = 0;
	    if (rtbkt >= MAX_TIME) rtbkt = MAX_TIME-1;
	    time_dist[ rtbkt ] += rep;

	    if (rep == jobs[head[j]].rep) {
		head[j] = jobs[head[j]].next;
	    }
	    else {
		jobs[head[j]].rep -= rep;
	    }
	}
	fprintf(hist, "%f	%f\n", ((double)size)/n, runtime/n);

	sum = 0;
	for (k=0 ; k<MAX_TIME ; k++) {
	    sum += ((double)time_dist[k]) / n;	/* get cummulative distribution */
	    t = exp((double)k/FACTOR);
	    fprintf( times, "%f	%f\n", t, sum );
	}
	fclose( times );
    }
    fclose( hist );

    reps = fopen("../plt/modrunlen.dat", "w");
    for (i=0 ; i<MAX_REP ; i++)
      if (rep_hist[i] != 0)
	j = i;
    for (i=0 ; i<=j ; i++)
      fprintf(reps, "%d	%d\n", i+1, rep_hist[i]);

    fclose( reps );
#endif

    return( 0 );
}