#!/bin/bash
#
# To run this script, make use of the PBS system. For example:
#
#   qsub run_boss_worker_mpi.job -q long -N PandaRootEMC -l nodes=5:ppn=2 -v NWORKERS=10,JOBFILE="/home/rugkvi04/jobs/jobs.in",LOGFILE="/home/rugkvi04/johan/jobs/logs/jobs.log",MPIVERSION=2
#
# will run this job in the "long" queue (max. 24 hours), allocating 5 computing nodes and
# for each node two cpus (ppn). The name of the process will be "PandaRootEMC". You can set
# a couple of environment variables, which are passed to this script using the "-v" option.
# A description of the variables can be found below:
#
# JOBFILE    = MANDATORY: the directory and filename containing the job descriptions.
# NWORKERS   = OPTIONAL: number of workers, preferably take at least (nodes*ppn). If not defined,
#              this script will automatically set it for you to (nodes*ppn).
# LOGFILE    = OPTIONAL: directory and filename in which the output streams are dumped.
#              This option allows you to monitor the output while the program is running.
#              Unless LOGFILE is "/dev/null", the part of PBS_JOBID before the first dot is
#              appended to the name as "_<jobid>".
# MPIVERSION = OPTIONAL: specify which MPI version you like to use, by default this is "1".
# RUNID      = OPTIONAL: the starting run id handed to the program with "-r". If not defined,
#              the output of "date +%N" is used.
#
#
# Helpers
#

# Print a message on stderr and abort the job with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed when $1 consists of decimal digits only.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

#
# Load stuff needed to find MPI or MPI2
#
export MPI=/opt/mpi/mpich2-gcc
# Only append the previous value when there is one: a trailing ':' would make
# the dynamic loader search the current directory as well.
export LD_LIBRARY_PATH="$MPI/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PATH="$MPI/bin:$PATH"

#
# MPDBOOTRSH: the communication protocol used by mpdboot (MPI2 only!),
# possible options: rsh or ssh
#
MPDBOOTRSH="rsh"

#
# JOBFILE is mandatory: refuse to start anything without it.
#
if [ -z "$JOBFILE" ]; then
  die "JOBFILE is not set; pass it with: qsub -v JOBFILE=/path/to/jobs.in ..."
fi

#
# Calculate the number of workers in case not defined by user
# NWORKERS will be the number of nodes * ncpus reserved in the batch process
#
# The "Resource_List.nodes" line of "qstat -f" is expected to look like
# "Resource_List.nodes = 5:ppn=2"; after replacing ":ppn=" by a blank, field 3
# is the node count and field 4 the cpus per node. qstat is queried only once.
#
RESOURCE_NODES=$(qstat -f "$PBS_JOBID" | grep "Resource_List.nodes" | sed 's/:ppn=/ /')
NNODES=$(printf '%s\n' "$RESOURCE_NODES" | awk '{print $3}')
NPPN=$(printf '%s\n' "$RESOURCE_NODES" | awk '{print $4}')
if [ -z "$NPPN" ]; then
  NPPN=1
fi

if [ -z "$NWORKERS" ]; then
  if ! is_uint "$NNODES" || ! is_uint "$NPPN"; then
    die "Cannot derive NWORKERS from qstat (nodes='$NNODES', ppn='$NPPN'); set NWORKERS explicitly."
  fi
  # 10# forces base 10 so that a value with a leading zero is not read as octal.
  NWORKERS=$(( 10#$NNODES * 10#$NPPN ))
elif ! is_uint "$NWORKERS"; then
  die "NWORKERS must be a non-negative integer, got '$NWORKERS'."
fi

echo "Number of nodes is $NNODES"
echo "Number of cpus is $NPPN"
echo "Number of workers is $NWORKERS"
echo

#
# In case MPI2 is going to be used, start mpd daemons by making use of mpdboot.
#
if [ "$MPIVERSION" = "2" ]; then
  echo " MPI Version 2, MPDs are started ..."
  is_uint "$NNODES" || die "Cannot start MPDs: node count '$NNODES' from qstat is not a number."

  # One line per distinct host of this job; mktemp avoids a predictable name in /tmp.
  MPD_NODES=$(mktemp /tmp/mpd_nodes.XXXXXX) || die "Cannot create temporary node file."
  sort -u "$PBS_NODEFILE" > "$MPD_NODES"

  # Shut down any ring left over from an earlier run; failure here is expected
  # when no ring exists, hence the discarded output.
  mpdallexit > /dev/null 2>&1
  if ! mpdboot -f "$MPD_NODES" -n "$NNODES" --rsh="$MPDBOOTRSH"; then
    echo "Warning: mpdboot reported a failure, mpirun will probably fail too." >&2
  fi
  rm -f -- "$MPD_NODES"
  mpdtrace -l
  echo
fi

#
# NPROCS = represents the number of processes, e.g. NWORKERS + 1 BOSS,
# this variable is calculated automatically by this script.
#
NPROCS=$(( 10#$NWORKERS + 1 ))

#
# Check whether the starting runid is set. If not, use "date".
#
if [ -z "$RUNID" ]; then
  RUNID=$(date +%N)
fi

#
# Run the MPI-based program...
#
# The arguments are kept in an array so that a JOBFILE containing blanks or
# glob characters reaches the program as a single, unmodified argument.
#
ARGS=(-j "$JOBFILE" -r "$RUNID" -v)
PROGRAM="$HOME/bin/boss_worker_mpi"

if [ -z "$LOGFILE" ]; then
  mpirun -np "$NPROCS" "$PROGRAM" "${ARGS[@]}"
  STATUS=$?
elif [ "$LOGFILE" = "/dev/null" ]; then
  mpirun -np "$NPROCS" "$PROGRAM" "${ARGS[@]}" > /dev/null 2>&1
  STATUS=$?
else
  # Part of the PBS job id in front of the first dot.
  JOBID=${PBS_JOBID%%.*}
  # stderr is duplicated onto stdout's descriptor: opening the file a second
  # time would give each stream its own offset and let them overwrite each other.
  mpirun -np "$NPROCS" "$PROGRAM" "${ARGS[@]}" > "${LOGFILE}_${JOBID}" 2>&1
  STATUS=$?
fi

if [ "$MPIVERSION" = "2" ]; then
  echo " MPDs are terminated ..."
  mpdallexit
fi

# Report the result of mpirun to the batch system instead of an unconditional
# failure, so that a successful run is recorded as successful.
exit "$STATUS"