#!/bin/bash
#
# To run this script, make use of the PBS system. For example:
#
#   qsub run_boss_worker_mpi.job -N PandaRootEMC -l nodes=5:ppn=8 -v NWORKERS=40,JOBFILE="$HOME/jobs/jobs.in",LOGFILE="$HOME/jobs/logs/jobs.log",MPIVERSION=2
#
# will run this job in the default queue, allocating 5 computing nodes and,
# for each node, eight cpus (ppn). The job will be named "PandaRootEMC".
# You can set a couple of environment variables, which are passed to this
# script using the "-v" option. A description of the variables can be found
# below:
#
# JOBFILE    = MANDATORY: the directory and filename containing the job
#              descriptions.
# NWORKERS   = OPTIONAL: number of workers, preferably take at least
#              (nodes*ppn). If not defined, this script will automatically
#              set it for you to (nodes*ppn).
# LOGFILE    = OPTIONAL: directory and filename in which the output streams
#              are dumped. This option allows you to monitor the output while
#              the program is running. Unless it is "/dev/null", the numeric
#              part of the PBS job id is appended as "_<jobid>".
# MPIVERSION = OPTIONAL: specify which MPI version you like to use, by
#              default this is "1".
# RUNID      = OPTIONAL: starting run id handed to the program. If not
#              defined, the output of "date +%N" is used.
#
# Script body: sets up the MPI environment, works out how many processes to
# start, optionally boots the MPD ring (MPI2), runs the boss/worker program
# through mpirun and finally exits with mpirun's own exit status.
#

# Print an error message on stderr and abort the job.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Succeed when $1 is a non-empty string made of decimal digits only.
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

#
# JOBFILE is mandatory: stop right away instead of starting mpirun with an
# empty "-j" argument.
#
if [ -z "$JOBFILE" ]; then
  die "JOBFILE is not set; pass it to qsub with -v JOBFILE=<file>"
fi

#
# Load stuff needed to find MPI or MPI2
#
if [ "$MPIVERSION" = "2" ]; then
  export MPI=/opt/exp_soft/mpich2
else
  export MPI=/opt/exp_soft/mpich
fi
# Only append the previous value when there is one, so that no empty entry
# (which means "current directory") ends up in the search path.
export LD_LIBRARY_PATH="$MPI/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PATH="$MPI/bin:$PATH"

#
# MPDBOOTRSH: the communication protocol used by mpdboot (MPI2 only!),
#             possible options: rsh or ssh
MPDBOOTRSH="ssh"

#
# Calculate the number of workers in case not defined by user
# NWORKERS will be the number of nodes * ncpus reserved in the batch process
#
# The server is queried once; the matching line looks like
#   Resource_List.nodes = 5:ppn=8
# so after turning ":ppn=" into a blank, field 3 is the node count and
# field 4 the cpus per node.
#
NODESPEC=$(ssh "${PBS_O_LOGNAME}@${PBS_SERVER}" qstat -f "$PBS_JOBID" \
  | grep "Resource_List.nodes" \
  | sed 's/:ppn=/ /')
NNODES=$(printf '%s\n' "$NODESPEC" | awk '{print $3}')
NPPN=$(printf '%s\n' "$NODESPEC" | awk '{print $4}')

# If the query failed or returned something that is not a plain number,
# fall back to counting the distinct hosts in the PBS node file.
# NOTE(review): assumes one distinct host per requested node - confirm.
if ! is_uint "$NNODES" && [ -r "$PBS_NODEFILE" ]; then
  NNODES=$(($(sort -u -- "$PBS_NODEFILE" | wc -l)))
fi

if [ -z "$NPPN" ]; then
  NPPN=1
fi

if [ -z "$NWORKERS" ]; then
  if is_uint "$NNODES" && is_uint "$NPPN" && [ "$NNODES" -gt 0 ]; then
    # "10#" forces base 10 so a value with a leading zero is not read as octal.
    NWORKERS=$((10#$NNODES * 10#$NPPN))
  else
    die "cannot derive NWORKERS (nodes='$NNODES', ppn='$NPPN'); set NWORKERS explicitly"
  fi
fi
is_uint "$NWORKERS" || die "NWORKERS must be a non-negative integer, got '$NWORKERS'"

echo "Number of nodes is $NNODES"
echo "Number of cpus is $NPPN"
echo "Number of workers is $NWORKERS"
echo

#
# In case MPI2 is going to be used, start mpd daemons by making use of mpdboot.
#
if [ "$MPIVERSION" = "2" ]; then
  echo " MPI Version 2, MPDs are started ..."
  # Unpredictable temporary file name instead of a guessable one in /tmp.
  MPD_NODES=$(mktemp "${TMPDIR:-/tmp}/mpd_nodes.XXXXXX") \
    || die "cannot create a temporary file for the mpd host list"
  # One line per distinct entry of the node file, first field only.
  sort -u -- "$PBS_NODEFILE" | awk '{print $1}' > "$MPD_NODES"
  # Stop any ring left over from an earlier run; failure here is expected
  # when no ring exists, hence the output is discarded.
  mpdallexit > /dev/null 2>&1
  if ! mpdboot -f "$MPD_NODES" -n "$NNODES" --rsh="$MPDBOOTRSH"; then
    echo "WARNING: mpdboot reported a failure" >&2
  fi
  rm -f -- "$MPD_NODES"
  mpdtrace -l
  echo
fi

#
# NPROCS = represents the number of processes, e.g. NWORKERS + 1 BOSS,
#          this variable is calculated automatically by this script.
#
NPROCS=$((10#$NWORKERS + 1))

#
# Check whether the starting runid is set. If not, use "date".
#
if [ -z "$RUNID" ]; then
  RUNID=$(date +%N)
fi

#
# Run the MPI-based program...
#
# The command line is kept in arrays so that paths containing blanks stay
# single arguments.
#
if [ "$MPIVERSION" = "2" ]; then
  PROG=$HOME/bin/boss_worker_mpi2
  MPIPROG=(mpirun)
else
  PROG=$HOME/bin/boss_worker_mpi
  MPIPROG=(mpirun -machinefile "$PBS_NODEFILE")
fi
ARGS=(-j "$JOBFILE" -r "$RUNID" -v)

# Start the boss and its workers; redirections are applied by the caller.
run_job() {
  "${MPIPROG[@]}" -np "$NPROCS" "$PROG" "${ARGS[@]}"
}

if [ -z "$LOGFILE" ]; then
  run_job
  STATUS=$?
elif [ "$LOGFILE" = "/dev/null" ]; then
  run_job > /dev/null 2>&1
  STATUS=$?
else
  # Numeric part of the PBS job id (everything before the first dot).
  JOBID=${PBS_JOBID%%.*}
  # stderr is duplicated onto stdout so both streams share one file offset;
  # opening the file twice would let them overwrite each other.
  run_job > "${LOGFILE}_${JOBID}" 2>&1
  STATUS=$?
fi

if [ "$MPIVERSION" = "2" ]; then
  echo " MPDs are terminated ..."
  mpdallexit
fi

# Report the outcome of the MPI run to PBS instead of a constant failure code.
exit "$STATUS"