|   |     | 
| (16 intermediate revisions by 6 users not shown) | 
| Line 3: | Line 3: | 
|  | =General= |  | =General= | 
|  |  |  |  | 
| − | To check out the ET: | + | To check out the ET, follow the instructions at http://einsteintoolkit.org/download/ under "Current development version". | 
|  |  |  |  | 
| − |   mkdir etrelease
 | + | Now configure simfactory: | 
| − |   cd etrelease
 |  | 
| − |   curl -O https://github.com/gridaphobe/CRL/raw/master/GetComponents 
 |  | 
| − |   chmod a+x GetComponents
 |  | 
| − |   ./GetComponents --root=. -a https://svn.einsteintoolkit.org/manifest/trunk/einsteintoolkit.th
 |  | 
| − |   cp udb.example.pm udb.pm
 |  | 
|  |  |  |  | 
| − | In udb.pm, 
 | + |   cd Cactus | 
|  | + |   cp simfactory/etc/defs.local.ini.simple simfactory/etc/defs.local.ini | 
|  | + |   nano simfactory/etc/defs.local.ini | 
|  |  |  |  | 
| − | * Replace "redshift" with the name of your local machine, unless you are running on one of the supported machines.
 | + | Edit defs.local.ini and replace | 
|  |  |  |  | 
| − | * Replace YOUR_LOGIN, YOUR_EMAIL_ADDRESS and YOUR_ALLOCATION as appropriate.
 | + |   YOUR_LOGIN with your username | 
| − |   | + |   YOUR@EMAIL.ADDRESS with your usual email address | 
| − | * Set sourcebasedir for your local machine to be the directory containing etrelease (do not set it to etrelease itself).
 | + |   YOUR_ALLOCATION with your project allocation | 
| − |   | + |   YOUR_THORNLIST with manifest/einsteintoolkit.th | 
| − | * Set your username on each machine:
 |  | 
| − |     set_option 'MACHINE'  , 'user', 'USERNAME';
 |  | 
|  |  |  |  | 
|  | See the machine-specific notes below for any additional steps for each machine. |  | See the machine-specific notes below for any additional steps for each machine. | 
|  |  |  |  | 
| − |   touch par/testsuite.par
 |  | 
|  |    sim sync <machine> |  |    sim sync <machine> | 
| − |    sim build --thornlist manifest/einsteintoolkit.th --scriptfile <machine>-testsuite.sh | + |    sim login <machine> | 
| − |    sim remote <machine> create-submitpar/testsuite.par NSLOTS 6:00:00 | + |    sim build | 
| − |   | + |   sim create-submit ettests_1proc --testsuite --procs <N> --num-threads <N> --walltime 3:00:00 | 
| − | This is a dummy parameter file.
 | + |    sim create-submit ettests_2proc --testsuite --procs <N> --num-threads <N/2> --walltime 3:00:00 | 
| − |   |  | 
| − | When the job is finished, you should have the required <machine>_i_j.log files in machine:etrelease/*.log.  The testsuite output will also be stored in machine:TEST_XXX, where XXX is constructed from the date at which the job was run.
 |  | 
| − |   |  | 
| − | Update the testsuite status page by adding the log files to the release-info repository:
 |  | 
| − |   |  | 
| − |   svn co https://svn.einsteintoolkit.org/www/release-info
 |  | 
| − |   cd release-info
 |  | 
| − |   scp machine:etrelease/*.log .
 |  | 
| − |   |  | 
| − | Then commit the new/updated files.  
 |  | 
| − |   |  | 
| − | To re-run the tests with an updated checkout, run the GetComponents command above with the --update flag, rebuild, delete the "testsuites" output simulation, and resubmit the job.
 |  | 
| − |   |  | 
| − | =Machines=
 |  | 
| − |   |  | 
| − | ==Kraken==
 |  | 
| − |   |  | 
| − | Edit simfactory/mdb.pm
 |  | 
| − |   |  | 
| − |   -        myproxy-logon -p 7514 -s myproxy.teragrid.org -T -l @USER@ -o @SOURCEDIR@/.globus/proxy-teragrid
 |  | 
| − |   +        myproxy-logon -p 7514 -s myproxy.teragrid.org -T -l <username> -o @SOURCEDIR@/.globus/proxy-teragrid
 |  | 
| − |   |  | 
| − | where <username> is your user name on Kraken.  This is related to [https://trac.einsteintoolkit.org/ticket/381 Ticket #381]
 |  | 
| − |   |  | 
| − | Change the sourcebasedir in simfactory/udb.pm to be under /lustre/scratch.  This is necessary because the Cactus directory must be visible from the compute node when running the tests (to see the parameter files, testsuite reference output and test.ccl files), and the home directory on Kraken is not visible from the compute nodes.
 |  | 
| − |   |  | 
| − |     set_option 'kraken', 'sourcebasedir', '/lustre/scratch/USERNAME';
 |  | 
| − |   |  | 
| − | Copy kraken.sh as kraken-testsuite.sh in simfactory/scriptfiles.  Replace the aprun command with the following:
 |  | 
| − |   |  | 
| − | <pre> |  | 
| − | cd @SOURCEDIR@
 |  | 
| − |   |  | 
| − | export CCTK_TESTSUITE_RUN_COMMAND="aprun -n \$nprocs -d 1 \$exe \$parfile"
 |  | 
| − |   |  | 
| − | test_dir=TEST_$(date +%Y-%m-%d-%H%M%S)
 |  | 
| − | mkdir $test_dir
 |  | 
| − |   |  | 
| − | CONFIGNAME=$(ls configs|tail) # SimFactory should provide the configuration name
 |  | 
| − |   |  | 
| − | for i in 1 2 3; do
 |  | 
| − |   case $i in
 |  | 
| − |     1 )
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=1
 |  | 
| − |       export OMP_NUM_THREADS=1;;
 |  | 
| − |     2 )
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=2
 |  | 
| − |       export OMP_NUM_THREADS=1;;
 |  | 
| − |     3 )
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=2
 |  | 
| − |       export OMP_NUM_THREADS=2;;
 |  | 
| − |   esac
 |  | 
| − |   make $CONFIGNAME-testsuite PROMPT=no
 |  | 
| − |   cp TEST/$CONFIGNAME/summary.log kraken__${CCTK_TESTSUITE_RUN_PROCESSORS}_${OMP_NUM_THREADS}.log
 |  | 
| − |   mv TEST $test_dir/TEST.$i
 |  | 
| − | done
 |  | 
| − | </pre>
 |  | 
| − |   |  | 
| − | ==Datura==
 |  | 
| − |   |  | 
| − | Copy datura.sh as datura-testsuite.sh in simfactory/scriptfiles.  Replace the mpirun command with the following:
 |  | 
| − |   |  | 
| − | <pre>
 |  | 
| − | cd @SOURCEDIR@
 |  | 
| − |   |  | 
| − | test_dir=TEST_$(date +%Y-%m-%d-%H%M%S)
 |  | 
| − | mkdir $test_dir
 |  | 
| − |   |  | 
| − | CONFIGNAME=$(ls configs|tail) # SimFactory should provide the configuration name
 |  | 
| − |   |  | 
| − | export CACTUS_STARTTIME=$(date +%s)
 |  | 
| − |   |  | 
| − | for i in 1 2 3; do
 |  | 
| − |    case $i in |  | 
| − |     1 )
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=1
 |  | 
| − |       export OMP_NUM_THREADS=1;;
 |  | 
| − |     2 )
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=2
 |  | 
| − |       export OMP_NUM_THREADS=1;;
 |  | 
| − |     3 )
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=2
 |  | 
| − |       export OMP_NUM_THREADS=2;;
 |  | 
| − |   esac
 |  | 
| − |   export CCTK_TESTSUITE_RUN_COMMAND="${MPIDIR}/bin/mpirun -v --mca btl openib,self --mca mpi_leave_pinned 0 -np \$nprocs -npernode $CCTK_TESTSUITE_RUN_PROCESSORS \$exe -L 3 \$parfile"
 |  | 
| − |   make $CONFIGNAME-testsuite PROMPT=no
 |  | 
| − |   cp TEST/$CONFIGNAME/summary.log datura__${CCTK_TESTSUITE_RUN_PROCESSORS}_${OMP_NUM_THREADS}.log
 |  | 
| − |   mv TEST $test_dir/TEST.$i
 |  | 
| − | done
 |  | 
| − | </pre> |  | 
| − |   |  | 
| − | ==BlueDrop==
 |  | 
| − |   |  | 
| − | The following script is a slightly modified version of run_tests adapted to run on 
 |  | 
| − | the Power7 machine BlueDrop:
 |  | 
| − |   |  | 
| − |  set -e
 |  | 
| − |  
 |  | 
| − |  cd Cactus
 |  | 
| − |  
 |  | 
| − |  #Blue drop (Note that we need to specify the absolute path for the bluedrop_run*.ll files):
 |  | 
| − |  
 |  | 
| − |  for i in 1 2 3; do
 |  | 
| − |   case $i in
 |  | 
| − |     1 )
 |  | 
| − |      hostname | grep ^bd && export CCTK_TESTSUITE_RUN_COMMAND="poe \$exe \$parfile -retry -1 -llfile /home/bcmundim/bluedrop_run_1.ll"
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=1
 |  | 
| − |       export OMP_NUM_THREADS=1;;
 |  | 
| − |     2)
 |  | 
| − |       hostname | grep ^bd && export CCTK_TESTSUITE_RUN_COMMAND="poe \$exe \$parfile -retry -1 -llfile /home/bcmundim/bluedrop_run_2.ll"
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=2
 |  | 
| − |       export OMP_NUM_THREADS=1;;
 |  | 
| − |     3)
 |  | 
| − |       hostname | grep ^bd && export CCTK_TESTSUITE_RUN_COMMAND="poe \$exe \$parfile -retry -1 -llfile /home/bcmundim/bluedrop_run_2.ll"
 |  | 
| − |       export CCTK_TESTSUITE_RUN_PROCESSORS=2
 |  | 
| − |       export OMP_NUM_THREADS=2;;
 |  | 
| − |   esac
 |  | 
| − |   make sim-testsuite PROMPT=no
 |  | 
| − |   mv TEST/sim/summary.log ../${HOSTNAME}__${CCTK_TESTSUITE_RUN_PROCESSORS}_${OMP_NUM_THREADS}.log
 |  | 
| − |  done
 |  | 
| − |   |  | 
| − | Remember to change the absolute path to bluedrop_run_*.ll, as in /home/bcmundim above, to the appropriate location 
 |  | 
| − | of your LoadLeveler batch job script files. The following job scripts were used to run the test suites; bluedrop_run_1.ll and bluedrop_run_2.ll, respectively:
 |  | 
|  |  |  |  | 
|  | + | Replace <N> and <N/2> with the number of cores on each node, and half of this, respectively, for the machine you are using.  Remember that "procs" here means "cores" and "num-threads" means "number of threads per process". The idea is to use a full node, i.e. all the cores, and then either one or two MPI processes. | 
|  |  |  |  | 
| − |  ## Comment the class line to run on short queue:
 | + | When the jobs have finished, you should have the summary.log files in | 
| − |  #@ class = debug
 |  | 
| − |  #@ job_type = parallel
 |  | 
| − |  ##@ node_usage = not_shared
 |  | 
| − |  #@ node_usage = shared
 |  | 
| − |  #@ environment = COPY_ALL
 |  | 
| − |  #@ tasks_per_node = 1
 |  | 
| − |  #@ node = 1
 |  | 
| − |  #@ wall_clock_limit = 0:30:00
 |  | 
| − |  ### uncomment below for a normal batch job
 |  | 
| − |  # #@ output = $(host).$(jobid).$(stepid).out
 |  | 
| − |  # #@ error = $(host).$(jobid).$(stepid).err
 |  | 
|  |  |  |  | 
| − |  #@ queue
 | + |   <simulations>/ettests_1proc/output-0000/TEST/sim/summary.log | 
| − |  ## uncomment for a normal batch job
 | + |   <simulations>/ettests_2proc/output-0000/TEST/sim/summary.log | 
| − |  # $HOME/a.out
 |  | 
|  |  |  |  | 
|  | + | Update the testsuite status page by adding the log files to the <code>testsuite_results</code> repository: | 
|  |  |  |  | 
| − |  ## Comment the class line to run on short queue:
 | + |   git clone git@bitbucket.org:einsteintoolkit/testsuite_results | 
| − |  #@ class = debug
 | + |   cd testsuite_results/results | 
| − |  #@ job_type = parallel
 | + |   scp machine:<simulations>/ettests_1proc/output-0000/TEST/sim/summary.log <machine>__1_<N>.log | 
| − |  ##@ node_usage = not_shared
 | + |   scp machine:<simulations>/ettests_2proc/output-0000/TEST/sim/summary.log <machine>__2_<N/2>.log | 
| − |  #@ node_usage = shared
 | + |   git add <machine>*.log | 
| − |  #@ environment = COPY_ALL
 | + |   git commit -m "results: added updates for <machine>" | 
| − |  #@ tasks_per_node = 2
 | + |   git push | 
| − |  #@ node = 1
 |  | 
| − |  #@ wall_clock_limit = 0:30:00
 |  | 
| − |  ### uncomment below for a normal batch job
 |  | 
| − |  # #@ output= $(host).$(jobid).$(stepid).out
 |  | 
| − |  # #@ error = $(host).$(jobid).$(stepid).err
 |  | 
|  |  |  |  | 
| − |  #@ queue
 | + | To re-run the tests with an updated checkout, run the GetComponents command above with the --update flag, rebuild, delete the "ettests_*" simulations, and resubmit the simulations. | 
| − |  ## uncomment for a normal batch job
 |  | 
| − |  # $HOME/a.out
 |  | 
This page contains notes and instructions for people running the ET testsuites on various machines.  If you have experience running testsuites on a machine that is not listed here, please consider adding some information that might help others (or yourself!) in the future.
General
To check out the ET, follow the instructions at http://einsteintoolkit.org/download/ under "Current development version".
Now configure simfactory:
 cd Cactus
 cp simfactory/etc/defs.local.ini.simple simfactory/etc/defs.local.ini
 nano simfactory/etc/defs.local.ini
Edit defs.local.ini and replace
 YOUR_LOGIN with your username
 YOUR@EMAIL.ADDRESS with your usual email address
 YOUR_ALLOCATION with your project allocation
 YOUR_THORNLIST with manifest/einsteintoolkit.th
See the machine-specific notes below for any additional steps for each machine.
 sim sync <machine>
 sim login <machine>
 sim build
 sim create-submit ettests_1proc --testsuite --procs <N> --num-threads <N> --walltime 3:00:00
 sim create-submit ettests_2proc --testsuite --procs <N> --num-threads <N/2> --walltime 3:00:00
Replace <N> and <N/2> with the number of cores on each node, and half of this, respectively, for the machine you are using.  Remember that "procs" here means "cores" and "num-threads" means "number of threads per process". The idea is to use a full node, i.e. all the cores, and then either one or two MPI processes.
When the jobs have finished, you should have the summary.log files in
 <simulations>/ettests_1proc/output-0000/TEST/sim/summary.log
 <simulations>/ettests_2proc/output-0000/TEST/sim/summary.log
Update the testsuite status page by adding the log files to the testsuite_results repository:
 git clone git@bitbucket.org:einsteintoolkit/testsuite_results
 cd testsuite_results/results
 scp <machine>:<simulations>/ettests_1proc/output-0000/TEST/sim/summary.log <machine>__1_<N>.log
 scp <machine>:<simulations>/ettests_2proc/output-0000/TEST/sim/summary.log <machine>__2_<N/2>.log
 git add <machine>*.log
 git commit -m "results: added updates for <machine>"
 git push
To re-run the tests with an updated checkout, re-run the GetComponents command from the download instructions with the --update flag, rebuild, delete the "ettests_*" simulations, and resubmit the simulations.