The first example runs a stress test on a compute node:
#!/bin/bash
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# Slurm Construction Section
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# job name
#SBATCH --job-name=job-1
# partition (queue) declaration
#SBATCH --partition=dept_24
# number of requested nodes
#SBATCH --nodes=1
# number of tasks
#SBATCH --ntasks=1
# number of requested cores
#SBATCH --ntasks-per-node=24
# call a Slurm Feature
# #SBATCH --constraint=4C
# ask a specific node to run the job
# #SBATCH --nodelist=n096
# requested runtime
# #SBATCH --time=00:05:00
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# User Construction Section
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# fail fast: abort on errors, unset variables, and pipeline failures
set -euo pipefail
# current (working) directory
work_dir=$(pwd)
# username
user=$(whoami)
# directory name where job will be run (on compute node)
job_dir="${user}_${SLURM_JOB_ID}.dcb.private.net"
# creating directory on /scr folder of compute node
# (-p: do not fail if it already exists, e.g. on a requeued job)
mkdir -p "/scr/${job_dir}"
# change to the newly created directory; abort rather than run in the
# submit directory if the scratch filesystem is unavailable
cd "/scr/${job_dir}" || exit 1
# copy the submit file (and all other related files/directories)
rsync -a "${work_dir}"/* .
# put date and time of starting job in a file
date > date.txt
# put hostname of compute node in a file
hostname > hostname.txt
# copy files back on exit or interrupt (single quotes: expand at trap
# time, not at definition time, so quoting stays intact)
trap 'echo "copying files"; rsync -avz -- *.log *.txt "${work_dir}"' EXIT
# runs stress-ng (to put stress on node) for 120 seconds
# NOTE(review): SLURM_TASKS_PER_NODE may have the form "2(x3)" on
# multi-node jobs; with --nodes=1 it is a plain count — confirm
stress-ng --cpu "${SLURM_TASKS_PER_NODE}" --timeout 120s --metrics-brief > stress-ng.log
# append date and time of finished job in a file
date >> date.txt
The second example demonstrates how to run an array of jobs:
#!/bin/bash
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# Slurm Construction Section
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# job name
#SBATCH --job-name=job-1
# partition (queue) declaration
#SBATCH --partition=dept_24
# number of requested nodes
#SBATCH --nodes=1
# requested array dimension
#SBATCH --array=1-4
# number of tasks
#SBATCH --ntasks=1
# number of requested cores
#SBATCH --ntasks-per-node=2
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# User Construction Section
#::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# fail fast: abort on errors, unset variables, and pipeline failures
set -euo pipefail
# current (working) directory
work_dir=$(pwd)
# username
user=$(whoami)
# directory name where job will be run (on compute node)
# (each array task gets its own SLURM_JOB_ID, so directories do not collide)
job_dir="${user}_${SLURM_JOB_ID}.dcb.private.net"
# create directory on /scr folder of compute node
# (-p: do not fail if it already exists, e.g. on a requeued task)
mkdir -p "/scr/${job_dir}"
# change to the newly created directory; abort rather than run in the
# submit directory if the scratch filesystem is unavailable
cd "/scr/${job_dir}" || exit 1
# copy the submit file (and all other related files/directories)
rsync -a "${work_dir}"/* .
# put date and time of starting job in a file
date > "date-${SLURM_ARRAY_TASK_ID}.txt"
# copy files back on exit or interrupt (single quotes: expand at trap
# time, not at definition time, so quoting stays intact)
trap 'echo "copying files"; rsync -avz -- *.log *.txt "${work_dir}"' EXIT
# runs stress-ng (to put stress on node) for 60 seconds
# (fixed: removed stray trailing $SLURM_ARRAY_TASK_ID argument that was
# passed to stress-ng after the redirection and broke the invocation)
stress-ng --cpu "${SLURM_TASKS_PER_NODE}" --timeout 60s --metrics-brief > "stress-ng-${SLURM_ARRAY_TASK_ID}.log"
# put hostname of compute node in a file
hostname > "hostname-${SLURM_ARRAY_TASK_ID}.txt"
# append date and time of finished job in a file
date >> "date-${SLURM_ARRAY_TASK_ID}.txt"