From febe814b78e422170be37ee93ceb0e41aaf8ccca Mon Sep 17 00:00:00 2001
From: Rasmus Ringdahl <rasmus.ringdahl@liu.se>
Date: Wed, 22 Jan 2025 09:31:57 +0100
Subject: [PATCH 1/3] docs: changing links and uniforming style

---
 1_single_core_job/README.md | 2 +-
 README.md                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/1_single_core_job/README.md b/1_single_core_job/README.md
index 0f92e67..acda651 100644
--- a/1_single_core_job/README.md
+++ b/1_single_core_job/README.md
@@ -31,4 +31,4 @@ The job step with the single task is allocated and performed with the __srun__ c
 #### The python script
 The python script represents the task to be done. In this case the task is to print out some environment variables that are set by Slurm.
 
-The environment variable __JOB_ID__ can be used to create temporary files and folders. In this example it creates a file named <JOB_ID>.txt and writes the job name into it.
\ No newline at end of file
+The environment variable __JOB_ID__ can be used to create temporary files and folders. In this example it creates a file named _<JOB_ID>_.txt and writes the job name into it.
\ No newline at end of file
diff --git a/README.md b/README.md
index 22c72cf..24c88c7 100644
--- a/README.md
+++ b/README.md
@@ -20,4 +20,4 @@ A single core job is a job with only a single thread. This type of job is used w
 
 A simple example could be a data parser that reads a file and transforms it into a more suitable format.
 
-Learn more about the [example](https://gitlab.liu.se/rasri17/lundgren-examples/-/blob/main/1_single_core_job/README.md).
+Learn more about the [example](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/1_single_core_job).
--
GitLab

From c89d2ecd18d59d0de16942cb0818ab10db58abbc Mon Sep 17 00:00:00 2001
From: Rasmus Ringdahl <rasmus.ringdahl@liu.se>
Date: Wed, 22 Jan 2025 14:17:31 +0100
Subject: [PATCH 2/3] refactor: change to logging framework

---
 1_single_core_job/single_core_task.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/1_single_core_job/single_core_task.py b/1_single_core_job/single_core_task.py
index eb3087e..9e8e26d 100644
--- a/1_single_core_job/single_core_task.py
+++ b/1_single_core_job/single_core_task.py
@@ -1,6 +1,10 @@
 from datetime import datetime
-import time
+
+import logging
 import os
+import time
+
+logger = logging.getLogger(__name__)
 
 def main():
     # Read environment variables.
@@ -13,12 +17,12 @@ def main():
     # This represents the calculations
     current_time = datetime.now()
     sleep_time = 60 - current_time.second
-    print('{} - Sleeping for {} seconds.'.format(current_time.strftime('%Y-%m-%d %H:%M:%S'), sleep_time))
+    logger.info('%s - Sleeping for %d seconds.', current_time.strftime('%Y-%m-%d %H:%M:%S'), sleep_time)
     time.sleep(sleep_time)
 
     # Printing some things to standard output.
-    print('\nJob ID:\t\t\t{name}\nJob name:\t\t{id}\nAllocated cores:\t{cores}\nAllocated memory:\t{mem}'.format(
-        id=JOB_ID, name=JOB_NAME, cores=NUMBER_OF_CORES,mem=MAXIMUM_MEMORY))
+    logger.info('\nJob ID:\t\t\t%s\nJob name:\t\t%s\nAllocated cores:\t%s\nAllocated memory:\t%s',
+                JOB_ID, JOB_NAME, NUMBER_OF_CORES, MAXIMUM_MEMORY)
 
     # Writing some output to a file based on the Slurm job id.
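     # The job id is unique for each Slurm job, so a file name built from it
     # will not collide with output from other jobs running at the same time.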
     output_file = '{}.txt'.format(JOB_ID)
@@ -26,7 +30,8 @@ def main():
         file.write('This file was created by the job {} with id {}\n'.format
                    (JOB_NAME, JOB_ID))
 
-    print('\nJob completed.')
+    logger.info('Job completed.')
 
 if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
     main()
--
GitLab

From 29e86c083f5c292cced662fc5d54a4678af46a28 Mon Sep 17 00:00:00 2001
From: Rasmus Ringdahl <rasmus.ringdahl@liu.se>
Date: Wed, 22 Jan 2025 14:19:09 +0100
Subject: [PATCH 3/3] feat: add multi core job example

Closes #4
---
 2_multi_core_job/README.md          | 44 ++++++++++++++++++++++++++
 2_multi_core_job/multi_core_job.sh  | 13 ++++++++
 2_multi_core_job/multi_core_task.py | 49 +++++++++++++++++++++++++++++
 README.md                           |  5 +++
 4 files changed, 111 insertions(+)
 create mode 100644 2_multi_core_job/README.md
 create mode 100644 2_multi_core_job/multi_core_job.sh
 create mode 100644 2_multi_core_job/multi_core_task.py

diff --git a/2_multi_core_job/README.md b/2_multi_core_job/README.md
new file mode 100644
index 0000000..e89274c
--- /dev/null
+++ b/2_multi_core_job/README.md
@@ -0,0 +1,44 @@
+# Multi core jobs
+A multi core job is a job that splits the computation across multiple cores. This type of job is among the most suitable, and most common, to run on Lundgren. Typical examples include optimization problems and heavy computations.
+
+## How to run
+To run the example do the following steps:
+1. Log in to Lundgren
+2. Change directory to the example code
+3. Run `sbatch multi_core_job.sh`
+4. Check the queue status by running `squeue`
+5. When the job is completed, check the file _multi_core_job.log_
+
+Try changing the number of CPUs in _multi_core_job.sh_ and see how the processing time changes.
+
+## Detailed description of the example
+The batch script is the main file for the job allocation and preparation. Inside the python script a few environment variables are fetched and printed out.
+
+### The batch script
+The batch script, _multi_core_job.sh_, contains three sections. The first section contains input arguments to the Slurm scheduler. The second section loads Python into the environment so it is accessible, and lastly a job step is performed.
+
+The input arguments are defined in comments beginning with #SBATCH, followed by the argument key and value. For easier readability the long (--) form of the arguments is used.
+
+- __job-name:__ The name of the job is set to _demo_multi_core_
+- __time:__ The requested time is set to 5 minutes, _00:05:00_
+- __ntasks:__ The number of tasks to be performed in this job is set to _1_
+- __cpus-per-task:__ The requested number of cores per task is set to _2_
+- __mem-per-cpu:__ The requested memory per core is set to _50 MB_
+- __output:__ The standard output should be sent to the file _multi_core_job.log_
+
+Python needs to be loaded into the environment in order to be accessible; this is done in the next step with the __module__ command.
+
+The job step with the single task is allocated and performed with the __srun__ command.
+
+#### The python script
+The python script represents the task to be done. In this case the task is to wait a random amount of time and print a message when the waiting is done.
+
+The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of cores.
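+
+In code, the pattern boils down to the sketch below (a simplified illustration, not the full _multi_core_task.py_; the `work` function and the fallback to a single core when running outside Slurm are assumptions made for the example):
+
+```python
+import os
+from multiprocessing import Pool
+
+def work(item):
+    return item * item  # placeholder for the real calculation
+
+if __name__ == '__main__':
+    # Size the worker pool from the Slurm allocation instead of os.cpu_count().
+    cores = int(os.environ.get('SLURM_CPUS_PER_TASK', '1'))
+    with Pool(processes=cores) as pool:
+        results = pool.map(work, range(10))
+    print(results)
+```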
+
+### How to set the number of cores in different programming languages and software
+Most programming languages and software packages try to make use of all available cores. This can lead to oversubscription of the resources. On a shared resource the maximum used resources must match the allocated ones. This section gives a reference for how to do this in commonly used software.
+
+- __CPLEX:__ Use the parameter _global thread count_. Read more in the [documentation](https://www.ibm.com/docs/en/icos/22.1.2?topic=parameters-global-thread-count)
+- __Gurobi:__ Use the configuration parameter _ThreadLimit_ (see the sketch after this list). Read more in the [documentation](https://docs.gurobi.com/projects/optimizer/en/current/reference/parameters.html#threadlimit)
+- __MATLAB:__ Create an instance of the parpool object with _poolsize_ set to the number of cores and use the pool when running in parallel. Read more in the [documentation](https://se.mathworks.com/help/parallel-computing/parpool.html)
+- __Python:__ If the multiprocessing package is used, create an instance of the Pool class with _processes_ set to the number of cores and use the pool when running in parallel. Read more in the [documentation](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool)
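+
+For example, with Gurobi's Python interface the limit can be tied to the Slurm allocation as in the minimal sketch below (assuming the _gurobipy_ package is installed; the model file name is hypothetical, and the closely related per-solve _Threads_ parameter is used here rather than _ThreadLimit_):
+
+```python
+import os
+import gurobipy as gp
+
+# Match the solver's thread usage to the Slurm allocation.
+threads = int(os.environ.get('SLURM_CPUS_PER_TASK', '1'))
+
+model = gp.read('model.lp')  # hypothetical model file
+model.setParam('Threads', threads)
+model.optimize()
+```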
diff --git a/2_multi_core_job/multi_core_job.sh b/2_multi_core_job/multi_core_job.sh
new file mode 100644
index 0000000..139ff3a
--- /dev/null
+++ b/2_multi_core_job/multi_core_job.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH --job-name=demo_multi_core
+#SBATCH --time=00:05:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=2
+#SBATCH --mem-per-cpu=50MB
+#SBATCH --output=multi_core_job.log
+
+# Loading Python into the environment
+module load python/anaconda3-2024.02-3.11.7
+
+# Start job step
+srun python multi_core_task.py
\ No newline at end of file
diff --git a/2_multi_core_job/multi_core_task.py b/2_multi_core_job/multi_core_task.py
new file mode 100644
index 0000000..1879628
--- /dev/null
+++ b/2_multi_core_job/multi_core_task.py
@@ -0,0 +1,49 @@
+from datetime import datetime
+from multiprocessing import Pool
+import logging
+import os
+import random
+import time
+
+logger = logging.getLogger(__name__)
+
+def sleep(task):
+    time.sleep(task[1])
+    logger.info('Task %d done.', task[0])
+
+def main():
+    # Read environment variables.
+    NUMBER_OF_CORES = os.environ.get('SLURM_CPUS_PER_TASK', 'Unknown')
+    if NUMBER_OF_CORES == 'Unknown':
+        logger.error('Unknown number of cores, exiting.')
+        return
+
+    NUMBER_OF_CORES = int(NUMBER_OF_CORES)
+    logger.info('Running program with %d cores.', NUMBER_OF_CORES)
+
+    # Creating a list of tasks to be performed
+    # This represents the calculations
+    random.seed(1)
+    tasks = []
+    total_time = 0
+    for i in range(10):
+        duration = random.randrange(1, 29)
+        tasks.append((i, duration))
+        total_time = total_time + duration
+
+    # Creating a multiprocessing pool to perform the tasks
+    pool = Pool(processes=NUMBER_OF_CORES)
+
+    # Submitting the tasks to the worker pool
+    tic = datetime.now()
+    logger.info('Submitting tasks to pool.')
+    pool.map(sleep, tasks)
+    toc = datetime.now()
+
+    logger.info('All tasks are done, took %d seconds, compared to %d seconds with a single thread.',
+                (toc-tic).seconds, total_time)
+
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    main()
diff --git a/README.md b/README.md
index 24c88c7..5e8d57d 100644
--- a/README.md
+++ b/README.md
@@ -21,3 +21,8 @@ A single core job is a job with only a single thread. This type of job is used w
 A simple example could be a data parser that reads a file and transforms it into a more suitable format.
 
 Learn more about the [example](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/1_single_core_job).
+
+#### Example 2 - Multi core job
+A multi core job is a job that splits the computation across multiple cores. This type of job is among the most suitable, and most common, to run on Lundgren. Typical examples include optimization problems and heavy computations.
+
+Learn more about the [example](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/2_multi_core_job).
\ No newline at end of file
--
GitLab