diff --git a/1_single_job_step/README.md b/1_single_job_step/README.md
index 8261b611f7b8198eff6b0e01c28589914633506d..117447e295974afb0dee83e52d6ee1adafcd92b2 100644
--- a/1_single_job_step/README.md
+++ b/1_single_job_step/README.md
@@ -35,7 +35,7 @@ Python needs to be loaded into the environment in order to be accessible this is
 The single job step is allocated and performed with the __srun__ command.
 
 #### The python script
-The python script represents the taskt to be done. In this case the task is read an input file and wait to simulate a calculation and afterwards print to an output file.
+The python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation and afterwards print to an output file. The python script can be found in the _[code](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/code)_ folder.
 
 - The environment variable __JOB_ID__ can be used to create temporary files and folders.
 - The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of cpus when running in parallel.
diff --git a/1_single_job_step/single_job_step_parallel.sh b/1_single_job_step/single_job_step_parallel.sh
index 70be4b091ac463c4f7c600504b9551f7669ad16a..1ebf18993af067f462e7a34fba769ce08f01142c 100644
--- a/1_single_job_step/single_job_step_parallel.sh
+++ b/1_single_job_step/single_job_step_parallel.sh
@@ -10,4 +10,4 @@
 module load python/anaconda3-2024.02-3.11.7
 
 # Start job stage
-srun python parallel_task.py ../data/data_1.txt output_paralell.csv
\ No newline at end of file
+srun python ../code/parallel_task.py ../data/data_1.txt output_paralell.csv
\ No newline at end of file
diff --git a/1_single_job_step/single_job_step_sequential.sh b/1_single_job_step/single_job_step_sequential.sh
index 133033dc9c2d86201ef074f766e4212d81fbe53e..376cf5299ff6f2183dcc84b13ee2791dff3b23bd 100644
--- a/1_single_job_step/single_job_step_sequential.sh
+++ b/1_single_job_step/single_job_step_sequential.sh
@@ -10,4 +10,4 @@
 module load python/anaconda3-2024.02-3.11.7
 
 # Start job stage
-srun python sequential_task.py ../data/data_1.txt output_output_sequential.csv
\ No newline at end of file
+srun python ../code/sequential_task.py ../data/data_1.txt output_output_sequential.csv
\ No newline at end of file
diff --git a/2_multiple_job_steps/README.md b/2_multiple_job_steps/README.md
index e0a9cb393d4a8b1c8536dfbfb91a3de3cf7f68b0..e8fcb279422c14479ca3018077adcd5956d9b51f 100644
--- a/2_multiple_job_steps/README.md
+++ b/2_multiple_job_steps/README.md
@@ -39,7 +39,7 @@ The job steps is allocated and performed with the __srun__ commands.
 _In this example only the computational step needs multiple CPU's therefore the srun for all job steps except for step 3 are set to use 1 CPU per task._
 
 #### The python script
-The python script represents the taskt to be done. In this case the task is read an input file and wait to simulate a calculation and afterwards print to an output file.
+The python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation and afterwards print to an output file. The python script can be found in the _[code](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/code)_ folder.
 
 - The environment variable __JOB_ID__ can be used to create temporary files and folders.
 - The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of cpus when running in parallel.
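The relocated scripts are referenced with relative paths (`../code/`, `../data/`), so the batch scripts appear to expect submission from inside each example folder. A minimal usage sketch under that assumption (hypothetical commands, not files in this patch):

```bash
# Hypothetical usage sketch: submit from inside the example folder so that the
# relative ../code/ and ../data/ paths in the batch script resolve correctly.
cd 1_single_job_step
sbatch single_job_step_parallel.sh
```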
diff --git a/2_multiple_job_steps/multiple_job_steps.sh b/2_multiple_job_steps/multiple_job_steps.sh
index ecc37a1796092a9b603e54d196825160029a6121..fa10b1c1a78c65acd4ae3b69c51a8534409540bc 100644
--- a/2_multiple_job_steps/multiple_job_steps.sh
+++ b/2_multiple_job_steps/multiple_job_steps.sh
@@ -21,7 +21,7 @@ srun --cpus-per-task=1 mkdir -v -p ${working_folder}
 srun --cpus-per-task=1 cp -v ${PWD}/../data/${file} ${working_folder}
 
 # Step 3 - Start job stage
-srun python parallel_task.py ${working_folder}/${file} ${working_folder}/output.csv
+srun python ../code/parallel_task.py ${working_folder}/${file} ${working_folder}/output.csv
 
 # Step 4 - Compress data all csv files.
 srun --cpus-per-task=1 tar -czvf ${working_folder}/output.tar.gz -C ${working_folder} $(cd ${working_folder} && ls *.csv)
diff --git a/2_multiple_job_steps/parallel_task.py b/2_multiple_job_steps/parallel_task.py
deleted file mode 100644
index 46984d841b63a4f35c0ad53f5a336efb9b696fdb..0000000000000000000000000000000000000000
--- a/2_multiple_job_steps/parallel_task.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from datetime import datetime
-from multiprocessing import Pool
-
-import json
-import logging
-import os
-import sys
-import time
-
-logger = logging.getLogger(__name__)
-
-def sleep(input) -> int:
-    time.sleep(input[1])
-    logger.info('Task %d done.',input[0])
-
-    return input[1]
-
-def main(input_file: str, output_file: str):
-    # Read environment variables.
-    JOB_NAME = os.environ.get('SLURM_JOB_NAME','Unknown')
-    JOB_ID = os.environ.get('SLURM_JOB_ID','Unknown')
-    NUMBER_OF_CPUS = os.environ.get('SLURM_CPUS_PER_TASK','Unknown')
-    if NUMBER_OF_CPUS in 'Unknown':
-        logger.error('Unkown number of CPU''s, exiting.')
-        return
-
-    NUMBER_OF_CPUS = int(NUMBER_OF_CPUS)
-    logger.info('**** Output for job %s (%s) ****', JOB_NAME, JOB_ID)
-    logger.info('Running program with %d CPU''s.',NUMBER_OF_CPUS)
-
-    # Reading configuration file and create a list of tasks
-    # This represents the reading of parameters and calculations
-    logger.info('Reading configuration from %s.',input_file)
-    with open(input_file, 'r') as file:
-        data = json.load(file)
-
-    tasks = []
-    total_time = 0
-    for i in range(len(data['sleep'])):
-        time = data['sleep'][i]
-        tasks.append((i, time))
-        total_time = total_time + time
-
-    # Creating a multiprocessing pool to perform the tasks
-    pool = Pool(processes=NUMBER_OF_CPUS)
-
-    # Running submitting the tasks to the worker pool
-    tic = datetime.now()
-    logger.info('Submitting tasks to pool.')
-    results = pool.map(sleep, tasks)
-    toc = datetime.now()
-
-    logger.info('All tasks are done, took %d seconds, compared to %d seconds with single thread.',
-        (toc-tic).seconds, total_time)
-
-    logger.info('Writing result to %s', output_file)
-    with open(output_file, 'w') as file:
-        file.write('time\n')
-        for result in results:
-            file.write('{}\n'.format(result))
-
-
-if __name__ == '__main__':
-    logging.basicConfig(level=logging.INFO)
-    input_file = sys.argv[1]
-    output_file = sys.argv[2]
-    main(input_file, output_file)
-    sys.exit(0)
diff --git a/3_job_array/README.md b/3_job_array/README.md
index d1ae2b4f041c105e848111d0d6a46c96d0a975a7..4ebc688dfb76b5bdc584699f1981018f9ccfa719 100644
--- a/3_job_array/README.md
+++ b/3_job_array/README.md
@@ -37,7 +37,7 @@ The _config.txt_ is a text file containing a simple table, the first column cont
 For simpler applications the data files could be ignored and the _config.txt_ contains all relevant data.
 
 #### The python script
-The python script represents the task to be done. In this case the task is to wait a time based on the input data file and print the waiting is done.
+The python script represents the task to be done. In this case the task is to wait a time based on the input data file and then print that the waiting is done. The python script can be found in the _[code](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/code)_ folder.
 
 The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of cores.
 
diff --git a/3_job_array/job_array.sh b/3_job_array/job_array.sh
index 11353b11eab8b12197204f00e078cace5d4b7a48..e8732bc8fb4be466a62d0b99ef145ae5958cea2d 100644
--- a/3_job_array/job_array.sh
+++ b/3_job_array/job_array.sh
@@ -17,4 +17,4 @@ config=config.txt
 file=$(awk -v task=$SLURM_ARRAY_TASK_ID '$1==task {print $2}' $config)
 
 # Start job stage
-srun python job_array_task.py ${file} output_${SLURM_ARRAY_TASK_ID}.csv
\ No newline at end of file
+srun python ../code/parallel_task.py ${file} output_${SLURM_ARRAY_TASK_ID}.csv
\ No newline at end of file
diff --git a/3_job_array/job_array_task.py b/3_job_array/job_array_task.py
deleted file mode 100644
index b80836931cfe2d7baacdd64687ca4e984f61b075..0000000000000000000000000000000000000000
--- a/3_job_array/job_array_task.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from datetime import datetime
-from multiprocessing import Pool
-
-import json
-import logging
-import os
-import sys
-import time
-
-logger = logging.getLogger(__name__)
-
-def sleep(input) -> int:
-    time.sleep(input[1])
-    logger.info('Task %d done.',input[0])
-
-    return input[1]
-
-def main(input_file: str, output_file: str):
-    # Read environment variables.
-    JOB_NAME = os.environ.get('SLURM_JOB_NAME','Unknown')
-    JOB_ID = os.environ.get('SLURM_JOB_ID','Unknown')
-    NUMBER_OF_CPUS = os.environ.get('SLURM_CPUS_PER_TASK','Unknown')
-    if NUMBER_OF_CPUS in 'Unknown':
-        logger.error('Unkown number of cpu''s, exiting.')
-        return
-
-    NUMBER_OF_CPUS = int(NUMBER_OF_CPUS)
-    logger.info('**** Output for job %s (%s) ****', JOB_NAME, JOB_ID)
-    logger.info('Running program with %d cpu''s.',NUMBER_OF_CPUS)
-
-    # Reading configuration file and create a list of tasks
-    # This represents the reading of parameters and calculations
-    logger.info('Reading configuration from %s.',input_file)
-    with open(input_file, 'r') as file:
-        data = json.load(file)
-
-    tasks = []
-    total_time = 0
-    for i in range(len(data['sleep'])):
-        time = data['sleep'][i]
-        tasks.append((i, time))
-        total_time = total_time + time
-
-    # Creating a multiprocessing pool to perform the tasks
-    pool = Pool(processes=NUMBER_OF_CPUS)
-
-    # Running submitting the tasks to the worker pool
-    tic = datetime.now()
-    logger.info('Submitting tasks to pool.')
-    results = pool.map(sleep, tasks)
-    toc = datetime.now()
-
-    logger.info('All tasks are done, took %d seconds, compared to %d seconds with single thread.',
-        (toc-tic).seconds, total_time)
-
-    logger.info('Writing result to %s', output_file)
-    with open(output_file, 'w') as file:
-        file.write('time\n')
-        for result in results:
-            file.write('{}\n'.format(result))
-
-if __name__ == '__main__':
-    logging.basicConfig(level=logging.INFO)
-    input_file = sys.argv[1]
-    output_file = sys.argv[2]
-    main(input_file, output_file)
-    sys.exit(0)
diff --git a/1_single_job_step/parallel_task.py b/code/parallel_task.py
similarity index 100%
rename from 1_single_job_step/parallel_task.py
rename to code/parallel_task.py
diff --git a/1_single_job_step/sequential_task.py b/code/sequential_task.py
similarity index 100%
rename from 1_single_job_step/sequential_task.py
rename to code/sequential_task.py
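Since config.txt itself is not shown in this patch, here is a rough sketch of the lookup that 3_job_array/job_array.sh performs with awk: the first column is matched against SLURM_ARRAY_TASK_ID and the second column is the data file handed to the task. The file contents and task id below are hypothetical placeholder values, not taken from the repository:

```bash
# Hypothetical config.txt contents (placeholder values):
# column 1 = array task id, column 2 = data file for that task.
cat > config.txt << 'EOF'
1 ../data/data_1.txt
2 ../data/data_2.txt
EOF

# Same awk lookup as in job_array.sh; with task id 2 it prints
# ../data/data_2.txt, which is then passed to ../code/parallel_task.py.
SLURM_ARRAY_TASK_ID=2
file=$(awk -v task=$SLURM_ARRAY_TASK_ID '$1==task {print $2}' config.txt)
echo "$file"
```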