Commit ae274cb1 authored by Rasmus Ringdahl

refactor: move python scripts to code folder

parent b28f9db2
1 merge request: !4 Dev
@@ -35,7 +35,7 @@ Python needs to be loaded into the environment in order to be accessible, this is
 The single job step is allocated and performed with the __srun__ command.
 #### The python script
-The python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation, and afterwards print to an output file.
+The python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation, and afterwards print to an output file. The python script can be found in the _[code](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/code)_ folder.
 - The environment variable __SLURM_JOB_ID__ can be used to create temporary files and folders (as sketched below).
 - The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of CPUs when running in parallel.
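The two environment variables in the list above come straight from Slurm. A minimal sketch of how a script might use them; the `/tmp` base path and the placeholder workload are assumptions for illustration, not part of the example repository:

```python
import os
from multiprocessing import Pool

# SLURM_JOB_ID can be used to build a per-job temporary folder
# (the /tmp base path is a hypothetical choice).
scratch = os.path.join('/tmp', 'job_{}'.format(os.environ.get('SLURM_JOB_ID', 'local')))
os.makedirs(scratch, exist_ok=True)

# SLURM_CPUS_PER_TASK restricts the worker pool to the allocated CPUs;
# fall back to 1 when running outside of Slurm.
cpus = int(os.environ.get('SLURM_CPUS_PER_TASK', '1'))
with Pool(processes=cpus) as pool:
    results = pool.map(abs, [-3, -2, -1])  # placeholder workload
```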
@@ -10,4 +10,4 @@
 module load python/anaconda3-2024.02-3.11.7
 # Start job stage
-srun python parallel_task.py ../data/data_1.txt output_paralell.csv
\ No newline at end of file
+srun python ../code/parallel_task.py ../data/data_1.txt output_paralell.csv
\ No newline at end of file
@@ -10,4 +10,4 @@
 module load python/anaconda3-2024.02-3.11.7
 # Start job stage
-srun python sequential_task.py ../data/data_1.txt output_output_sequential.csv
\ No newline at end of file
+srun python ../code/sequential_task.py ../data/data_1.txt output_output_sequential.csv
\ No newline at end of file
@@ -39,7 +39,7 @@ The job steps are allocated and performed with the __srun__ commands.
 _In this example only the computational step needs multiple CPUs; therefore the srun commands for all job steps except step 3 are set to use 1 CPU per task._
 #### The python script
-The python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation, and afterwards print to an output file.
+The python script represents the task to be done. In this case the task is to read an input file, wait to simulate a calculation, and afterwards print to an output file. The python script can be found in the _[code](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/code)_ folder.
 - The environment variable __SLURM_JOB_ID__ can be used to create temporary files and folders.
 - The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of CPUs when running in parallel.
@@ -21,7 +21,7 @@ srun --cpus-per-task=1 mkdir -v -p ${working_folder}
 srun --cpus-per-task=1 cp -v ${PWD}/../data/${file} ${working_folder}
 # Step 3 - Start job stage
-srun python parallel_task.py ${working_folder}/${file} ${working_folder}/output.csv
+srun python ../code/parallel_task.py ${working_folder}/${file} ${working_folder}/output.csv
 # Step 4 - Compress all csv files.
 srun --cpus-per-task=1 tar -czvf ${working_folder}/output.tar.gz -C ${working_folder} $(cd ${working_folder} && ls *.csv)
from datetime import datetime
from multiprocessing import Pool
import json
import logging
import os
import sys
import time

logger = logging.getLogger(__name__)

def sleep(task) -> int:
    # Simulate a calculation by sleeping for the requested number of seconds.
    task_id, duration = task
    time.sleep(duration)
    logger.info('Task %d done.', task_id)
    return duration

def main(input_file: str, output_file: str):
    # Read environment variables.
    JOB_NAME = os.environ.get('SLURM_JOB_NAME', 'Unknown')
    JOB_ID = os.environ.get('SLURM_JOB_ID', 'Unknown')
    NUMBER_OF_CPUS = os.environ.get('SLURM_CPUS_PER_TASK', 'Unknown')
    if NUMBER_OF_CPUS == 'Unknown':
        logger.error('Unknown number of CPUs, exiting.')
        return
    NUMBER_OF_CPUS = int(NUMBER_OF_CPUS)

    logger.info('**** Output for job %s (%s) ****', JOB_NAME, JOB_ID)
    logger.info('Running program with %d CPUs.', NUMBER_OF_CPUS)

    # Read the configuration file and create a list of tasks.
    # This represents the reading of parameters for the calculations.
    logger.info('Reading configuration from %s.', input_file)
    with open(input_file, 'r') as file:
        data = json.load(file)

    tasks = []
    total_time = 0
    for i, duration in enumerate(data['sleep']):
        tasks.append((i, duration))
        total_time = total_time + duration

    # Create a multiprocessing pool restricted to the allocated number of
    # CPUs and submit the tasks to the worker pool.
    tic = datetime.now()
    logger.info('Submitting tasks to pool.')
    with Pool(processes=NUMBER_OF_CPUS) as pool:
        results = pool.map(sleep, tasks)
    toc = datetime.now()
    logger.info('All tasks are done, took %d seconds, compared to %d seconds with a single thread.',
                (toc - tic).seconds, total_time)

    # Write one line per task result to the output file.
    logger.info('Writing result to %s', output_file)
    with open(output_file, 'w') as file:
        file.write('time\n')
        for result in results:
            file.write('{}\n'.format(result))

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    main(input_file, output_file)
    sys.exit(0)
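The script above parses its input with `json.load` and reads a `sleep` list, so the input file must be JSON despite the `.txt` extension used in the job scripts. The repository's `data_1.txt` is not shown in this commit; a minimal input consistent with the parser would look like:

```json
{
    "sleep": [5, 3, 10, 1]
}
```

Each entry becomes one task in the pool, so with enough allocated CPUs the whole run takes roughly as long as the largest entry instead of the sum of all entries.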
@@ -37,7 +37,7 @@ The _config.txt_ is a text file containing a simple table, the first column cont
 For simpler applications the data files could be ignored and the _config.txt_ contains all relevant data.
 #### The python script
-The python script represents the task to be done. In this case the task is to wait a time based on the input data file and print when the waiting is done.
+The python script represents the task to be done. In this case the task is to wait a time based on the input data file and print when the waiting is done. The python script can be found in the _[code](https://gitlab.liu.se/rasri17/lundgren-examples/-/tree/main/code)_ folder.
 The environment variable __SLURM_CPUS_PER_TASK__ is used to restrict the worker pool to the allocated number of cores.
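The _config.txt_ itself is not part of this diff; a hypothetical example matching the two-column layout described above (task id, then data file) would be:

```
1 ../data/data_1.txt
2 ../data/data_2.txt
3 ../data/data_3.txt
```

With this file and `SLURM_ARRAY_TASK_ID=2`, the awk lookup in the job script below would set `file` to `../data/data_2.txt`.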
@@ -17,4 +17,4 @@ config=config.txt
 file=$(awk -v task=$SLURM_ARRAY_TASK_ID '$1==task {print $2}' $config)
 # Start job stage
-srun python job_array_task.py ${file} output_${SLURM_ARRAY_TASK_ID}.csv
\ No newline at end of file
+srun python ../code/parallel_task.py ${file} output_${SLURM_ARRAY_TASK_ID}.csv
\ No newline at end of file
from datetime import datetime
from multiprocessing import Pool
import json
import logging
import os
import sys
import time

logger = logging.getLogger(__name__)

def sleep(task) -> int:
    # Simulate a calculation by sleeping for the requested number of seconds.
    task_id, duration = task
    time.sleep(duration)
    logger.info('Task %d done.', task_id)
    return duration

def main(input_file: str, output_file: str):
    # Read environment variables.
    JOB_NAME = os.environ.get('SLURM_JOB_NAME', 'Unknown')
    JOB_ID = os.environ.get('SLURM_JOB_ID', 'Unknown')
    NUMBER_OF_CPUS = os.environ.get('SLURM_CPUS_PER_TASK', 'Unknown')
    if NUMBER_OF_CPUS == 'Unknown':
        logger.error('Unknown number of CPUs, exiting.')
        return
    NUMBER_OF_CPUS = int(NUMBER_OF_CPUS)

    logger.info('**** Output for job %s (%s) ****', JOB_NAME, JOB_ID)
    logger.info('Running program with %d CPUs.', NUMBER_OF_CPUS)

    # Read the configuration file and create a list of tasks.
    # This represents the reading of parameters for the calculations.
    logger.info('Reading configuration from %s.', input_file)
    with open(input_file, 'r') as file:
        data = json.load(file)

    tasks = []
    total_time = 0
    for i, duration in enumerate(data['sleep']):
        tasks.append((i, duration))
        total_time = total_time + duration

    # Create a multiprocessing pool restricted to the allocated number of
    # CPUs and submit the tasks to the worker pool.
    tic = datetime.now()
    logger.info('Submitting tasks to pool.')
    with Pool(processes=NUMBER_OF_CPUS) as pool:
        results = pool.map(sleep, tasks)
    toc = datetime.now()
    logger.info('All tasks are done, took %d seconds, compared to %d seconds with a single thread.',
                (toc - tic).seconds, total_time)

    # Write one line per task result to the output file.
    logger.info('Writing result to %s', output_file)
    with open(output_file, 'w') as file:
        file.write('time\n')
        for result in results:
            file.write('{}\n'.format(result))

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    main(input_file, output_file)
    sys.exit(0)
File moved
File moved