From 8a17e7132af903cb2df5401d95c74f40792d03fb Mon Sep 17 00:00:00 2001 From: Rasmus Ringdahl <rasmus.ringdahl@liu.se> Date: Fri, 21 Feb 2025 08:19:20 +0100 Subject: [PATCH 1/3] fix: set permissions on folder creation. Closes #7 --- 2_multiple_job_steps/multiple_job_steps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/2_multiple_job_steps/multiple_job_steps.sh b/2_multiple_job_steps/multiple_job_steps.sh index fa10b1c..5cb6614 100644 --- a/2_multiple_job_steps/multiple_job_steps.sh +++ b/2_multiple_job_steps/multiple_job_steps.sh @@ -15,7 +15,7 @@ temporary_folder=/local/data1/${USER} working_folder=${temporary_folder}/${SLURM_JOB_ID} # Step 1 - Create a temporary folder to store data in. -srun --cpus-per-task=1 mkdir -v -p ${working_folder} +srun --cpus-per-task=1 mkdir -v -m 700 -p ${working_folder} # Step 2 - Copy indata to the temporary folder. srun --cpus-per-task=1 cp -v ${PWD}/../data/${file} ${working_folder} -- GitLab From 964e1eb705153ac604e6cce502bb108e16ac7d96 Mon Sep 17 00:00:00 2001 From: Rasmus Ringdahl <rasmus.ringdahl@liu.se> Date: Fri, 21 Feb 2025 10:43:53 +0100 Subject: [PATCH 2/3] refactor: reorganize folder and file access Closes #8 --- 2_multiple_job_steps/multiple_job_steps.sh | 58 ++++++++++++++++------ 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/2_multiple_job_steps/multiple_job_steps.sh b/2_multiple_job_steps/multiple_job_steps.sh index 5cb6614..1740757 100644 --- a/2_multiple_job_steps/multiple_job_steps.sh +++ b/2_multiple_job_steps/multiple_job_steps.sh @@ -9,28 +9,54 @@ # Loading Python into the environment module load python/anaconda3-2024.02-3.11.7 -# Specify_ input file -file=data_4.txt -temporary_folder=/local/data1/${USER} -working_folder=${temporary_folder}/${SLURM_JOB_ID} - -# Step 1 - Create a temporary folder to store data in. -srun --cpus-per-task=1 mkdir -v -m 700 -p ${working_folder} - -# Step 2 - Copy indata to the temporary folder. 
-srun --cpus-per-task=1 cp -v ${PWD}/../data/${file} ${working_folder} +# Defining folders on Lundgren's local storage. +lundgren_local_folder=/local/data1/${USER} +lundgren_working_folder=${lundgren_local_folder}/${SLURM_JOB_ID} + +# Setting location and filename of the data and output files on the home folder. +# ${PWD} = current working directory. +# In this case: +# The home_data_folder points to the folder data that is one directory above the current one. +# The home_output_folder points to the current working directory. +# The data file to use is data_4.txt +home_data_folder=${PWD}/../data +home_output_folder=${PWD} +data_file=data_4.txt + +# Setting location and filename of the data and output files on Lundgren's local storage. +# The folders and files set here will be contained in the lundgren_working_folder. +# In this case: +# The lundgren_data_folder points to the folder data in lundgren_working_folder +# The lundgren_output_folder points to the folder output in lundgren_working_folder +# The output file is set to output.csv +lundgren_data_folder=data +lundgren_output_folder=output +output_file=output.csv + + +# Step 1a - Create temporary working folder. +srun --cpus-per-task=1 mkdir -v -m 700 -p ${lundgren_working_folder} + +# Step 1b - Create a temporary folder to store data in. +srun --cpus-per-task=1 mkdir -v -m 700 -p ${lundgren_working_folder}/${lundgren_data_folder} + +# Step 1c - Create a temporary folder to store output in. +srun --cpus-per-task=1 mkdir -v -m 700 -p ${lundgren_working_folder}/${lundgren_output_folder} + +# Step 2 - Copy data to the temporary folder. 
+srun --cpus-per-task=1 cp -v ${home_data_folder}/${data_file} ${lundgren_working_folder}/${lundgren_data_folder} # Step 3 - Start job stage -srun python ../code/parallel_task.py ${working_folder}/${file} ${working_folder}/output.csv +srun python ${PWD}/../code/parallel_task.py ${lundgren_working_folder}/${lundgren_data_folder}/${data_file} ${lundgren_working_folder}/${lundgren_output_folder}/${output_file} -# Step 4 - Compress data all csv files. -srun --cpus-per-task=1 tar -czvf ${working_folder}/output.tar.gz -C ${working_folder} $(cd ${working_folder} && ls *.csv) +# Step 4 - Compress the output csv file. +srun --cpus-per-task=1 tar -czvf ${lundgren_working_folder}/${lundgren_output_folder}/output.tar.gz -C ${lundgren_working_folder}/${lundgren_output_folder} $(cd ${lundgren_working_folder}/${lundgren_output_folder} && ls ${output_file}) # Step 5 - Move output data to home folder -srun --cpus-per-task=1 mv -v ${lundgren_working_folder}/output.tar.gz ${PWD} +srun --cpus-per-task=1 mv -v ${lundgren_working_folder}/${lundgren_output_folder}/output.tar.gz ${home_output_folder} # Step 6a - Remove temporary files. 
-srun --cpus-per-task=1 rm -rfv ${working_folder} +srun --cpus-per-task=1 rm -rfv ${lundgren_working_folder} # Step 6b - Clear folder -srun --cpus-per-task=1 test -n "$(ls -A "$temporary_folder")" || rmdir -v "$temporary_folder" \ No newline at end of file +srun --cpus-per-task=1 test -n "$(ls -A "$lundgren_local_folder")" || rmdir -v "$lundgren_local_folder" \ No newline at end of file -- GitLab From e6f30e14f1e0955b95495e17d71cdb3bcb330c98 Mon Sep 17 00:00:00 2001 From: Rasmus Ringdahl <rasmus.ringdahl@liu.se> Date: Fri, 21 Feb 2025 10:50:06 +0100 Subject: [PATCH 3/3] docs: clarifying folder creation --- 2_multiple_job_steps/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/2_multiple_job_steps/README.md b/2_multiple_job_steps/README.md index e8fcb27..81b972a 100644 --- a/2_multiple_job_steps/README.md +++ b/2_multiple_job_steps/README.md @@ -29,8 +29,8 @@ The input arguments are defined with a comment beginning with SBATCH followed by Python needs to be loaded into the environment in order to be accessible this is done in the next step with the __module__ command. The job steps is allocated and performed with the __srun__ commands. -1. A folder is created with the same name as the Job ID on the local hard drive in the data folder of Lundgren _/local/data1/<LiU-ID>_. -2. Input data files are copied to the newly created folder. +1. A folder is created with the same name as the Job ID on the local hard drive in the data folder of Lundgren _/local/data1/\<LiU-ID>_. Inside it, one folder for data and one for output are also created. +2. Input data files are copied to the newly created folder for data. 3. The third step is the computational step of the job. 4. The output files are compressed. 5. The compressed output files are moved to the home folder. -- GitLab