diff --git a/README.md b/README.md
index 8f9488c05795a63fd22114988fc082e278eebd9d..8238bd2e77bfa62ad6ea5604ceda9fe11a68c6f5 100644
--- a/README.md
+++ b/README.md
@@ -88,9 +88,9 @@ The input arguments are:
 2. Number of gpus per node,
 3. Number of iterations for each parameter setting.
 
-We will average the benchmark performance over the iterations. We use a batch size of 128 which is the maximum usable batch size without a OOM error.
+We will average the benchmark performance over the iterations. The batch size is passed as the fourth argument; the maximum usable batch size (without an OOM error) is 256 for single-node runs and 128 for multi-node runs.
 ```
-bash scripts/benchmark_sbatch_submit.sh 1 8 100
+bash scripts/benchmark_sbatch_submit.sh 1 8 100 128
 ```
 
 ### Results  
diff --git a/scripts/benchmark_sbatch_submit.sh b/scripts/benchmark_sbatch_submit.sh
index 44ac79bc2fde0e8acea54650fdd764b79e621658..5ad230bcd04f9ca1accd39fc3716afd0e1d7a819 100644
--- a/scripts/benchmark_sbatch_submit.sh
+++ b/scripts/benchmark_sbatch_submit.sh
@@ -6,10 +6,11 @@ dim=2
 NUM_NODES=$1
 NUM_GPUS=$2
 NUM_ITERATIONS=$3
+BATCH_SIZE=$4
 if [ $NUM_NODES -eq 1 ]; then
     for nodes in {1..1}; do
         for gpus in {1..$NUM_GPUS}; do
-            for batch_size in 128; do
+            for batch_size in $BATCH_SIZE; do
                 for iteration in {1..$NUM_ITERATIONS}; do
 
                     echo dim ${dim}, nodes ${nodes}, gpus ${gpus}, batch_size ${batch_size}, iteration ${iteration}
@@ -25,7 +26,7 @@ if [ $NUM_NODES -eq 1 ]; then
 else
     for nodes in {2..$NUM_NODES}; do
         for gpus in {$NUM_GPUS}; do
-            for batch_size in 128; do
+            for batch_size in $BATCH_SIZE; do
                 for iteration in {1..$NUM_ITERATIONS}; do
 
                     # For multi node