diff --git a/src/FluidSimulation.cpp b/src/FluidSimulation.cpp
index fd2c974973de35a4b0576db73d057a46a553c63f..1701db343d3ab7066ea8773348878607f045eff7 100644
--- a/src/FluidSimulation.cpp
+++ b/src/FluidSimulation.cpp
@@ -42,7 +42,7 @@ namespace FluidSimulation {
         // Initialize boundary
         std::vector<float3> boundary_positions;
         std::vector<float3> boundary_velocities;
-        int boundaryLayers = 3;
+        int boundaryLayers = 1;
         float spacing = 0.5 * h_H; // Using 0.01 as per your code
 
         createBoundaryParticles(boundary_positions, boundary_velocities, boundaryLayers, spacing);
@@ -63,7 +63,7 @@ namespace FluidSimulation {
     }
 
     void FluidSimulation::updateSimulation() {
-        for (int i = 0; i < 4; ++i) {
+        for (int i = 0; i < 2; ++i) {
             FluidSimulationGPU::updateParticles(0.001);
         }
     }
diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu
index 704f2b7d3eb3e03ce94d9a5ce9fe28da0755ad0d..615b9bf309185baeb96de8645206bbe3869e0bcd 100644
--- a/src/cuda/pcisph_kernels.cu
+++ b/src/cuda/pcisph_kernels.cu
@@ -232,7 +232,7 @@ __global__ void computePressureForces(
                             fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS;
                         }
                     }
-                }
+                } 
                 */
             }
         }
diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu
index 4aad44d170f1055f50b2986c5caf79b72d0068c2..b3042718ac006189be0e8edec9eb0e1d787e5645 100644
--- a/src/cuda/pcisph_update.cu
+++ b/src/cuda/pcisph_update.cu
@@ -89,22 +89,8 @@ void updateParticles(float deltaTime)
     float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>());
 
     float r0 = 0.5f * h_H;          // Penetration threshold
-    float normalDamping = 0.5f;   // Damping factor (adjust as needed, e.g., 0.5f)
+    float normalDamping = 0.1f;   // Damping factor (adjust as needed, e.g., 0.5f)
 
-    // Launch the boundary correction kernel
-    correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>(
-        d_positions_pred,
-        d_velocities_pred,
-        d_boundary_positions,
-        d_cellIndices,
-        d_boundaryCellStart,
-        d_boundaryCellEnd,
-        NUM_PARTICLES,
-        gridSizeX, gridSizeY, gridSizeZ,
-        r0,
-        normalDamping
-    );
-    cudaDeviceSynchronize();
 
     // Compute A
     float m = h_PARTICLE_MASS;
@@ -116,6 +102,21 @@ void updateParticles(float deltaTime)
     const int maxIterations = 5;
 
     for (int iter = 0; iter < maxIterations; ++iter) {
+    // Launch the boundary correction kernel
+        correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>(
+            d_positions_pred,
+            d_velocities_pred,
+            d_boundary_positions,
+            d_cellIndices,
+            d_boundaryCellStart,
+            d_boundaryCellEnd,
+            NUM_PARTICLES,
+            gridSizeX, gridSizeY, gridSizeZ,
+            r0,
+            normalDamping
+        );
+        cudaDeviceSynchronize();
+
         computeDensityError<<<blocksPerGrid, threadsPerBlock>>>(
             d_positions_pred,
             d_densities,
@@ -161,6 +162,22 @@ void updateParticles(float deltaTime)
         cudaDeviceSynchronize();
     }
 
+    
+    // Launch the boundary correction kernel
+    correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>(
+        d_positions_pred,
+        d_velocities_pred,
+        d_boundary_positions,
+        d_cellIndices,
+        d_boundaryCellStart,
+        d_boundaryCellEnd,
+        NUM_PARTICLES,
+        gridSizeX, gridSizeY, gridSizeZ,
+        r0,
+        normalDamping
+    );
+    cudaDeviceSynchronize();
+
     // -------------------- Finalization --------------------
 
     cudaMemcpy(d_velocities, d_velocities_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice);
diff --git a/src/settings/constants.h b/src/settings/constants.h
index b96e1ec77dd818195053ed203dfa893035933845..5ea4afd18d43e36e5d9a761a1237c6fcbd6f9722 100644
--- a/src/settings/constants.h
+++ b/src/settings/constants.h
@@ -10,10 +10,10 @@ const float h_BOUND_Z_MIN = -4.0f;
 const float h_BOUND_Z_MAX = 4.0f;
 
 const float h_GAS_CONSTANT = 0.02f;
-const float h_VISCOSITY = 0.2f;
+const float h_VISCOSITY = 0.04f;
 
-const float h_PARTICLE_MASS = 0.4f;
-const float h_REST_DENSITY = 7500.0f;
+const float h_PARTICLE_MASS = 0.04f;
+const float h_REST_DENSITY = 800.0f;
 
 const float h_PI = 3.14159265358979323846f;
 const float h_H = 0.25f; // Smoothing radius
diff --git a/src/spawn_particles.cpp b/src/spawn_particles.cpp
index f5800c01ec76a257fb6a4356e277fa40735d020c..0499834886b206f90fb21e322913448bc02014c3 100644
--- a/src/spawn_particles.cpp
+++ b/src/spawn_particles.cpp
@@ -27,8 +27,8 @@ void initParticles(
             for (int k = 0; k < particlesPerAxis && index < NUM_PARTICLES; ++k) {
                 // Center particles around the origin
                 float x = (i - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING;
-                float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 4.0f;
-                float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 2;
+                float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 2.0f;
+                float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 0.5;
 
                 // Add random perturbation within [-0.1*dx, 0.1*dx]
                 float perturbation = 0.1f * GRID_SPACING;
@@ -55,8 +55,8 @@ void initParticles(
             for (int k = 0; k < particlesPerAxis && index < NUM_PARTICLES; ++k) {
                 // Center particles around the origin
                 float x = (i - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 9;
-                float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 4.0f;
-                float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING - 2;
+                float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 2.0f;
+                float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING - 0.5;
 
                 // Add random perturbation within [-0.1*dx, 0.1*dx]
                 float perturbation = 0.1f * GRID_SPACING;