diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h
index f629f97d9f25f631220a5a9c3241874b7f52a8aa..98b22b2669ce12135e10d35a248d020f4a55d3c5 100644
--- a/src/cuda/include/pcisph_kernels.h
+++ b/src/cuda/include/pcisph_kernels.h
@@ -12,7 +12,6 @@ __global__ void computeDensityError(
     float3* positions,
     float* densities,
     float* density_errors,
-    float* sum_gradW2,
     unsigned int* cellIndices,
     unsigned int* cellStart,
     unsigned int* cellEnd,
@@ -28,7 +27,6 @@ __global__ void computeDensityError(
 __global__ void updatePressures(
     float* pressure_increments,
     float* density_errors,
-    float A,
     int numParticles
 );
 
diff --git a/src/cuda/init_cuda.cu b/src/cuda/init_cuda.cu
index fa7bc1a959c526a80be7b39aa22b367bec678c1d..307f00e4db4fa2e4d9f5af8a8ab18fa7717877cc 100644
--- a/src/cuda/init_cuda.cu
+++ b/src/cuda/init_cuda.cu
@@ -178,7 +178,7 @@ void initializeConstants()
         h_PARTICLE_MASS,
         h_REST_DENSITY,
         h_H,
-        h_H/96.0f,
+        h_H/2.0f,
         h_SPIKY_GRAD_CONST
     );
 }
diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu
index d5c995e5071b86513d6dc15a9b203d74d8ca19bb..e5693df29f8ab1122547a39de2e412e81742c6c1 100644
--- a/src/cuda/pcisph_kernels.cu
+++ b/src/cuda/pcisph_kernels.cu
@@ -28,7 +28,6 @@ __global__ void computeDensityError(
     float3* positions,
     float* densities,
     float* density_errors,
-    float* sum_gradW2,
     unsigned int* cellIndices,
     unsigned int* cellStart,
     unsigned int* cellEnd,
@@ -79,13 +78,6 @@ __global__ void computeDensityError(
                         if (r2 < H2 && r2 > 0.0001f) {
                             float W = poly6Kernel(r2);
                             density += PARTICLE_MASS * W;
-
-                            if (sum_gradW2 != nullptr) {
-                                float dist = sqrtf(r2);
-                                float gradW_coeff = spikyKernelGrad(dist);
-                                float3 gradW = gradW_coeff * (r / dist);
-                                sumGradW2_local += dot(gradW, gradW);
-                            }
                         }
                     }
                 }
@@ -105,13 +97,6 @@ __global__ void computeDensityError(
                         if (r2 < H2 && r2 > 0.0001f) {
                             float W = poly6Kernel(r2);
                             density += PARTICLE_MASS * W; // Boundary treated similarly here
-
-                            if (sum_gradW2 != nullptr) {
-                                float dist = sqrtf(r2);
-                                float gradW_coeff = spikyKernelGrad(dist);
-                                float3 gradW = gradW_coeff * (r / dist);
-                                sumGradW2_local += dot(gradW, gradW);
-                            }
                         }
                     }
                 }
@@ -121,10 +106,6 @@ __global__ void computeDensityError(
 
     densities[idx] = density;
     density_errors[idx] = density - REST_DENSITY;
-
-    if (sum_gradW2 != nullptr) {
-        sum_gradW2[idx] = sumGradW2_local;
-    }
 }
 
 
@@ -132,14 +113,13 @@ __global__ void computeDensityError(
 __global__ void updatePressures(
     float* pressure_increments,
     float* density_errors,
-    float A,
     int numParticles
 )
 {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
     if (idx < numParticles) {
-        float deltaP = -density_errors[idx] / K_PCI;
+        float deltaP = -density_errors[idx] * K_PCI;
         pressure_increments[idx] = deltaP;
     }
 }
@@ -193,7 +173,8 @@ __global__ void computePressureForces(
                     for (unsigned int j = startIdx; j < endIdx; ++j) {
                         if (j == idx) continue;
                         float3 pos_j = positions[j];
-                        float deltaP_j = pressure_increments[j];
+                        //float deltaP_j = pressure_increments[j];
+                        float deltaP_j = deltaP_i;
 
                         float3 r = pos_i - pos_j;
                         float distSq = dot(r, r);
@@ -212,27 +193,27 @@ __global__ void computePressureForces(
                 // ------------
             
                 
-                // unsigned int b_startIdx = boundaryCellStart[neighborCellIndex];
-                // unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex];
-
-                // if (b_startIdx != 0xFFFFFFFF) {
-                //     for (unsigned int j = b_startIdx; j < b_endIdx; ++j) {
-                //         float3 pos_j = boundaryPositions[j];
-                //         // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary
-                //         float deltaP_j = deltaP_i; // Pressure mirroring
-
-                //         float3 r = pos_i - pos_j;
-                //         float distSq = dot(r, r);
-                //         if (distSq < H2 && distSq > 0.0001f) {
-                //             float dist = sqrtf(distSq);
-                //             float gradW = spikyKernelGrad(dist);
-                //             float3 gradWij = gradW * (r / dist);
-                //             // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0
-                //             float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY);
-                //             fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS;
-                //         }
-                //     }
-                // } 
+                unsigned int b_startIdx = boundaryCellStart[neighborCellIndex];
+                unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex];
+
+                if (b_startIdx != 0xFFFFFFFF) {
+                    for (unsigned int j = b_startIdx; j < b_endIdx; ++j) {
+                        float3 pos_j = boundaryPositions[j];
+                        // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary
+                        float deltaP_j = deltaP_i; // Pressure mirroring
+
+                        float3 r = pos_i - pos_j;
+                        float distSq = dot(r, r);
+                        if (distSq < H2 && distSq > 0.0001f) {
+                            float dist = sqrtf(distSq);
+                            float gradW = spikyKernelGrad(dist);
+                            float3 gradWij = gradW * (r / dist);
+                            // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0
+                            float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY);
+                            fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS;
+                        }
+                    }
+                } 
                 
             }
         }
diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu
index 4b296bea67dfe70e04d7a16b1c3f4992a0372f10..eea44b15370931defeb6a87a411f63afae1eba82 100644
--- a/src/cuda/pcisph_update.cu
+++ b/src/cuda/pcisph_update.cu
@@ -68,45 +68,31 @@ void updateParticles(float deltaTime)
     // Initialize pressure increments to zero
     cudaMemset(d_pressure_increments, 0, NUM_PARTICLES * sizeof(float));
 
-    // Compute densities and density errors, and per-particle sumGradW2
-    computeDensityError<<<blocksPerGrid, threadsPerBlock>>>(
-        d_positions_pred,
-        d_densities,
-        d_density_errors,
-        d_sum_gradW2,
-        d_cellIndices,
-        d_cellStart,
-        d_cellEnd,
-        d_boundary_positions,
-        d_boundaryCellStart,
-        d_boundaryCellEnd,
-        NUM_PARTICLES,
-        gridSizeX, gridSizeY, gridSizeZ);
-    cudaDeviceSynchronize();
-
-    // Reduce sum_gradW2 over all particles
-    thrust::device_ptr<float> dev_sumGradW2_ptr(d_sum_gradW2);
-    float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>());
-
     float r0 = 0.5f * h_H;          // Penetration threshold
     float epsilon = 1.0f; // Moderate to high friction
     float delta = 0.5f;   // Low to moderate elasticity
     float normalDamping = 0.5f; // Adjust as needed
 
-
-    // Compute A
-    float m = h_PARTICLE_MASS;
-    float rho0 = h_REST_DENSITY;
-    float A = 2.0f * timeStep * timeStep * m * m / (rho0 * rho0) * totalSumGradW2;
-
-    //printf("K_PCI: %f \n", A);
-
     // ------------ Iterative Pressure Correction Loop --------------------
 
     const int maxIterations = 3;
 
     for (int iter = 0; iter < maxIterations; ++iter) {
     // Launch the boundary correction kernel
+        computeDensityError<<<blocksPerGrid, threadsPerBlock>>>(
+            d_positions_pred,
+            d_densities,
+            d_density_errors,
+            d_cellIndices,
+            d_cellStart,
+            d_cellEnd,
+            d_boundary_positions,
+            d_boundaryCellStart,
+            d_boundaryCellEnd,
+            NUM_PARTICLES,
+            gridSizeX, gridSizeY, gridSizeZ);
+        cudaDeviceSynchronize();
+        
         correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>(
             d_positions_pred,
             d_velocities_pred,
@@ -128,7 +114,6 @@ void updateParticles(float deltaTime)
         updatePressures<<<blocksPerGrid, threadsPerBlock>>>(
             d_pressure_increments,
             d_density_errors,
-            A,
             NUM_PARTICLES);
         cudaDeviceSynchronize();
 
@@ -154,20 +139,6 @@ void updateParticles(float deltaTime)
             NUM_PARTICLES);
         cudaDeviceSynchronize();
         
-        computeDensityError<<<blocksPerGrid, threadsPerBlock>>>(
-            d_positions_pred,
-            d_densities,
-            d_density_errors,
-            nullptr,
-            d_cellIndices,
-            d_cellStart,
-            d_cellEnd,
-            d_boundary_positions,
-            d_boundaryCellStart,
-            d_boundaryCellEnd,
-            NUM_PARTICLES,
-            gridSizeX, gridSizeY, gridSizeZ);
-        cudaDeviceSynchronize();
     }