diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h
index 18c65f885237b033780e2869ec41d48806c9a210..07be87cb0ed361f5743354e6da751f6f44015ba3 100644
--- a/src/cuda/include/pcisph_kernels.h
+++ b/src/cuda/include/pcisph_kernels.h
@@ -16,6 +16,10 @@ __global__ void computeDensityError(
     unsigned int* cellIndices,
     unsigned int* cellStart,
     unsigned int* cellEnd,
+    // Boundary data
+    float3* boundaryPositions,
+    unsigned int* boundaryCellStart,
+    unsigned int* boundaryCellEnd,
     int numParticles,
     int gridSizeX, int gridSizeY, int gridSizeZ
 );
@@ -36,6 +40,10 @@ __global__ void computePressureForces(
     unsigned int* cellIndices,
     unsigned int* cellStart,
     unsigned int* cellEnd,
+    // Boundary data
+    float3* boundaryPositions,
+    unsigned int* boundaryCellStart,
+    unsigned int* boundaryCellEnd,
     int numParticles,
     int gridSizeX, int gridSizeY, int gridSizeZ
 );
diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu
index f2efd40677d2f06175404a1d6a7f80950a4733db..e42cda376bc9a29282087928e88d283418cc2ab2 100644
--- a/src/cuda/pcisph_kernels.cu
+++ b/src/cuda/pcisph_kernels.cu
@@ -32,76 +32,115 @@ __global__ void computeDensityError(
     unsigned int* cellIndices,
     unsigned int* cellStart,
     unsigned int* cellEnd,
+    // Boundary data
+    float3* boundaryPositions,
+    unsigned int* boundaryCellStart,
+    unsigned int* boundaryCellEnd,
     int numParticles,
     int gridSizeX, int gridSizeY, int gridSizeZ
 )
 {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (idx < numParticles) {
-        float3 pos_i = positions[idx];
-        float density = 0.0f;
-        float sumGradW2_local = 0.0f;
-
-        unsigned int cellIndex = cellIndices[idx];
-
-        // Compute cell coordinates
-        int cellX = cellIndex % gridSizeX;
-        int cellY = (cellIndex / gridSizeX) % gridSizeY;
-        int cellZ = cellIndex / (gridSizeX * gridSizeY);
-
-        // Iterate over neighboring cells
-        for (int dz = -1; dz <= 1; ++dz) {
-            int nz = cellZ + dz;
-            if (nz < 0 || nz >= gridSizeZ) continue;
-            for (int dy = -1; dy <= 1; ++dy) {
-                int ny = cellY + dy;
-                if (ny < 0 || ny >= gridSizeY) continue;
-                for (int dx = -1; dx <= 1; ++dx) {
-                    int nx = cellX + dx;
-                    if (nx < 0 || nx >= gridSizeX) continue;
-
-                    unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY;
-
-                    unsigned int startIdx = cellStart[neighborCellIndex];
-                    unsigned int endIdx = cellEnd[neighborCellIndex];
-
-                    if (startIdx != 0xFFFFFFFF) {
-                        for (unsigned int j = startIdx; j < endIdx; ++j) {
-                            float3 pos_j = positions[j];
-
-                            float3 r = pos_i - pos_j;
-                            float r2 = dot(r, r);
-
-                            if (r2 < H2 && r2 > 0.0001f) {
+    if (idx >= numParticles) return;
+
+    float3 pos_i = positions[idx];
+    float density = 0.0f;
+    float sumGradW2_local = 0.0f;
+    
+    unsigned int cellIndex = cellIndices[idx];
+
+    int cellX = cellIndex % gridSizeX;
+    int cellY = (cellIndex / gridSizeX) % gridSizeY;
+    int cellZ = cellIndex / (gridSizeX * gridSizeY);
+
+    // ----------------------
+    // Fluid neighbors
+    // ----------------------
+    for (int dz = -1; dz <= 1; ++dz) {
+        int nz = cellZ + dz;
+        if (nz < 0 || nz >= gridSizeZ) continue;
+        for (int dy = -1; dy <= 1; ++dy) {
+            int ny = cellY + dy;
+            if (ny < 0 || ny >= gridSizeY) continue;
+            for (int dx = -1; dx <= 1; ++dx) {
+                int nx = cellX + dx;
+                if (nx < 0 || nx >= gridSizeX) continue;
+
+                unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY;
+                unsigned int startIdx = cellStart[neighborCellIndex];
+                unsigned int endIdx = cellEnd[neighborCellIndex];
+
+                if (startIdx != 0xFFFFFFFF) {
+                    for (unsigned int j = startIdx; j < endIdx; ++j) {
+                        float3 pos_j = positions[j];
+                        float3 r = pos_i - pos_j;
+                        float r2 = dot(r, r);
+
+                        if (r2 < H2 && r2 > 0.0001f) {
+                            float W = poly6Kernel(r2);
+                            density += PARTICLE_MASS * W;
+
+                            if (sum_gradW2 != nullptr) {
                                 float dist = sqrtf(r2);
+                                float gradW_coeff = spikyKernelGrad(dist);
+                                float3 gradW = gradW_coeff * (r / dist);
+                                sumGradW2_local += dot(gradW, gradW);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
 
-                                float W = poly6Kernel(r2);
-                                density += PARTICLE_MASS * W;
-
-                                if (sum_gradW2 != nullptr) {
-                                    // Compute gradW
-                                    float gradW_coeff = spikyKernelGrad(dist);
-                                    float3 gradW = gradW_coeff * (r / dist);
-
-                                    sumGradW2_local += dot(gradW, gradW);
-                                }
+    // ----------------------
+    // Boundary neighbors
+    // ----------------------
+    for (int dz = -1; dz <= 1; ++dz) {
+        int nz = cellZ + dz;
+        if (nz < 0 || nz >= gridSizeZ) continue;
+        for (int dy = -1; dy <= 1; ++dy) {
+            int ny = cellY + dy;
+            if (ny < 0 || ny >= gridSizeY) continue;
+            for (int dx = -1; dx <= 1; ++dx) {
+                int nx = cellX + dx;
+                if (nx < 0 || nx >= gridSizeX) continue;
+
+                unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY;
+                unsigned int b_startIdx = boundaryCellStart[neighborCellIndex];
+                unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex];
+
+                if (b_startIdx != 0xFFFFFFFF) {
+                    for (unsigned int j = b_startIdx; j < b_endIdx; ++j) {
+                        float3 pos_j = boundaryPositions[j];
+                        float3 r = pos_i - pos_j;
+                        float r2 = dot(r, r);
+                        if (r2 < H2 && r2 > 0.0001f) {
+                            float W = poly6Kernel(r2);
+                            density += PARTICLE_MASS * W; // Boundary treated similarly here
+
+                            if (sum_gradW2 != nullptr) {
+                                float dist = sqrtf(r2);
+                                float gradW_coeff = spikyKernelGrad(dist);
+                                float3 gradW = gradW_coeff * (r / dist);
+                                sumGradW2_local += dot(gradW, gradW);
                             }
                         }
                     }
                 }
             }
         }
+    }
 
-        densities[idx] = density;
-        density_errors[idx] = density - REST_DENSITY;
+    densities[idx] = density;
+    density_errors[idx] = density - REST_DENSITY;
 
-        if (sum_gradW2 != nullptr) {
-            sum_gradW2[idx] = sumGradW2_local;
-        }
+    if (sum_gradW2 != nullptr) {
+        sum_gradW2[idx] = sumGradW2_local;
     }
 }
 
+
 // Update pressures based on density errors
 __global__ void updatePressures(
     float* pressure_increments,
@@ -126,74 +165,104 @@ __global__ void computePressureForces(
     unsigned int* cellIndices,
     unsigned int* cellStart,
     unsigned int* cellEnd,
+    // Boundary data
+    float3* boundaryPositions,
+    unsigned int* boundaryCellStart,
+    unsigned int* boundaryCellEnd,
     int numParticles,
     int gridSizeX, int gridSizeY, int gridSizeZ
 )
 {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (idx < numParticles) {
-        float3 pos_i = positions[idx];
-        float deltaP_i = pressure_increments[idx];
-
-        float3 fPressure = make_float3(0.0f, 0.0f, 0.0f);
-
-        unsigned int cellIndex = cellIndices[idx];
-
-        // Compute cell coordinates
-        int cellX = cellIndex % gridSizeX;
-        int cellY = (cellIndex / gridSizeX) % gridSizeY;
-        int cellZ = cellIndex / (gridSizeX * gridSizeY);
-
-        // Iterate over neighboring cells
-        for (int dz = -1; dz <= 1; ++dz) {
-            int nz = cellZ + dz;
-            if (nz < 0 || nz >= gridSizeZ) continue;
-            for (int dy = -1; dy <= 1; ++dy) {
-                int ny = cellY + dy;
-                if (ny < 0 || ny >= gridSizeY) continue;
-                for (int dx = -1; dx <= 1; ++dx) {
-                    int nx = cellX + dx;
-                    if (nx < 0 || nx >= gridSizeX) continue;
-
-                    unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY;
-
-                    unsigned int startIdx = cellStart[neighborCellIndex];
-                    unsigned int endIdx = cellEnd[neighborCellIndex];
-
-                    if (startIdx != 0xFFFFFFFF) {
-                        for (unsigned int j = startIdx; j < endIdx; ++j) {
-                            if (j == idx) continue;
-
-                            float3 pos_j = positions[j];
-                            float deltaP_j = pressure_increments[j];
-
-                            float3 r = pos_i - pos_j;
-
-                            float distSq = dot(r, r);
-
-                            if (distSq < H2 && distSq > 0.0001f) {
-                                float dist = sqrtf(distSq);
-
-                                // Compute gradient of kernel
-                                float gradW = spikyKernelGrad(dist);
-                                float3 gradWij = gradW * (r / dist);
-
-                                // Symmetric pressure force
-                                float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY);
-                                fPressure += factor * gradWij;
-                            }
+    if (idx >= numParticles) return;
+
+    float3 pos_i = positions[idx];
+    float deltaP_i = pressure_increments[idx];
+    float3 fPressure = make_float3(0.0f, 0.0f, 0.0f);
+
+    unsigned int cellIndex = cellIndices[idx];
+    int cellX = cellIndex % gridSizeX;
+    int cellY = (cellIndex / gridSizeX) % gridSizeY;
+    int cellZ = cellIndex / (gridSizeX * gridSizeY);
+
+    // ------------
+    // Fluid neighbors
+    // ------------
+    for (int dz = -1; dz <= 1; ++dz) {
+        int nz = cellZ + dz;
+        if (nz < 0 || nz >= gridSizeZ) continue;
+        for (int dy = -1; dy <= 1; ++dy) {
+            int ny = cellY + dy;
+            if (ny < 0 || ny >= gridSizeY) continue;
+            for (int dx = -1; dx <= 1; ++dx) {
+                int nx = cellX + dx;
+                if (nx < 0 || nx >= gridSizeX) continue;
+
+                unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY;
+                unsigned int startIdx = cellStart[neighborCellIndex];
+                unsigned int endIdx = cellEnd[neighborCellIndex];
+
+                if (startIdx != 0xFFFFFFFF) {
+                    for (unsigned int j = startIdx; j < endIdx; ++j) {
+                        if (j == idx) continue;
+                        float3 pos_j = positions[j];
+                        float deltaP_j = pressure_increments[j];
+
+                        float3 r = pos_i - pos_j;
+                        float distSq = dot(r, r);
+                        if (distSq < H2 && distSq > 0.0001f) {
+                            float dist = sqrtf(distSq);
+                            float gradW = spikyKernelGrad(dist);
+                            float3 gradWij = gradW * (r / dist);
+                            float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY);
+                            fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS;
                         }
                     }
                 }
             }
         }
+    }
 
-        // Multiply by constants
-        fPressure *= PARTICLE_MASS * PARTICLE_MASS;
-
-        pressure_forces[idx] = fPressure;
+    // ------------
+    // Boundary neighbors
+    // ------------
+    for (int dz = -1; dz <= 1; ++dz) {
+        int nz = cellZ + dz;
+        if (nz < 0 || nz >= gridSizeZ) continue;
+        for (int dy = -1; dy <= 1; ++dy) {
+            int ny = cellY + dy;
+            if (ny < 0 || ny >= gridSizeY) continue;
+            for (int dx = -1; dx <= 1; ++dx) {
+                int nx = cellX + dx;
+                if (nx < 0 || nx >= gridSizeX) continue;
+
+                unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY;
+                unsigned int b_startIdx = boundaryCellStart[neighborCellIndex];
+                unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex];
+
+                if (b_startIdx != 0xFFFFFFFF) {
+                    for (unsigned int j = b_startIdx; j < b_endIdx; ++j) {
+                        float3 pos_j = boundaryPositions[j];
+                        // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary
+                        float deltaP_j = deltaP_i; // Pressure mirroring
+
+                        float3 r = pos_i - pos_j;
+                        float distSq = dot(r, r);
+                        if (distSq < H2 && distSq > 0.0001f) {
+                            float dist = sqrtf(distSq);
+                            float gradW = spikyKernelGrad(dist);
+                            float3 gradWij = gradW * (r / dist);
+                            // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0
+                            float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY);
+                            fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS;
+                        }
+                    }
+                }
+            }
+        }
     }
+
+    pressure_forces[idx] = fPressure;
 }
 
 // Compute viscosity and external forces (excluding pressure forces)
diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu
index 05c163b43b3544496b5138ca7aebb6c9308c2a93..51171356a411e67f1e022de263f0b9a58119a0be 100644
--- a/src/cuda/pcisph_update.cu
+++ b/src/cuda/pcisph_update.cu
@@ -77,6 +77,9 @@ void updateParticles(float deltaTime)
         d_cellIndices,
         d_cellStart,
         d_cellEnd,
+        d_boundary_positions,
+        d_boundaryCellStart,
+        d_boundaryCellEnd,
         NUM_PARTICLES,
         gridSizeX, gridSizeY, gridSizeZ);
     cudaDeviceSynchronize();
@@ -103,6 +106,9 @@ void updateParticles(float deltaTime)
             d_cellIndices,
             d_cellStart,
             d_cellEnd,
+            d_boundary_positions,
+            d_boundaryCellStart,
+            d_boundaryCellEnd,
             NUM_PARTICLES,
             gridSizeX, gridSizeY, gridSizeZ);
         cudaDeviceSynchronize();
@@ -121,6 +127,9 @@ void updateParticles(float deltaTime)
             d_cellIndices,
             d_cellStart,
             d_cellEnd,
+            d_boundary_positions,
+            d_boundaryCellStart,
+            d_boundaryCellEnd,
             NUM_PARTICLES,
             gridSizeX, gridSizeY, gridSizeZ);
         cudaDeviceSynchronize();
@@ -141,12 +150,14 @@ void updateParticles(float deltaTime)
 
     // Enforce boundary conditions
     float damping = 0.75f;
+    
     enforceBoundary<<<blocksPerGrid, threadsPerBlock>>>(
         d_positions,
         d_velocities,
         NUM_PARTICLES,
         damping);
     cudaDeviceSynchronize();
+    
 
     // Unmap the VBO resource
     cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0);
diff --git a/src/settings/constants.h b/src/settings/constants.h
index 3faa9ca00980c22eb87f93a11ffcdc0e5f42c006..b96e1ec77dd818195053ed203dfa893035933845 100644
--- a/src/settings/constants.h
+++ b/src/settings/constants.h
@@ -13,10 +13,10 @@ const float h_GAS_CONSTANT = 0.02f;
 const float h_VISCOSITY = 0.2f;
 
 const float h_PARTICLE_MASS = 0.4f;
-const float h_REST_DENSITY = 3000.0f;
+const float h_REST_DENSITY = 7500.0f;
 
 const float h_PI = 3.14159265358979323846f;
-const float h_H = 0.35f; // Smoothing radius
+const float h_H = 0.25f; // Smoothing radius
 
 const int NUM_PARTICLES = 50000;