diff --git a/src/FluidSimulation.cpp b/src/FluidSimulation.cpp
index 1701db343d3ab7066ea8773348878607f045eff7..3e1fc47ea53abffe9dca96155a7e66189c0e7b5f 100644
--- a/src/FluidSimulation.cpp
+++ b/src/FluidSimulation.cpp
@@ -41,15 +41,15 @@ namespace FluidSimulation {
 
         // Initialize boundary
         std::vector<float3> boundary_positions;
-        std::vector<float3> boundary_velocities;
+        std::vector<float3> boundary_normals;
         int boundaryLayers = 1;
         float spacing = 0.5 * h_H; // Using 0.01 as per your code
 
-        createBoundaryParticles(boundary_positions, boundary_velocities, boundaryLayers, spacing);
+        createBoundaryParticles(boundary_positions, boundary_normals, boundaryLayers, spacing);
         m_numBoundaryParticles = boundary_positions.size();
 
         // Initialize boundary device arrays
-        FluidSimulationGPU::initializeBoundryDeviceArrays(boundary_positions.data(), boundary_velocities.data(), m_numBoundaryParticles);
+        FluidSimulationGPU::initializeBoundryDeviceArrays(boundary_positions.data(), boundary_normals.data(), m_numBoundaryParticles);
 
         // Create boundary VBO and register with CUDA
         glGenBuffers(1, &m_vbo_boundary);
diff --git a/src/cuda/include/cuda_device_vars.h b/src/cuda/include/cuda_device_vars.h
index e0eada192f94a395ec8b53503a082e6bca262ce7..b4eeb12a45cc11c36756b79c20a1052105aa0972 100644
--- a/src/cuda/include/cuda_device_vars.h
+++ b/src/cuda/include/cuda_device_vars.h
@@ -57,7 +57,7 @@ extern float* d_sum_gradW2; // For computing A
 
 // Boundary particles
 extern float3* d_boundary_positions;
-extern float3* d_boundary_velocities;
+extern float3* d_boundary_normals;
 
 extern unsigned int* d_boundaryCellStart;
 extern unsigned int* d_boundaryCellEnd;
diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h
index 6551eb89d4a76cb3705230fcb7532779ecde7e03..f629f97d9f25f631220a5a9c3241874b7f52a8aa 100644
--- a/src/cuda/include/pcisph_kernels.h
+++ b/src/cuda/include/pcisph_kernels.h
@@ -49,16 +49,19 @@ __global__ void computePressureForces(
 );
 
 __global__ void correctBoundaryPenetrations(
-    float3* positions_pred,
-    float3* velocities_pred,
-    const float3* boundary_positions,
-    unsigned int* cellIndices,
-    unsigned int* boundaryCellStart,
-    unsigned int* boundaryCellEnd,
-    int numParticles,
-    int gridSizeX, int gridSizeY, int gridSizeZ,
-    float r0,
-    float normalDamping
+    float3* positions_pred,            // Predicted fluid positions
+    float3* velocities_pred,           // Predicted fluid velocities
+    const float3* boundary_positions,   // Boundary particle positions
+    const float3* boundary_normals,     // Boundary particle normals
+    unsigned int* cellIndices,          // Cell indices for fluid particles
+    unsigned int* boundaryCellStart,    // Start indices for boundary particles per cell
+    unsigned int* boundaryCellEnd,      // End indices for boundary particles per cell
+    int numParticles,                   // Number of fluid particles
+    int gridSizeX, int gridSizeY, int gridSizeZ, // Grid dimensions
+    float r0,                           // Penetration threshold (e.g., 0.5 * h)
+    float normalDamping,                // Damping factor for velocity
+    float epsilon,                      // Friction coefficient (0 <= epsilon <= 1)
+    float delta                         // Elasticity coefficient (0 <= delta <= 1)
 );
 
 // Function to compute viscosity and external forces
diff --git a/src/cuda/init_cuda.cu b/src/cuda/init_cuda.cu
index 7cb10f4a4d13d2960bede1f7179c7701a12fbb3e..9c8630bbac0f26525ddedae68faf186dbaf5faca 100644
--- a/src/cuda/init_cuda.cu
+++ b/src/cuda/init_cuda.cu
@@ -62,7 +62,7 @@ float* d_sum_gradW2 = nullptr;
 
 // Boundary particles
 float3* d_boundary_positions = nullptr;
-float3* d_boundary_velocities = nullptr;
+float3* d_boundary_normals = nullptr;
 
 unsigned int* d_boundaryCellStart = nullptr;
 unsigned int* d_boundaryCellEnd = nullptr;
@@ -169,7 +169,7 @@ void initializeFluidDeviceArrays(
 
 void initializeBoundryDeviceArrays(
     const float3* h_boundaryPositions,
-    const float3* h_boundaryVelocities,
+    const float3* h_boundaryNormals, // Renamed parameter
     int numBoundaryParticles
 )
 {
@@ -179,11 +179,13 @@ void initializeBoundryDeviceArrays(
     int gridSizeZ = (int)ceil((h_BOUND_Z_MAX - h_BOUND_Z_MIN) / h_H);
     int maxCells = gridSizeX * gridSizeY * gridSizeZ;
 
-    // cudaMalloc(&d_boundary_positions, numBoundaryParticles * sizeof(float3));
-    cudaMalloc(&d_boundary_velocities, numBoundaryParticles * sizeof(float3));
-
+    // Allocate memory for boundary positions
+    //cudaMalloc(&d_boundary_positions, numBoundaryParticles * sizeof(float3));
     //cudaMemcpy(d_boundary_positions, h_boundaryPositions, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice);
-    cudaMemcpy(d_boundary_velocities, h_boundaryVelocities, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice);
+
+    // Allocate memory for boundary normals
+    cudaMalloc(&d_boundary_normals, numBoundaryParticles * sizeof(float3));
+    cudaMemcpy(d_boundary_normals, h_boundaryNormals, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice);
 
     cudaMalloc(&d_boundaryCellIndices, numBoundaryParticles * sizeof(unsigned int));
     cudaMalloc(&d_boundaryParticleIndices, numBoundaryParticles * sizeof(unsigned int));
@@ -207,18 +209,18 @@ void boundaryNBSearch(int numBoundaryParticles) {
     int blocksPerGrid = (numBoundaryParticles + threadsPerBlock - 1) / threadsPerBlock;
     
     float3* d_boundary_positions_sorted;
-    float3* d_boundary_velocities_sorted;
+    float3* d_boundary_normals_sorted;
     
 
     cudaMalloc(&d_boundary_positions_sorted, numBoundaryParticles * sizeof(float3));
-    cudaMalloc(&d_boundary_velocities_sorted, numBoundaryParticles * sizeof(float3));
+    cudaMalloc(&d_boundary_normals_sorted, numBoundaryParticles * sizeof(float3));
     
     neigbourHoodSearch(
             numBoundaryParticles, 
             d_boundary_positions,
-            d_boundary_velocities,
+            d_boundary_normals,
             d_boundary_positions_sorted,
-            d_boundary_velocities_sorted,
+            d_boundary_normals_sorted,
             d_boundaryCellIndices,
             d_boundaryParticleIndices,
             d_boundaryCellEnd,
@@ -233,7 +235,7 @@ void boundaryNBSearch(int numBoundaryParticles) {
     cudaGraphicsUnmapResources(1, &cuda_vbo_boundary_resource, 0);
     
     cudaFree(d_boundary_positions_sorted);
-    cudaFree(d_boundary_velocities_sorted);
+    cudaFree(d_boundary_normals_sorted);
     
     // Move some of above init code for boundary to seeparate function
 }
@@ -261,7 +263,7 @@ void cleanupDeviceArrays()
     cudaFree(d_accelerations);
 
     cudaFree(d_boundary_positions);
-    cudaFree(d_boundary_velocities);
+    cudaFree(d_boundary_normals);
 
     cudaFree(d_boundaryCellIndices);
     cudaFree(d_boundaryParticleIndices);
diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu
index 615b9bf309185baeb96de8645206bbe3869e0bcd..a0dac1aaf44261bb06286921004d64a77b8a7eff 100644
--- a/src/cuda/pcisph_kernels.cu
+++ b/src/cuda/pcisph_kernels.cu
@@ -245,14 +245,16 @@ __global__ void correctBoundaryPenetrations(
     float3* positions_pred,
     float3* velocities_pred,
     const float3* boundary_positions,
+    const float3* boundary_normals,
     unsigned int* cellIndices,
     unsigned int* boundaryCellStart,
     unsigned int* boundaryCellEnd,
     int numParticles,
     int gridSizeX, int gridSizeY, int gridSizeZ,
     float r0,
-    float normalDamping
-)
+    float normalDamping,
+    float epsilon,
+    float delta)
 {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
     if (idx >= numParticles) return;
@@ -267,11 +269,12 @@ __global__ void correctBoundaryPenetrations(
     int cellY = (cellIndex / gridSizeX) % gridSizeY;
     int cellZ = cellIndex / (gridSizeX * gridSizeY);
 
+    // Accumulators for penetration correction
     float w_sum = 0.0f;
     float depth_sum = 0.0f;
     float3 normal_sum = make_float3(0.0f, 0.0f, 0.0f);
 
-    // Iterate over neighboring cells in a 3x3x3 area
+    // Iterate over neighboring cells in a 3x3x3 grid
     for (int dz = -1; dz <= 1; ++dz) {
         int nz = cellZ + dz;
         if (nz < 0 || nz >= gridSizeZ) continue;
@@ -294,16 +297,18 @@ __global__ void correctBoundaryPenetrations(
                 // Loop over boundary particles in the neighboring cell
                 for (unsigned int b = b_startIdx; b < b_endIdx; ++b) {
                     float3 pos_b = boundary_positions[b];
+                    float3 n_b = boundary_normals[b];
                     float3 r = pos_i - pos_b;
-                    float dist = sqrtf(dot(r, r));
+                    float distSq = dot(r, r);
+                    float dist = sqrtf(distSq);
 
                     if (dist < r0 && dist > 1e-6f) {
+                        // Compute penetration weight
                         float w = (r0 - dist) / r0;
                         w_sum += w;
                         depth_sum += w * (r0 - dist);
 
-                        // Approximate normal direction away from boundary particle
-                        float3 n_b = r / dist;
+                        // Accumulate normals weighted by penetration
                         normal_sum += w * n_b;
                     }
                 }
@@ -311,19 +316,32 @@ __global__ void correctBoundaryPenetrations(
         }
     }
 
-    // If we found penetration, correct it
+    // If penetration is detected, apply corrections
     if (w_sum > 0.0f) {
+        // Compute the average normal direction
         float3 n_c = normal_sum / w_sum;
         float n_len = sqrtf(dot(n_c, n_c));
         if (n_len > 1e-6f) {
-            n_c /= n_len;
+            n_c /= n_len; // Normalize the accumulated normal
+
+            // Compute the total penetration depth
             float correction = depth_sum / w_sum;
+
+            // Correct the fluid particle's position
             pos_i += n_c * correction;
 
-            // Dampen the normal velocity component
+            // Decompose velocity into normal and tangential components
             float v_n = dot(vel_i, n_c);
             float3 v_normal = v_n * n_c;
-            vel_i -= normalDamping * v_normal;
+            float3 v_tangential = vel_i - v_normal;
+
+            // Apply friction and elasticity
+            // [v_tangential_new] = epsilon * v_tangential
+            // [v_normal_new] = -delta * v_normal
+            float3 v_new = epsilon * v_tangential - delta * v_normal;
+
+            // Update velocity
+            vel_i = v_new;
         }
     }
 
diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu
index b3042718ac006189be0e8edec9eb0e1d787e5645..34a73f90a56157c030c5f45fc9d1be9a454903f7 100644
--- a/src/cuda/pcisph_update.cu
+++ b/src/cuda/pcisph_update.cu
@@ -89,7 +89,9 @@ void updateParticles(float deltaTime)
     float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>());
 
     float r0 = 0.5f * h_H;          // Penetration threshold
-    float normalDamping = 0.1f;   // Damping factor (adjust as needed, e.g., 0.5f)
+    float epsilon = 1.0f; // Moderate to high friction
+    float delta = 0.5f;   // Low to moderate elasticity
+    float normalDamping = 0.5f; // Adjust as needed
 
 
     // Compute A
@@ -107,30 +109,19 @@ void updateParticles(float deltaTime)
             d_positions_pred,
             d_velocities_pred,
             d_boundary_positions,
+            d_boundary_normals,
             d_cellIndices,
             d_boundaryCellStart,
             d_boundaryCellEnd,
             NUM_PARTICLES,
             gridSizeX, gridSizeY, gridSizeZ,
             r0,
-            normalDamping
+            normalDamping,
+            epsilon,
+            delta
         );
         cudaDeviceSynchronize();
 
-        computeDensityError<<<blocksPerGrid, threadsPerBlock>>>(
-            d_positions_pred,
-            d_densities,
-            d_density_errors,
-            nullptr,
-            d_cellIndices,
-            d_cellStart,
-            d_cellEnd,
-            d_boundary_positions,
-            d_boundaryCellStart,
-            d_boundaryCellEnd,
-            NUM_PARTICLES,
-            gridSizeX, gridSizeY, gridSizeZ);
-        cudaDeviceSynchronize();
 
         updatePressures<<<blocksPerGrid, threadsPerBlock>>>(
             d_pressure_increments,
@@ -160,22 +151,40 @@ void updateParticles(float deltaTime)
             timeStep,
             NUM_PARTICLES);
         cudaDeviceSynchronize();
+        
+        computeDensityError<<<blocksPerGrid, threadsPerBlock>>>(
+            d_positions_pred,
+            d_densities,
+            d_density_errors,
+            nullptr,
+            d_cellIndices,
+            d_cellStart,
+            d_cellEnd,
+            d_boundary_positions,
+            d_boundaryCellStart,
+            d_boundaryCellEnd,
+            NUM_PARTICLES,
+            gridSizeX, gridSizeY, gridSizeZ);
+        cudaDeviceSynchronize();
     }
 
     
-    // Launch the boundary correction kernel
     correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>(
         d_positions_pred,
         d_velocities_pred,
         d_boundary_positions,
+        d_boundary_normals,
         d_cellIndices,
         d_boundaryCellStart,
         d_boundaryCellEnd,
         NUM_PARTICLES,
         gridSizeX, gridSizeY, gridSizeZ,
         r0,
-        normalDamping
+        normalDamping,
+        epsilon,
+        delta
     );
+    
     cudaDeviceSynchronize();
 
     // -------------------- Finalization --------------------
diff --git a/src/spawn_particles.cpp b/src/spawn_particles.cpp
index 0499834886b206f90fb21e322913448bc02014c3..59986b0b7994b2420699443313781a82db6dbc58 100644
--- a/src/spawn_particles.cpp
+++ b/src/spawn_particles.cpp
@@ -19,7 +19,7 @@ void initParticles(
     srand(static_cast<unsigned int>(time(0)));
 
     // Calculate particles per axis
-    int particlesPerAxis = static_cast<int>(ceil(pow(NUM_PARTICLES/2, 1.0f / 3.0f)));
+    int particlesPerAxis = static_cast<int>(ceil(pow(NUM_PARTICLES, 1.0f / 3.0f)));
 
     int index = 0;
     for (int i = 0; i < particlesPerAxis && index < NUM_PARTICLES; ++i) {
@@ -27,7 +27,7 @@ void initParticles(
             for (int k = 0; k < particlesPerAxis && index < NUM_PARTICLES; ++k) {
                 // Center particles around the origin
                 float x = (i - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING;
-                float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 2.0f;
+                float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 4.0f;
                 float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 0.5;
 
                 // Add random perturbation within [-0.1*dx, 0.1*dx]
@@ -49,7 +49,7 @@ void initParticles(
             }
         }
     }
-
+    /*
     for (int i = 0; i < particlesPerAxis && index < NUM_PARTICLES; ++i) {
         for (int j = 0; j < particlesPerAxis && index < NUM_PARTICLES; ++j) {
             for (int k = 0; k < particlesPerAxis && index < NUM_PARTICLES; ++k) {
@@ -77,6 +77,7 @@ void initParticles(
             }
         }
     }
+    */
     // If NUM_PARTICLES is not a perfect cube, initialize remaining particles
     for (; index < NUM_PARTICLES; ++index) {
         h_positions[index] = make_float3(0.0f, 10.0f, 0.0f); // Default position
@@ -84,19 +85,20 @@ void initParticles(
     }
 }
 
+
 void createBoundaryParticles(
     std::vector<float3> &boundary_positions, 
-    std::vector<float3> &boundary_velocities,
+    std::vector<float3> &boundary_normals, // Renamed from boundary_velocities
     int boundaryLayers, 
     float spacing) 
 {
-    // Initialize velocities to zero for boundary particles
-    float3 zeroVel = make_float3(0.0f, 0.0f, 0.0f);
+    // Define boundary normals based on boundary geometry
+    // For axis-aligned boxes, normals are straightforward
 
     // Compute inner box dimensions based on the specified fractions
-    float box_size_x = (h_BOUND_X_MAX - h_BOUND_X_MIN); // / 4.0f; // 1/4 of X dimension
-    float box_size_y = (h_BOUND_Y_MAX - h_BOUND_Y_MIN); // / 2.0f; // 1/2 of Y dimension
-    float box_size_z = (h_BOUND_Z_MAX - h_BOUND_Z_MIN); // / 4.0f; // 1/4 of Z dimension
+    float box_size_x = (h_BOUND_X_MAX - h_BOUND_X_MIN);
+    float box_size_y = (h_BOUND_Y_MAX - h_BOUND_Y_MIN);
+    float box_size_z = (h_BOUND_Z_MAX - h_BOUND_Z_MIN);
 
     // Compute inner box center positions for X and Z axes
     float x_center = (h_BOUND_X_MAX + h_BOUND_X_MIN) / 2.0f;
@@ -140,29 +142,38 @@ void createBoundaryParticles(
 
             for (int i = 0; i < loop_i_count; i++) {
                 for (int j = 0; j < loop_j_count; j++) {
-                    float x, y, z;
+                    float x, y, z, nx, ny, nz;
                     switch (axis) {
-                        case 0: // X-axis (Left/Right walls): fix x, vary y and z
+                        case 0: // X-axis (Left/Right walls)
                             x = positive ? (inner_max_x + positionOffset) : (inner_min_x - positionOffset);
                             y = inner_min_y + i * spacing;
                             z = inner_min_z + j * spacing;
+                            nx = positive ? -1.0f : 1.0f;
+                            ny = 0.0f;
+                            nz = 0.0f;
                             break;
 
-                        case 1: // Y-axis (Floor/Ceiling): fix y, vary x and z
+                        case 1: // Y-axis (Floor/Ceiling)
                             y = positive ? (inner_max_y + positionOffset) : (inner_min_y - positionOffset);
                             x = inner_min_x + i * spacing;
                             z = inner_min_z + j * spacing;
+                            nx = 0.0f;
+                            ny = positive ? -1.0f : 1.0f;
+                            nz = 0.0f;
                             break;
 
-                        case 2: // Z-axis (Front/Back walls): fix z, vary x and y
+                        case 2: // Z-axis (Front/Back walls)
                             z = positive ? (inner_max_z + positionOffset) : (inner_min_z - positionOffset);
                             x = inner_min_x + i * spacing;
                             y = inner_min_y + j * spacing;
+                            nx = 0.0f;
+                            ny = 0.0f;
+                            nz = positive ? -1.0f : 1.0f;
                             break;
                     }
 
                     boundary_positions.emplace_back(make_float3(x, y, z));
-                    boundary_velocities.emplace_back(zeroVel);
+                    boundary_normals.emplace_back(make_float3(nx, ny, nz)); // Assign normals
                 }
             }
         }
@@ -173,9 +184,8 @@ void createBoundaryParticles(
     addBoundaryFace(0, false); // X-min (Left Wall)
     addBoundaryFace(0, true);  // X-max (Right Wall)
 
-    // Axis 1: Y-axis (Floor)
+    // Axis 1: Y-axis (Floor and Ceiling)
     addBoundaryFace(1, false); // Y-min (Floor)
-    // Uncomment the following line to add Y-max (Ceiling) when needed
     addBoundaryFace(1, true);  // Y-max (Ceiling)
 
     // Axis 2: Z-axis (Front and Back Walls)
@@ -183,102 +193,4 @@ void createBoundaryParticles(
     addBoundaryFace(2, true);  // Z-max (Back Wall)
 }
 
-/*
-// Create boundary particles
-void createBoundaryParticles(
-    std::vector<float3> &boundary_positions, 
-    std::vector<float3> &boundary_velocities,
-    int boundaryLayers, 
-    float spacing) 
-{
-    // Compute the number of particles along each dimension for a given spacing
-    int nx = static_cast<int>((h_BOUND_X_MAX - h_BOUND_X_MIN)/spacing) + 1;
-    int ny = static_cast<int>((h_BOUND_Y_MAX - h_BOUND_Y_MIN)/spacing) + 1;
-    int nz = static_cast<int>((h_BOUND_Z_MAX - h_BOUND_Z_MIN)/spacing) + 1;
-
-    // Initialize velocities to zero for boundary
-    float3 zeroVel = make_float3(0.0f, 0.0f, 0.0f);
-
-    // Y-min boundary (e.g., "floor")
-    for (int layer = 0; layer < boundaryLayers; layer++) {
-        float y = h_BOUND_Y_MIN - (layer+1)*spacing; // placing layers below Ymin
-        for (int ix = 0; ix < nx; ix++) {
-            for (int iz = 0; iz < nz; iz++) {
-                float x = h_BOUND_X_MIN + ix*spacing;
-                float z = h_BOUND_Z_MIN + iz*spacing;
-                boundary_positions.push_back(make_float3(x, y, z));
-                boundary_velocities.push_back(zeroVel);
-            }
-        }
-    }
-
-    /*
-    // Y-max boundary (ceiling)
-    for (int layer = 0; layer < boundaryLayers; layer++) {
-        float y = h_BOUND_Y_MAX + (layer+1)*spacing;
-        for (int ix = 0; ix < nx; ix++) {
-            for (int iz = 0; iz < nz; iz++) {
-                float x = h_BOUND_X_MIN + ix*spacing;
-                float z = h_BOUND_Z_MIN + iz*spacing;
-                boundary_positions.push_back(make_float3(x, y, z));
-                boundary_velocities.push_back(zeroVel);
-            }
-        }
-    }
-
-    // X-min boundary (vertical wall on the left)
-    int ny_cells = ny; // from earlier computation
-    for (int layer = 0; layer < boundaryLayers; layer++) {
-        float x = h_BOUND_X_MIN - (layer+1)*spacing;
-        for (int iy = 0; iy < ny_cells; iy++) {
-            for (int iz = 0; iz < nz; iz++) {
-                float y = h_BOUND_Y_MIN + iy*spacing;
-                float z = h_BOUND_Z_MIN + iz*spacing;
-                boundary_positions.push_back(make_float3(x, y, z));
-                boundary_velocities.push_back(zeroVel);
-            }
-        }
-    }
-
-    // X-max boundary (vertical wall on the right)
-    for (int layer = 0; layer < boundaryLayers; layer++) {
-        float x = h_BOUND_X_MAX + (layer+1)*spacing;
-        for (int iy = 0; iy < ny_cells; iy++) {
-            for (int iz = 0; iz < nz; iz++) {
-                float y = h_BOUND_Y_MIN + iy*spacing;
-                float z = h_BOUND_Z_MIN + iz*spacing;
-                boundary_positions.push_back(make_float3(x, y, z));
-                boundary_velocities.push_back(zeroVel);
-            }
-        }
-    }
-
-    // Z-min boundary (front wall)
-    for (int layer = 0; layer < boundaryLayers; layer++) {
-        float z = h_BOUND_Z_MIN - (layer+1)*spacing;
-        for (int ix = 0; ix < nx; ix++) {
-            for (int iy = 0; iy < ny_cells; iy++) {
-                float x = h_BOUND_X_MIN + ix*spacing;
-                float y = h_BOUND_Y_MIN + iy*spacing;
-                boundary_positions.push_back(make_float3(x, y, z));
-                boundary_velocities.push_back(zeroVel);
-            }
-        }
-    }
-
-    // Z-max boundary (back wall)
-    for (int layer = 0; layer < boundaryLayers; layer++) {
-        float z = h_BOUND_Z_MAX + (layer+1)*spacing;
-        for (int ix = 0; ix < nx; ix++) {
-            for (int iy = 0; iy < ny_cells; iy++) {
-                float x = h_BOUND_X_MIN + ix*spacing;
-                float y = h_BOUND_Y_MIN + iy*spacing;
-                boundary_positions.push_back(make_float3(x, y, z));
-                boundary_velocities.push_back(zeroVel);
-            }
-        }
-    }
-}
-*/
-
 }
\ No newline at end of file