From f3f0924570fa032150893d90ba5b057d71b4c26a Mon Sep 17 00:00:00 2001 From: adany292 <adany292@student.liu.se> Date: Sun, 29 Dec 2024 22:37:13 +0100 Subject: [PATCH] ADDED macro fro itterating neighbor cell to make code more managable --- src/cuda/include/helper_macros.h | 34 +++ src/cuda/pcisph_kernels.cu | 409 +++++++++++++------------------ 2 files changed, 210 insertions(+), 233 deletions(-) create mode 100644 src/cuda/include/helper_macros.h diff --git a/src/cuda/include/helper_macros.h b/src/cuda/include/helper_macros.h new file mode 100644 index 0000000..9c18a79 --- /dev/null +++ b/src/cuda/include/helper_macros.h @@ -0,0 +1,34 @@ +// helper_macros.h + +#ifndef HELPER_MACROS_H +#define HELPER_MACROS_H + +#define FOREACH_NEIGHBOR_CELL_BEGIN(cellIndex, gridSizeX, gridSizeY, gridSizeZ, neighborCellIdx) \ + do { \ + /* Compute cell coordinates */ \ + int cellX = cellIndex % gridSizeX; \ + int cellY = (cellIndex / gridSizeX) % gridSizeY; \ + int cellZ = cellIndex / (gridSizeX * gridSizeY); \ + \ + /* Iterate over neighboring cells */ \ + for (int dz = -1; dz <= 1; ++dz) { \ + int nz = cellZ + dz; \ + if (nz < 0 || nz >= gridSizeZ) continue; \ + for (int dy = -1; dy <= 1; ++dy) { \ + int ny = cellY + dy; \ + if (ny < 0 || ny >= gridSizeY) continue; \ + for (int dx = -1; dx <= 1; ++dx) { \ + int nx = cellX + dx; \ + if (nx < 0 || nx >= gridSizeX) continue; \ + \ + /* Compute neighbor cell index */ \ + neighborCellIdx = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; \ + /* User-defined code starts here */ + +#define FOREACH_NEIGHBOR_CELL_END() \ + } \ + } \ + } \ + } while(0) + +#endif // HELPER_MACROS_H diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu index e5693df..ab5a90a 100644 --- a/src/cuda/pcisph_kernels.cu +++ b/src/cuda/pcisph_kernels.cu @@ -2,6 +2,7 @@ #include "include/pcisph_kernels.h" #include "include/smoothing_kernels.h" +#include "helper_macros.h" // Make sure this is included #include "include/float3_helper_math.h" #include "include/cuda_device_vars.h" @@ -24,6 +25,7 @@ namespace FluidSimulationGPU { // Compute density errors (used in PCISPH) + __global__ void computeDensityError( float3* positions, float* densities, @@ -44,67 +46,53 @@ __global__ void computeDensityError( float3 pos_i = positions[idx]; float density = 0.0f; - float sumGradW2_local = 0.0f; - - unsigned int cellIndex = cellIndices[idx]; - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // ---------------------- - // Fluid neighbors - // ---------------------- - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; - - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - float3 pos_j = positions[j]; - float3 r = pos_i - pos_j; - float r2 = dot(r, r); - - if (r2 < H2 && r2 > 0.0001f) { - float W = poly6Kernel(r2); - density += PARTICLE_MASS * W; - } - } + unsigned int cellIndex = cellIndices[idx]; + unsigned int neighborCellIdx; + + FOREACH_NEIGHBOR_CELL_BEGIN(cellIndex, gridSizeX, gridSizeY, gridSizeZ, neighborCellIdx) + { + unsigned int startIdx = cellStart[neighborCellIdx]; + unsigned int endIdx = cellEnd[neighborCellIdx]; + + if (startIdx != 0xFFFFFFFF) + { + for (unsigned int j = startIdx; j < endIdx; ++j) + { + float3 pos_j = positions[j]; + float3 r = pos_i - pos_j; + float r2 = dot(r, r); + + // Check if in range + if (r2 < H2 && r2 > 0.0001f) { + float W = poly6Kernel(r2); + density += PARTICLE_MASS * W; } - - // ---------------------- - // Boundary neighbors - // ---------------------- - - unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; - unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; - - if (b_startIdx != 0xFFFFFFFF) { - for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { - float3 pos_j = boundaryPositions[j]; - float3 r = pos_i - pos_j; - float r2 = dot(r, r); - if (r2 < H2 && r2 > 0.0001f) { - float W = poly6Kernel(r2); - density += PARTICLE_MASS * W; // Boundary treated similarly here - } - } + } + } + + // Boundary neighbors + unsigned int b_startIdx = boundaryCellStart[neighborCellIdx]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIdx]; + + if (b_startIdx != 0xFFFFFFFF) + { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) + { + float3 pos_j = boundaryPositions[j]; + float3 r = pos_i - pos_j; + float r2 = dot(r, r); + if (r2 < H2 && r2 > 0.0001f) { + float W = poly6Kernel(r2); + density += PARTICLE_MASS * W; } } } + } + FOREACH_NEIGHBOR_CELL_END(); - densities[idx] = density; + densities[idx] = density; density_errors[idx] = density - REST_DENSITY; } @@ -124,7 +112,6 @@ __global__ void updatePressures( } } -// Compute pressure forces __global__ void computePressureForces( float3* positions, float* pressure_increments, @@ -132,7 +119,6 @@ __global__ void computePressureForces( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, - // Boundary data float3* boundaryPositions, unsigned int* boundaryCellStart, unsigned int* boundaryCellEnd, @@ -145,79 +131,70 @@ __global__ void computePressureForces( float3 pos_i = positions[idx]; float deltaP_i = pressure_increments[idx]; + float3 fPressure = make_float3(0.0f, 0.0f, 0.0f); unsigned int cellIndex = cellIndices[idx]; - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // ------------ - // Fluid neighbors - // ------------ - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; - - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - if (j == idx) continue; - float3 pos_j = positions[j]; - //float deltaP_j = pressure_increments[j]; - float deltaP_j = deltaP_i; - - float3 r = pos_i - pos_j; - float distSq = dot(r, r); - if (distSq < H2 && distSq > 0.0001f) { - float dist = sqrtf(distSq); - float gradW = spikyKernelGrad(dist); - float3 gradWij = gradW * (r / dist); - float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); - fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; - } - } + unsigned int neighborCellIdx; + + FOREACH_NEIGHBOR_CELL_BEGIN(cellIndex, gridSizeX, gridSizeY, gridSizeZ, neighborCellIdx) + { + // Fluid neighbors + unsigned int startIdx = cellStart[neighborCellIdx]; + unsigned int endIdx = cellEnd[neighborCellIdx]; + + if (startIdx != 0xFFFFFFFF) + { + for (unsigned int j = startIdx; j < endIdx; ++j) + { + // Avoid self-interaction + if (j == idx) continue; + + float3 pos_j = positions[j]; + // Using mirrored pressure for fluid + float deltaP_j = deltaP_i; // Or pressure_increments[j], but mirrored works for PCISPH ass approximation + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) + { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * (PARTICLE_MASS * PARTICLE_MASS); + } + } + } + + // Boundary neighbors + unsigned int b_startIdx = boundaryCellStart[neighborCellIdx]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIdx]; + + if (b_startIdx != 0xFFFFFFFF) + { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) + { + float3 pos_j = boundaryPositions[j]; + // Mirror pressure: p_b = p_i + float deltaP_j = deltaP_i; + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) + { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * (PARTICLE_MASS * PARTICLE_MASS); } - - // ------------ - // Boundary neighbors - // ------------ - - - unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; - unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; - - if (b_startIdx != 0xFFFFFFFF) { - for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { - float3 pos_j = boundaryPositions[j]; - // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary - float deltaP_j = deltaP_i; // Pressure mirroring - - float3 r = pos_i - pos_j; - float distSq = dot(r, r); - if (distSq < H2 && distSq > 0.0001f) { - float dist = sqrtf(distSq); - float gradW = spikyKernelGrad(dist); - float3 gradWij = gradW * (r / dist); - // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 - float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); - fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; - } - } - } - } } } + FOREACH_NEIGHBOR_CELL_END(); pressure_forces[idx] = fPressure; } @@ -243,91 +220,67 @@ __global__ void correctBoundaryPenetrations( float3 pos_i = positions_pred[idx]; float3 vel_i = velocities_pred[idx]; - // Retrieve the cell index for this fluid particle unsigned int cellIndex = cellIndices[idx]; + unsigned int neighborCellIdx; - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // Accumulators for penetration correction float w_sum = 0.0f; float depth_sum = 0.0f; float3 normal_sum = make_float3(0.0f, 0.0f, 0.0f); - // Iterate over neighboring cells in a 3x3x3 grid - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - // Compute the neighbor cell index - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - - // Get the boundary particles in this cell - unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; - unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; - - if (b_startIdx == 0xFFFFFFFF) continue; // No boundary particles in this cell - - // Loop over boundary particles in the neighboring cell - for (unsigned int b = b_startIdx; b < b_endIdx; ++b) { - float3 pos_b = boundary_positions[b]; - float3 n_b = boundary_normals[b]; - float3 r = pos_i - pos_b; - float distSq = dot(r, r); - float dist = sqrtf(distSq); - - if (dist < r0 && dist > 1e-6f) { - // Compute penetration weight - float w = (r0 - dist) / r0; - w_sum += w; - depth_sum += w * (r0 - dist); - - // Accumulate normals weighted by penetration - normal_sum += w * n_b; - } - } + FOREACH_NEIGHBOR_CELL_BEGIN(cellIndex, gridSizeX, gridSizeY, gridSizeZ, neighborCellIdx) + { + unsigned int b_startIdx = boundaryCellStart[neighborCellIdx]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIdx]; + + if (b_startIdx == 0xFFFFFFFF) continue; + + for (unsigned int b = b_startIdx; b < b_endIdx; ++b) + { + float3 pos_b = boundary_positions[b]; + float3 n_b = boundary_normals[b]; + float3 r = pos_i - pos_b; + float distSq = dot(r, r); + float dist = sqrtf(distSq); + + if (dist < r0 && dist > 1e-6f) { + // Compute penetration weight + float w = (r0 - dist) / r0; + w_sum += w; + depth_sum += w * (r0 - dist); + + // Accumulate normals weighted by penetration + normal_sum += w * n_b; } } } + FOREACH_NEIGHBOR_CELL_END(); - // If penetration is detected, apply corrections - if (w_sum > 0.0f) { - // Compute the average normal direction + // apply corrections + if (w_sum > 0.0f) + { float3 n_c = normal_sum / w_sum; float n_len = sqrtf(dot(n_c, n_c)); if (n_len > 1e-6f) { - n_c /= n_len; // Normalize the accumulated normal + n_c /= n_len; // Normalize - // Compute the total penetration depth float correction = depth_sum / w_sum; - // Correct the fluid particle's position + // Correct the fluid particles position pos_i += n_c * correction; - // Decompose velocity into normal and tangential components - float v_n = dot(vel_i, n_c); - float3 v_normal = v_n * n_c; + // Velocity into normal and tangential + float v_n = dot(vel_i, n_c); + float3 v_normal = v_n * n_c; float3 v_tangential = vel_i - v_normal; // Apply friction and elasticity - // [v_tangential_new] = epsilon * v_tangential - // [v_normal_new] = -delta * v_normal float3 v_new = epsilon * v_tangential - delta * v_normal; - // Update velocity vel_i = v_new; } } - // Write back updated positions and velocities - positions_pred[idx] = pos_i; + positions_pred[idx] = pos_i; velocities_pred[idx] = vel_i; } @@ -344,64 +297,54 @@ __global__ void computeViscosityAndExternalForces( ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= numParticles) return; - if (idx < numParticles) { - float3 pos_i = positions[idx]; - float3 vel_i = velocities[idx]; - - float3 fViscosity = make_float3(0.0f, 0.0f, 0.0f); - float3 fGravity = make_float3(0.0f, -10.0f, 0.0f) * PARTICLE_MASS; - - unsigned int cellIndex = cellIndices[idx]; - - // Compute cell coordinates - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // Iterate over neighboring cells - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; - - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - if (j == idx) continue; - - float3 pos_j = positions[j]; - float3 vel_j = velocities[j]; - float3 r = pos_i - pos_j; - - float distSq = dot(r, r); - - if (distSq < H2 && distSq > 0.0001f) { - float dist = sqrtf(distSq); - - // Viscosity force - float3 velDiff = vel_j - vel_i; - float viscLap = viscosityKernelLap(dist); - fViscosity += VISCOSITY * velDiff * viscLap * PARTICLE_MASS / REST_DENSITY; - } - } - } + float3 pos_i = positions[idx]; + float3 vel_i = velocities[idx]; + + float3 fViscosity = make_float3(0.0f, 0.0f, 0.0f); + float3 fGravity = make_float3(0.0f, -10.0f, 0.0f) * PARTICLE_MASS; + + // Current particle's cell + unsigned int cellIndex = cellIndices[idx]; + // We'll store neighbor cell index here + unsigned int neighborCellIdx; + + // ------------------------------------------------ + // Replace triple nested loop with the macro + // ------------------------------------------------ + FOREACH_NEIGHBOR_CELL_BEGIN(cellIndex, gridSizeX, gridSizeY, gridSizeZ, neighborCellIdx) + { + unsigned int startIdx = cellStart[neighborCellIdx]; + unsigned int endIdx = cellEnd[neighborCellIdx]; + + if (startIdx != 0xFFFFFFFF) + { + for (unsigned int j = startIdx; j < endIdx; ++j) + { + if (j == idx) continue; + + float3 pos_j = positions[j]; + float3 vel_j = velocities[j]; + float3 r = pos_i - pos_j; + + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) + { + float dist = sqrtf(distSq); + + // Viscosity force + float3 velDiff = vel_j - vel_i; + float viscLap = viscosityKernelLap(dist); + fViscosity += VISCOSITY * velDiff * viscLap * PARTICLE_MASS / REST_DENSITY; } } } - - // Total acceleration (excluding pressure force) - accelerations[idx] = (fViscosity + fGravity) / PARTICLE_MASS; } + FOREACH_NEIGHBOR_CELL_END(); + + // Total acceleration (excluding pressure) + accelerations[idx] = (fViscosity + fGravity) / PARTICLE_MASS; } // Enforce boundary conditions, very simple should bee improved. -- GitLab