diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h index 18c65f885237b033780e2869ec41d48806c9a210..07be87cb0ed361f5743354e6da751f6f44015ba3 100644 --- a/src/cuda/include/pcisph_kernels.h +++ b/src/cuda/include/pcisph_kernels.h @@ -16,6 +16,10 @@ __global__ void computeDensityError( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ); @@ -36,6 +40,10 @@ __global__ void computePressureForces( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ); diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu index f2efd40677d2f06175404a1d6a7f80950a4733db..e42cda376bc9a29282087928e88d283418cc2ab2 100644 --- a/src/cuda/pcisph_kernels.cu +++ b/src/cuda/pcisph_kernels.cu @@ -32,76 +32,115 @@ __global__ void computeDensityError( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - - if (idx < numParticles) { - float3 pos_i = positions[idx]; - float density = 0.0f; - float sumGradW2_local = 0.0f; - - unsigned int cellIndex = cellIndices[idx]; - - // Compute cell coordinates - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // Iterate over neighboring cells - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; - - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - float3 pos_j = positions[j]; - - float3 r = pos_i - pos_j; - float r2 = dot(r, r); - - if (r2 < H2 && r2 > 0.0001f) { + if (idx >= numParticles) return; + + float3 pos_i = positions[idx]; + float density = 0.0f; + float sumGradW2_local = 0.0f; + + unsigned int cellIndex = cellIndices[idx]; + + int cellX = cellIndex % gridSizeX; + int cellY = (cellIndex / gridSizeX) % gridSizeY; + int cellZ = cellIndex / (gridSizeX * gridSizeY); + + // ---------------------- + // Fluid neighbors + // ---------------------- + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + unsigned int startIdx = cellStart[neighborCellIndex]; + unsigned int endIdx = cellEnd[neighborCellIndex]; + + if (startIdx != 0xFFFFFFFF) { + for (unsigned int j = startIdx; j < endIdx; ++j) { + float3 pos_j = positions[j]; + float3 r = pos_i - pos_j; + float r2 = dot(r, r); + + if (r2 < H2 && r2 > 0.0001f) { + float W = poly6Kernel(r2); + density += PARTICLE_MASS * W; + + if (sum_gradW2 != nullptr) { float dist = sqrtf(r2); + float gradW_coeff = spikyKernelGrad(dist); + float3 gradW = gradW_coeff * (r / dist); + sumGradW2_local += dot(gradW, gradW); + } + } + } + } + } + } + } - float W = poly6Kernel(r2); - density += PARTICLE_MASS * W; - - if (sum_gradW2 != nullptr) { - // Compute gradW - float gradW_coeff = spikyKernelGrad(dist); - float3 gradW = gradW_coeff * (r / dist); - - sumGradW2_local += dot(gradW, gradW); - } + // ---------------------- + // Boundary neighbors + // ---------------------- + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; + + if (b_startIdx != 0xFFFFFFFF) { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { + float3 pos_j = boundaryPositions[j]; + float3 r = pos_i - pos_j; + float r2 = dot(r, r); + if (r2 < H2 && r2 > 0.0001f) { + float W = poly6Kernel(r2); + density += PARTICLE_MASS * W; // Boundary treated similarly here + + if (sum_gradW2 != nullptr) { + float dist = sqrtf(r2); + float gradW_coeff = spikyKernelGrad(dist); + float3 gradW = gradW_coeff * (r / dist); + sumGradW2_local += dot(gradW, gradW); } } } } } } + } - densities[idx] = density; - density_errors[idx] = density - REST_DENSITY; + densities[idx] = density; + density_errors[idx] = density - REST_DENSITY; - if (sum_gradW2 != nullptr) { - sum_gradW2[idx] = sumGradW2_local; - } + if (sum_gradW2 != nullptr) { + sum_gradW2[idx] = sumGradW2_local; } } + // Update pressures based on density errors __global__ void updatePressures( float* pressure_increments, @@ -126,74 +165,104 @@ __global__ void computePressureForces( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - - if (idx < numParticles) { - float3 pos_i = positions[idx]; - float deltaP_i = pressure_increments[idx]; - - float3 fPressure = make_float3(0.0f, 0.0f, 0.0f); - - unsigned int cellIndex = cellIndices[idx]; - - // Compute cell coordinates - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // Iterate over neighboring cells - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; - - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - if (j == idx) continue; - - float3 pos_j = positions[j]; - float deltaP_j = pressure_increments[j]; - - float3 r = pos_i - pos_j; - - float distSq = dot(r, r); - - if (distSq < H2 && distSq > 0.0001f) { - float dist = sqrtf(distSq); - - // Compute gradient of kernel - float gradW = spikyKernelGrad(dist); - float3 gradWij = gradW * (r / dist); - - // Symmetric pressure force - float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); - fPressure += factor * gradWij; - } + if (idx >= numParticles) return; + + float3 pos_i = positions[idx]; + float deltaP_i = pressure_increments[idx]; + float3 fPressure = make_float3(0.0f, 0.0f, 0.0f); + + unsigned int cellIndex = cellIndices[idx]; + int cellX = cellIndex % gridSizeX; + int cellY = (cellIndex / gridSizeX) % gridSizeY; + int cellZ = cellIndex / (gridSizeX * gridSizeY); + + // ------------ + // Fluid neighbors + // ------------ + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + unsigned int startIdx = cellStart[neighborCellIndex]; + unsigned int endIdx = cellEnd[neighborCellIndex]; + + if (startIdx != 0xFFFFFFFF) { + for (unsigned int j = startIdx; j < endIdx; ++j) { + if (j == idx) continue; + float3 pos_j = positions[j]; + float deltaP_j = pressure_increments[j]; + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; } } } } } + } - // Multiply by constants - fPressure *= PARTICLE_MASS * PARTICLE_MASS; - - pressure_forces[idx] = fPressure; + // ------------ + // Boundary neighbors + // ------------ + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; + + if (b_startIdx != 0xFFFFFFFF) { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { + float3 pos_j = boundaryPositions[j]; + // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary + float deltaP_j = deltaP_i; // Pressure mirroring + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; + } + } + } + } + } } + + pressure_forces[idx] = fPressure; } // Compute viscosity and external forces (excluding pressure forces) diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu index 05c163b43b3544496b5138ca7aebb6c9308c2a93..51171356a411e67f1e022de263f0b9a58119a0be 100644 --- a/src/cuda/pcisph_update.cu +++ b/src/cuda/pcisph_update.cu @@ -77,6 +77,9 @@ void updateParticles(float deltaTime) d_cellIndices, d_cellStart, d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, NUM_PARTICLES, gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); @@ -103,6 +106,9 @@ void updateParticles(float deltaTime) d_cellIndices, d_cellStart, d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, NUM_PARTICLES, gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); @@ -121,6 +127,9 @@ void updateParticles(float deltaTime) d_cellIndices, d_cellStart, d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, NUM_PARTICLES, gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); @@ -141,12 +150,14 @@ void updateParticles(float deltaTime) // Enforce boundary conditions float damping = 0.75f; + enforceBoundary<<<blocksPerGrid, threadsPerBlock>>>( d_positions, d_velocities, NUM_PARTICLES, damping); cudaDeviceSynchronize(); + // Unmap the VBO resource cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); diff --git a/src/settings/constants.h b/src/settings/constants.h index 3faa9ca00980c22eb87f93a11ffcdc0e5f42c006..b96e1ec77dd818195053ed203dfa893035933845 100644 --- a/src/settings/constants.h +++ b/src/settings/constants.h @@ -13,10 +13,10 @@ const float h_GAS_CONSTANT = 0.02f; const float h_VISCOSITY = 0.2f; const float h_PARTICLE_MASS = 0.4f; -const float h_REST_DENSITY = 3000.0f; +const float h_REST_DENSITY = 7500.0f; const float h_PI = 3.14159265358979323846f; -const float h_H = 0.35f; // Smoothing radius +const float h_H = 0.25f; // Smoothing radius const int NUM_PARTICLES = 50000;