diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h index f629f97d9f25f631220a5a9c3241874b7f52a8aa..98b22b2669ce12135e10d35a248d020f4a55d3c5 100644 --- a/src/cuda/include/pcisph_kernels.h +++ b/src/cuda/include/pcisph_kernels.h @@ -12,7 +12,6 @@ __global__ void computeDensityError( float3* positions, float* densities, float* density_errors, - float* sum_gradW2, unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, @@ -28,7 +27,6 @@ __global__ void computeDensityError( __global__ void updatePressures( float* pressure_increments, float* density_errors, - float A, int numParticles ); diff --git a/src/cuda/init_cuda.cu b/src/cuda/init_cuda.cu index fa7bc1a959c526a80be7b39aa22b367bec678c1d..307f00e4db4fa2e4d9f5af8a8ab18fa7717877cc 100644 --- a/src/cuda/init_cuda.cu +++ b/src/cuda/init_cuda.cu @@ -178,7 +178,7 @@ void initializeConstants() h_PARTICLE_MASS, h_REST_DENSITY, h_H, - h_H/96.0f, + h_H/2.0f, h_SPIKY_GRAD_CONST ); } diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu index d5c995e5071b86513d6dc15a9b203d74d8ca19bb..e5693df29f8ab1122547a39de2e412e81742c6c1 100644 --- a/src/cuda/pcisph_kernels.cu +++ b/src/cuda/pcisph_kernels.cu @@ -28,7 +28,6 @@ __global__ void computeDensityError( float3* positions, float* densities, float* density_errors, - float* sum_gradW2, unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, @@ -79,13 +78,6 @@ __global__ void computeDensityError( if (r2 < H2 && r2 > 0.0001f) { float W = poly6Kernel(r2); density += PARTICLE_MASS * W; - - if (sum_gradW2 != nullptr) { - float dist = sqrtf(r2); - float gradW_coeff = spikyKernelGrad(dist); - float3 gradW = gradW_coeff * (r / dist); - sumGradW2_local += dot(gradW, gradW); - } } } } @@ -105,13 +97,6 @@ __global__ void computeDensityError( if (r2 < H2 && r2 > 0.0001f) { float W = poly6Kernel(r2); density += PARTICLE_MASS * W; // Boundary treated similarly here - - if (sum_gradW2 != nullptr) { - float dist = sqrtf(r2); - float gradW_coeff = spikyKernelGrad(dist); - float3 gradW = gradW_coeff * (r / dist); - sumGradW2_local += dot(gradW, gradW); - } } } } @@ -121,10 +106,6 @@ __global__ void computeDensityError( densities[idx] = density; density_errors[idx] = density - REST_DENSITY; - - if (sum_gradW2 != nullptr) { - sum_gradW2[idx] = sumGradW2_local; - } } @@ -132,14 +113,13 @@ __global__ void computeDensityError( __global__ void updatePressures( float* pressure_increments, float* density_errors, - float A, int numParticles ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < numParticles) { - float deltaP = -density_errors[idx] / K_PCI; + float deltaP = -density_errors[idx] * K_PCI; pressure_increments[idx] = deltaP; } } @@ -193,7 +173,8 @@ __global__ void computePressureForces( for (unsigned int j = startIdx; j < endIdx; ++j) { if (j == idx) continue; float3 pos_j = positions[j]; - float deltaP_j = pressure_increments[j]; + //float deltaP_j = pressure_increments[j]; + float deltaP_j = deltaP_i; float3 r = pos_i - pos_j; float distSq = dot(r, r); @@ -212,27 +193,27 @@ __global__ void computePressureForces( // ------------ - // unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; - // unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; - - // if (b_startIdx != 0xFFFFFFFF) { - // for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { - // float3 pos_j = boundaryPositions[j]; - // // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary - // float deltaP_j = deltaP_i; // Pressure mirroring - - // float3 r = pos_i - pos_j; - // float distSq = dot(r, r); - // if (distSq < H2 && distSq > 0.0001f) { - // float dist = sqrtf(distSq); - // float gradW = spikyKernelGrad(dist); - // float3 gradWij = gradW * (r / dist); - // // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 - // float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); - // fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; - // } - // } - // } + unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; + + if (b_startIdx != 0xFFFFFFFF) { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { + float3 pos_j = boundaryPositions[j]; + // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary + float deltaP_j = deltaP_i; // Pressure mirroring + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; + } + } + } } } diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu index 4b296bea67dfe70e04d7a16b1c3f4992a0372f10..eea44b15370931defeb6a87a411f63afae1eba82 100644 --- a/src/cuda/pcisph_update.cu +++ b/src/cuda/pcisph_update.cu @@ -68,45 +68,31 @@ void updateParticles(float deltaTime) // Initialize pressure increments to zero cudaMemset(d_pressure_increments, 0, NUM_PARTICLES * sizeof(float)); - // Compute densities and density errors, and per-particle sumGradW2 - computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( - d_positions_pred, - d_densities, - d_density_errors, - d_sum_gradW2, - d_cellIndices, - d_cellStart, - d_cellEnd, - d_boundary_positions, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ); - cudaDeviceSynchronize(); - - // Reduce sum_gradW2 over all particles - thrust::device_ptr<float> dev_sumGradW2_ptr(d_sum_gradW2); - float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>()); - float r0 = 0.5f * h_H; // Penetration threshold float epsilon = 1.0f; // Moderate to high friction float delta = 0.5f; // Low to moderate elasticity float normalDamping = 0.5f; // Adjust as needed - - // Compute A - float m = h_PARTICLE_MASS; - float rho0 = h_REST_DENSITY; - float A = 2.0f * timeStep * timeStep * m * m / (rho0 * rho0) * totalSumGradW2; - - //printf("K_PCI: %f \n", A); - // ------------ Iterative Pressure Correction Loop -------------------- const int maxIterations = 3; for (int iter = 0; iter < maxIterations; ++iter) { // Launch the boundary correction kernel + computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_densities, + d_density_errors, + d_cellIndices, + d_cellStart, + d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ); + cudaDeviceSynchronize(); + correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( d_positions_pred, d_velocities_pred, @@ -128,7 +114,6 @@ void updateParticles(float deltaTime) updatePressures<<<blocksPerGrid, threadsPerBlock>>>( d_pressure_increments, d_density_errors, - A, NUM_PARTICLES); cudaDeviceSynchronize(); @@ -154,20 +139,6 @@ void updateParticles(float deltaTime) NUM_PARTICLES); cudaDeviceSynchronize(); - computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( - d_positions_pred, - d_densities, - d_density_errors, - nullptr, - d_cellIndices, - d_cellStart, - d_cellEnd, - d_boundary_positions, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ); - cudaDeviceSynchronize(); }