diff --git a/src/FluidSimulation.cpp b/src/FluidSimulation.cpp index cf9c0d59a20195f423296b8cfc0156504b173e8c..3e1fc47ea53abffe9dca96155a7e66189c0e7b5f 100644 --- a/src/FluidSimulation.cpp +++ b/src/FluidSimulation.cpp @@ -1,6 +1,7 @@ #include "include/FluidSimulation.h" #include "include/spawn_particles.h" // For createBoundaryParticles, initParticles //#include "include/PCISPH_sim.h" // For CUDA simulation functions +#include "settings/constants.h" #include "cuda/include/PCISPH_CUDA_interface.h" #include <iostream> @@ -22,17 +23,16 @@ namespace FluidSimulation { // Upload constants to the GPU FluidSimulationGPU::initializeConstants(); - // Initialize particles + // Initialize fluid particles std::vector<float3> h_positions(m_numParticles); std::vector<float3> h_velocities(m_numParticles); - float gridSpacing = 0.25f; - + float gridSpacing = 0.5 * h_H; initParticles(h_positions, h_velocities, m_numParticles, gridSpacing); // Initialize particle device arrays FluidSimulationGPU::initializeFluidDeviceArrays(h_positions.data(), h_velocities.data(), m_numParticles); - // Create particle VBO + // Create particle VBO and register with CUDA glGenBuffers(1, &m_vbo_particles); glBindBuffer(GL_ARRAY_BUFFER, m_vbo_particles); glBufferData(GL_ARRAY_BUFFER, h_positions.size() * sizeof(float3), h_positions.data(), GL_DYNAMIC_DRAW); @@ -41,25 +41,29 @@ namespace FluidSimulation { // Initialize boundary std::vector<float3> boundary_positions; - std::vector<float3> boundary_velocities; - int boundaryLayers = 3; - float spacing = 0.25f; + std::vector<float3> boundary_normals; + int boundaryLayers = 1; + float spacing = 0.5 * h_H; // Using 0.01 as per your code - createBoundaryParticles(boundary_positions, boundary_velocities, boundaryLayers, spacing); + createBoundaryParticles(boundary_positions, boundary_normals, boundaryLayers, spacing); m_numBoundaryParticles = boundary_positions.size(); - FluidSimulationGPU::initializeBoundryDeviceArrays(boundary_positions.data(), boundary_velocities.data(), m_numBoundaryParticles); + // Initialize boundary device arrays + FluidSimulationGPU::initializeBoundryDeviceArrays(boundary_positions.data(), boundary_normals.data(), m_numBoundaryParticles); - // Create boundary VBO + // Create boundary VBO and register with CUDA glGenBuffers(1, &m_vbo_boundary); glBindBuffer(GL_ARRAY_BUFFER, m_vbo_boundary); glBufferData(GL_ARRAY_BUFFER, boundary_positions.size() * sizeof(float3), boundary_positions.data(), GL_DYNAMIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); FluidSimulationGPU::registerBoundaryVBOWithCUDA(m_vbo_boundary); + + // Now that we have boundary particles set up and registered, run the boundary neighborhood search + FluidSimulationGPU::boundaryNBSearch(m_numBoundaryParticles); } void FluidSimulation::updateSimulation() { - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 2; ++i) { FluidSimulationGPU::updateParticles(0.001); } } diff --git a/src/cuda/include/cuda_device_vars.h b/src/cuda/include/cuda_device_vars.h index e0eada192f94a395ec8b53503a082e6bca262ce7..b4eeb12a45cc11c36756b79c20a1052105aa0972 100644 --- a/src/cuda/include/cuda_device_vars.h +++ b/src/cuda/include/cuda_device_vars.h @@ -57,7 +57,7 @@ extern float* d_sum_gradW2; // For computing A // Boundary particles extern float3* d_boundary_positions; -extern float3* d_boundary_velocities; +extern float3* d_boundary_normals; extern unsigned int* d_boundaryCellStart; extern unsigned int* d_boundaryCellEnd; diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h index 18c65f885237b033780e2869ec41d48806c9a210..f629f97d9f25f631220a5a9c3241874b7f52a8aa 100644 --- a/src/cuda/include/pcisph_kernels.h +++ b/src/cuda/include/pcisph_kernels.h @@ -16,6 +16,10 @@ __global__ void computeDensityError( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ); @@ -36,10 +40,30 @@ __global__ void computePressureForces( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ); +__global__ void correctBoundaryPenetrations( + float3* positions_pred, // Predicted fluid positions + float3* velocities_pred, // Predicted fluid velocities + const float3* boundary_positions, // Boundary particle positions + const float3* boundary_normals, // Boundary particle normals + unsigned int* cellIndices, // Cell indices for fluid particles + unsigned int* boundaryCellStart, // Start indices for boundary particles per cell + unsigned int* boundaryCellEnd, // End indices for boundary particles per cell + int numParticles, // Number of fluid particles + int gridSizeX, int gridSizeY, int gridSizeZ, // Grid dimensions + float r0, // Penetration threshold (e.g., 0.5 * h) + float normalDamping, // Damping factor for velocity + float epsilon, // Friction coefficient (0 <= epsilon <= 1) + float delta // Elasticity coefficient (0 <= delta <= 1) +); + // Function to compute viscosity and external forces __global__ void computeViscosityAndExternalForces( float3* positions, diff --git a/src/cuda/init_cuda.cu b/src/cuda/init_cuda.cu index 7cb10f4a4d13d2960bede1f7179c7701a12fbb3e..9c8630bbac0f26525ddedae68faf186dbaf5faca 100644 --- a/src/cuda/init_cuda.cu +++ b/src/cuda/init_cuda.cu @@ -62,7 +62,7 @@ float* d_sum_gradW2 = nullptr; // Boundary particles float3* d_boundary_positions = nullptr; -float3* d_boundary_velocities = nullptr; +float3* d_boundary_normals = nullptr; unsigned int* d_boundaryCellStart = nullptr; unsigned int* d_boundaryCellEnd = nullptr; @@ -169,7 +169,7 @@ void initializeFluidDeviceArrays( void initializeBoundryDeviceArrays( const float3* h_boundaryPositions, - const float3* h_boundaryVelocities, + const float3* h_boundaryNormals, // Renamed parameter int numBoundaryParticles ) { @@ -179,11 +179,13 @@ void initializeBoundryDeviceArrays( int gridSizeZ = (int)ceil((h_BOUND_Z_MAX - h_BOUND_Z_MIN) / h_H); int maxCells = gridSizeX * gridSizeY * gridSizeZ; - // cudaMalloc(&d_boundary_positions, numBoundaryParticles * sizeof(float3)); - cudaMalloc(&d_boundary_velocities, numBoundaryParticles * sizeof(float3)); - + // Allocate memory for boundary positions + //cudaMalloc(&d_boundary_positions, numBoundaryParticles * sizeof(float3)); //cudaMemcpy(d_boundary_positions, h_boundaryPositions, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); - cudaMemcpy(d_boundary_velocities, h_boundaryVelocities, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); + + // Allocate memory for boundary normals + cudaMalloc(&d_boundary_normals, numBoundaryParticles * sizeof(float3)); + cudaMemcpy(d_boundary_normals, h_boundaryNormals, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); cudaMalloc(&d_boundaryCellIndices, numBoundaryParticles * sizeof(unsigned int)); cudaMalloc(&d_boundaryParticleIndices, numBoundaryParticles * sizeof(unsigned int)); @@ -207,18 +209,18 @@ void boundaryNBSearch(int numBoundaryParticles) { int blocksPerGrid = (numBoundaryParticles + threadsPerBlock - 1) / threadsPerBlock; float3* d_boundary_positions_sorted; - float3* d_boundary_velocities_sorted; + float3* d_boundary_normals_sorted; cudaMalloc(&d_boundary_positions_sorted, numBoundaryParticles * sizeof(float3)); - cudaMalloc(&d_boundary_velocities_sorted, numBoundaryParticles * sizeof(float3)); + cudaMalloc(&d_boundary_normals_sorted, numBoundaryParticles * sizeof(float3)); neigbourHoodSearch( numBoundaryParticles, d_boundary_positions, - d_boundary_velocities, + d_boundary_normals, d_boundary_positions_sorted, - d_boundary_velocities_sorted, + d_boundary_normals_sorted, d_boundaryCellIndices, d_boundaryParticleIndices, d_boundaryCellEnd, @@ -233,7 +235,7 @@ void boundaryNBSearch(int numBoundaryParticles) { cudaGraphicsUnmapResources(1, &cuda_vbo_boundary_resource, 0); cudaFree(d_boundary_positions_sorted); - cudaFree(d_boundary_velocities_sorted); + cudaFree(d_boundary_normals_sorted); // Move some of above init code for boundary to seeparate function } @@ -261,7 +263,7 @@ void cleanupDeviceArrays() cudaFree(d_accelerations); cudaFree(d_boundary_positions); - cudaFree(d_boundary_velocities); + cudaFree(d_boundary_normals); cudaFree(d_boundaryCellIndices); cudaFree(d_boundaryParticleIndices); diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu index f2efd40677d2f06175404a1d6a7f80950a4733db..a0dac1aaf44261bb06286921004d64a77b8a7eff 100644 --- a/src/cuda/pcisph_kernels.cu +++ b/src/cuda/pcisph_kernels.cu @@ -32,76 +32,102 @@ __global__ void computeDensityError( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - - if (idx < numParticles) { - float3 pos_i = positions[idx]; - float density = 0.0f; - float sumGradW2_local = 0.0f; - - unsigned int cellIndex = cellIndices[idx]; - - // Compute cell coordinates - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // Iterate over neighboring cells - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; - - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - float3 pos_j = positions[j]; - - float3 r = pos_i - pos_j; - float r2 = dot(r, r); - - if (r2 < H2 && r2 > 0.0001f) { + if (idx >= numParticles) return; + + float3 pos_i = positions[idx]; + float density = 0.0f; + float sumGradW2_local = 0.0f; + + unsigned int cellIndex = cellIndices[idx]; + + int cellX = cellIndex % gridSizeX; + int cellY = (cellIndex / gridSizeX) % gridSizeY; + int cellZ = cellIndex / (gridSizeX * gridSizeY); + + // ---------------------- + // Fluid neighbors + // ---------------------- + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + unsigned int startIdx = cellStart[neighborCellIndex]; + unsigned int endIdx = cellEnd[neighborCellIndex]; + + if (startIdx != 0xFFFFFFFF) { + for (unsigned int j = startIdx; j < endIdx; ++j) { + float3 pos_j = positions[j]; + float3 r = pos_i - pos_j; + float r2 = dot(r, r); + + if (r2 < H2 && r2 > 0.0001f) { + float W = poly6Kernel(r2); + density += PARTICLE_MASS * W; + + if (sum_gradW2 != nullptr) { float dist = sqrtf(r2); - - float W = poly6Kernel(r2); - density += PARTICLE_MASS * W; - - if (sum_gradW2 != nullptr) { - // Compute gradW - float gradW_coeff = spikyKernelGrad(dist); - float3 gradW = gradW_coeff * (r / dist); - - sumGradW2_local += dot(gradW, gradW); - } + float gradW_coeff = spikyKernelGrad(dist); + float3 gradW = gradW_coeff * (r / dist); + sumGradW2_local += dot(gradW, gradW); + } + } + } + } + + // ---------------------- + // Boundary neighbors + // ---------------------- + + unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; + + if (b_startIdx != 0xFFFFFFFF) { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { + float3 pos_j = boundaryPositions[j]; + float3 r = pos_i - pos_j; + float r2 = dot(r, r); + if (r2 < H2 && r2 > 0.0001f) { + float W = poly6Kernel(r2); + density += PARTICLE_MASS * W; // Boundary treated similarly here + + if (sum_gradW2 != nullptr) { + float dist = sqrtf(r2); + float gradW_coeff = spikyKernelGrad(dist); + float3 gradW = gradW_coeff * (r / dist); + sumGradW2_local += dot(gradW, gradW); } } } } } } + } - densities[idx] = density; - density_errors[idx] = density - REST_DENSITY; + densities[idx] = density; + density_errors[idx] = density - REST_DENSITY; - if (sum_gradW2 != nullptr) { - sum_gradW2[idx] = sumGradW2_local; - } + if (sum_gradW2 != nullptr) { + sum_gradW2[idx] = sumGradW2_local; } } + // Update pressures based on density errors __global__ void updatePressures( float* pressure_increments, @@ -126,74 +152,202 @@ __global__ void computePressureForces( unsigned int* cellIndices, unsigned int* cellStart, unsigned int* cellEnd, + // Boundary data + float3* boundaryPositions, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, int numParticles, int gridSizeX, int gridSizeY, int gridSizeZ ) { int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= numParticles) return; + + float3 pos_i = positions[idx]; + float deltaP_i = pressure_increments[idx]; + float3 fPressure = make_float3(0.0f, 0.0f, 0.0f); + + unsigned int cellIndex = cellIndices[idx]; + int cellX = cellIndex % gridSizeX; + int cellY = (cellIndex / gridSizeX) % gridSizeY; + int cellZ = cellIndex / (gridSizeX * gridSizeY); + + // ------------ + // Fluid neighbors + // ------------ + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + unsigned int startIdx = cellStart[neighborCellIndex]; + unsigned int endIdx = cellEnd[neighborCellIndex]; + + if (startIdx != 0xFFFFFFFF) { + for (unsigned int j = startIdx; j < endIdx; ++j) { + if (j == idx) continue; + float3 pos_j = positions[j]; + float deltaP_j = pressure_increments[j]; + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; + } + } + } + + // ------------ + // Boundary neighbors + // ------------ + + /* + unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; + + if (b_startIdx != 0xFFFFFFFF) { + for (unsigned int j = b_startIdx; j < b_endIdx; ++j) { + float3 pos_j = boundaryPositions[j]; + // Mirror pressure: p_b = p_i, and use REST_DENSITY for boundary + float deltaP_j = deltaP_i; // Pressure mirroring + + float3 r = pos_i - pos_j; + float distSq = dot(r, r); + if (distSq < H2 && distSq > 0.0001f) { + float dist = sqrtf(distSq); + float gradW = spikyKernelGrad(dist); + float3 gradWij = gradW * (r / dist); + // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 + float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); + fPressure += factor * gradWij * PARTICLE_MASS * PARTICLE_MASS; + } + } + } + */ + } + } + } - if (idx < numParticles) { - float3 pos_i = positions[idx]; - float deltaP_i = pressure_increments[idx]; - - float3 fPressure = make_float3(0.0f, 0.0f, 0.0f); - - unsigned int cellIndex = cellIndices[idx]; - - // Compute cell coordinates - int cellX = cellIndex % gridSizeX; - int cellY = (cellIndex / gridSizeX) % gridSizeY; - int cellZ = cellIndex / (gridSizeX * gridSizeY); - - // Iterate over neighboring cells - for (int dz = -1; dz <= 1; ++dz) { - int nz = cellZ + dz; - if (nz < 0 || nz >= gridSizeZ) continue; - for (int dy = -1; dy <= 1; ++dy) { - int ny = cellY + dy; - if (ny < 0 || ny >= gridSizeY) continue; - for (int dx = -1; dx <= 1; ++dx) { - int nx = cellX + dx; - if (nx < 0 || nx >= gridSizeX) continue; - - unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; - - unsigned int startIdx = cellStart[neighborCellIndex]; - unsigned int endIdx = cellEnd[neighborCellIndex]; + pressure_forces[idx] = fPressure; +} - if (startIdx != 0xFFFFFFFF) { - for (unsigned int j = startIdx; j < endIdx; ++j) { - if (j == idx) continue; +__global__ void correctBoundaryPenetrations( + float3* positions_pred, + float3* velocities_pred, + const float3* boundary_positions, + const float3* boundary_normals, + unsigned int* cellIndices, + unsigned int* boundaryCellStart, + unsigned int* boundaryCellEnd, + int numParticles, + int gridSizeX, int gridSizeY, int gridSizeZ, + float r0, + float normalDamping, + float epsilon, + float delta) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= numParticles) return; + + float3 pos_i = positions_pred[idx]; + float3 vel_i = velocities_pred[idx]; + + // Retrieve the cell index for this fluid particle + unsigned int cellIndex = cellIndices[idx]; + + int cellX = cellIndex % gridSizeX; + int cellY = (cellIndex / gridSizeX) % gridSizeY; + int cellZ = cellIndex / (gridSizeX * gridSizeY); + + // Accumulators for penetration correction + float w_sum = 0.0f; + float depth_sum = 0.0f; + float3 normal_sum = make_float3(0.0f, 0.0f, 0.0f); + + // Iterate over neighboring cells in a 3x3x3 grid + for (int dz = -1; dz <= 1; ++dz) { + int nz = cellZ + dz; + if (nz < 0 || nz >= gridSizeZ) continue; + for (int dy = -1; dy <= 1; ++dy) { + int ny = cellY + dy; + if (ny < 0 || ny >= gridSizeY) continue; + for (int dx = -1; dx <= 1; ++dx) { + int nx = cellX + dx; + if (nx < 0 || nx >= gridSizeX) continue; + + // Compute the neighbor cell index + unsigned int neighborCellIndex = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; + + // Get the boundary particles in this cell + unsigned int b_startIdx = boundaryCellStart[neighborCellIndex]; + unsigned int b_endIdx = boundaryCellEnd[neighborCellIndex]; + + if (b_startIdx == 0xFFFFFFFF) continue; // No boundary particles in this cell + + // Loop over boundary particles in the neighboring cell + for (unsigned int b = b_startIdx; b < b_endIdx; ++b) { + float3 pos_b = boundary_positions[b]; + float3 n_b = boundary_normals[b]; + float3 r = pos_i - pos_b; + float distSq = dot(r, r); + float dist = sqrtf(distSq); + + if (dist < r0 && dist > 1e-6f) { + // Compute penetration weight + float w = (r0 - dist) / r0; + w_sum += w; + depth_sum += w * (r0 - dist); + + // Accumulate normals weighted by penetration + normal_sum += w * n_b; + } + } + } + } + } - float3 pos_j = positions[j]; - float deltaP_j = pressure_increments[j]; + // If penetration is detected, apply corrections + if (w_sum > 0.0f) { + // Compute the average normal direction + float3 n_c = normal_sum / w_sum; + float n_len = sqrtf(dot(n_c, n_c)); + if (n_len > 1e-6f) { + n_c /= n_len; // Normalize the accumulated normal - float3 r = pos_i - pos_j; + // Compute the total penetration depth + float correction = depth_sum / w_sum; - float distSq = dot(r, r); + // Correct the fluid particle's position + pos_i += n_c * correction; - if (distSq < H2 && distSq > 0.0001f) { - float dist = sqrtf(distSq); + // Decompose velocity into normal and tangential components + float v_n = dot(vel_i, n_c); + float3 v_normal = v_n * n_c; + float3 v_tangential = vel_i - v_normal; - // Compute gradient of kernel - float gradW = spikyKernelGrad(dist); - float3 gradWij = gradW * (r / dist); + // Apply friction and elasticity + // [v_tangential_new] = epsilon * v_tangential + // [v_normal_new] = -delta * v_normal + float3 v_new = epsilon * v_tangential - delta * v_normal; - // Symmetric pressure force - float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); - fPressure += factor * gradWij; - } - } - } - } - } + // Update velocity + vel_i = v_new; } - - // Multiply by constants - fPressure *= PARTICLE_MASS * PARTICLE_MASS; - - pressure_forces[idx] = fPressure; } + + // Write back updated positions and velocities + positions_pred[idx] = pos_i; + velocities_pred[idx] = vel_i; } // Compute viscosity and external forces (excluding pressure forces) diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu index 05c163b43b3544496b5138ca7aebb6c9308c2a93..34a73f90a56157c030c5f45fc9d1be9a454903f7 100644 --- a/src/cuda/pcisph_update.cu +++ b/src/cuda/pcisph_update.cu @@ -77,6 +77,9 @@ void updateParticles(float deltaTime) d_cellIndices, d_cellStart, d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, NUM_PARTICLES, gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); @@ -85,6 +88,12 @@ void updateParticles(float deltaTime) thrust::device_ptr<float> dev_sumGradW2_ptr(d_sum_gradW2); float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>()); + float r0 = 0.5f * h_H; // Penetration threshold + float epsilon = 1.0f; // Moderate to high friction + float delta = 0.5f; // Low to moderate elasticity + float normalDamping = 0.5f; // Adjust as needed + + // Compute A float m = h_PARTICLE_MASS; float rho0 = h_REST_DENSITY; @@ -95,18 +104,25 @@ void updateParticles(float deltaTime) const int maxIterations = 5; for (int iter = 0; iter < maxIterations; ++iter) { - computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( + // Launch the boundary correction kernel + correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( d_positions_pred, - d_densities, - d_density_errors, - nullptr, + d_velocities_pred, + d_boundary_positions, + d_boundary_normals, d_cellIndices, - d_cellStart, - d_cellEnd, + d_boundaryCellStart, + d_boundaryCellEnd, NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ); + gridSizeX, gridSizeY, gridSizeZ, + r0, + normalDamping, + epsilon, + delta + ); cudaDeviceSynchronize(); + updatePressures<<<blocksPerGrid, threadsPerBlock>>>( d_pressure_increments, d_density_errors, @@ -121,6 +137,9 @@ void updateParticles(float deltaTime) d_cellIndices, d_cellStart, d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, NUM_PARTICLES, gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); @@ -132,14 +151,49 @@ void updateParticles(float deltaTime) timeStep, NUM_PARTICLES); cudaDeviceSynchronize(); + + computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_densities, + d_density_errors, + nullptr, + d_cellIndices, + d_cellStart, + d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ); + cudaDeviceSynchronize(); } + + correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_velocities_pred, + d_boundary_positions, + d_boundary_normals, + d_cellIndices, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ, + r0, + normalDamping, + epsilon, + delta + ); + + cudaDeviceSynchronize(); + // -------------------- Finalization -------------------- cudaMemcpy(d_velocities, d_velocities_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); cudaMemcpy(d_positions, d_positions_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); // Enforce boundary conditions + /* float damping = 0.75f; enforceBoundary<<<blocksPerGrid, threadsPerBlock>>>( d_positions, @@ -147,6 +201,8 @@ void updateParticles(float deltaTime) NUM_PARTICLES, damping); cudaDeviceSynchronize(); + */ + // Unmap the VBO resource cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); diff --git a/src/settings/constants.h b/src/settings/constants.h index 3faa9ca00980c22eb87f93a11ffcdc0e5f42c006..5ea4afd18d43e36e5d9a761a1237c6fcbd6f9722 100644 --- a/src/settings/constants.h +++ b/src/settings/constants.h @@ -10,13 +10,13 @@ const float h_BOUND_Z_MIN = -4.0f; const float h_BOUND_Z_MAX = 4.0f; const float h_GAS_CONSTANT = 0.02f; -const float h_VISCOSITY = 0.2f; +const float h_VISCOSITY = 0.04f; -const float h_PARTICLE_MASS = 0.4f; -const float h_REST_DENSITY = 3000.0f; +const float h_PARTICLE_MASS = 0.04f; +const float h_REST_DENSITY = 800.0f; const float h_PI = 3.14159265358979323846f; -const float h_H = 0.35f; // Smoothing radius +const float h_H = 0.25f; // Smoothing radius const int NUM_PARTICLES = 50000; diff --git a/src/spawn_particles.cpp b/src/spawn_particles.cpp index 255f0ed7a6a597862580bfbb61a3dee2e64684da..59986b0b7994b2420699443313781a82db6dbc58 100644 --- a/src/spawn_particles.cpp +++ b/src/spawn_particles.cpp @@ -27,8 +27,8 @@ void initParticles( for (int k = 0; k < particlesPerAxis && index < NUM_PARTICLES; ++k) { // Center particles around the origin float x = (i - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING; - float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 2.0f; - float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING; + float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 4.0f; + float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 0.5; // Add random perturbation within [-0.1*dx, 0.1*dx] float perturbation = 0.1f * GRID_SPACING; @@ -49,7 +49,35 @@ void initParticles( } } } + /* + for (int i = 0; i < particlesPerAxis && index < NUM_PARTICLES; ++i) { + for (int j = 0; j < particlesPerAxis && index < NUM_PARTICLES; ++j) { + for (int k = 0; k < particlesPerAxis && index < NUM_PARTICLES; ++k) { + // Center particles around the origin + float x = (i - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 9; + float y = (j - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING + 2.0f; + float z = (k - (particlesPerAxis - 1) / 2.0f) * GRID_SPACING - 0.5; + // Add random perturbation within [-0.1*dx, 0.1*dx] + float perturbation = 0.1f * GRID_SPACING; + float rand_x = ((float)rand() / RAND_MAX - 0.5f) * 2.0f * perturbation; + float rand_y = ((float)rand() / RAND_MAX - 0.5f) * 2.0f * perturbation; + float rand_z = ((float)rand() / RAND_MAX - 0.5f) * 2.0f * perturbation; + + h_positions[index] = make_float3( + x + rand_x, + y + rand_y, + z + rand_z + ); + + // Initialize velocities + h_velocities[index] = make_float3(0.0f, 0.0f, 0.0f); + + ++index; + } + } + } + */ // If NUM_PARTICLES is not a perfect cube, initialize remaining particles for (; index < NUM_PARTICLES; ++index) { h_positions[index] = make_float3(0.0f, 10.0f, 0.0f); // Default position @@ -57,101 +85,112 @@ void initParticles( } } -// Create boundary particles + void createBoundaryParticles( std::vector<float3> &boundary_positions, - std::vector<float3> &boundary_velocities, + std::vector<float3> &boundary_normals, // Renamed from boundary_velocities int boundaryLayers, float spacing) { - // Compute the number of particles along each dimension for a given spacing - int nx = static_cast<int>((h_BOUND_X_MAX - h_BOUND_X_MIN)/spacing) + 1; - int ny = static_cast<int>((h_BOUND_Y_MAX - h_BOUND_Y_MIN)/spacing) + 1; - int nz = static_cast<int>((h_BOUND_Z_MAX - h_BOUND_Z_MIN)/spacing) + 1; - - // Initialize velocities to zero for boundary - float3 zeroVel = make_float3(0.0f, 0.0f, 0.0f); - - // Y-min boundary (e.g., "floor") - for (int layer = 0; layer < boundaryLayers; layer++) { - float y = h_BOUND_Y_MIN - (layer+1)*spacing; // placing layers below Ymin - for (int ix = 0; ix < nx; ix++) { - for (int iz = 0; iz < nz; iz++) { - float x = h_BOUND_X_MIN + ix*spacing; - float z = h_BOUND_Z_MIN + iz*spacing; - boundary_positions.push_back(make_float3(x, y, z)); - boundary_velocities.push_back(zeroVel); + // Define boundary normals based on boundary geometry + // For axis-aligned boxes, normals are straightforward + + // Compute inner box dimensions based on the specified fractions + float box_size_x = (h_BOUND_X_MAX - h_BOUND_X_MIN); + float box_size_y = (h_BOUND_Y_MAX - h_BOUND_Y_MIN); + float box_size_z = (h_BOUND_Z_MAX - h_BOUND_Z_MIN); + + // Compute inner box center positions for X and Z axes + float x_center = (h_BOUND_X_MAX + h_BOUND_X_MIN) / 2.0f; + float z_center = (h_BOUND_Z_MAX + h_BOUND_Z_MIN) / 2.0f; + + // Define inner box minimum and maximum coordinates + float inner_min_x = x_center - box_size_x / 2.0f; + float inner_max_x = x_center + box_size_x / 2.0f; + + float inner_min_y = h_BOUND_Y_MIN; // Aligned with Y-min + float inner_max_y = h_BOUND_Y_MIN + box_size_y; + + float inner_min_z = z_center - box_size_z / 2.0f; + float inner_max_z = z_center + box_size_z / 2.0f; + + // Compute the number of particles along each dimension for the inner box + int nx_inner = static_cast<int>((inner_max_x - inner_min_x) / spacing) + 1; + int ny_inner = static_cast<int>((inner_max_y - inner_min_y) / spacing) + 1; + int nz_inner = static_cast<int>((inner_max_z - inner_min_z) / spacing) + 1; + + // Lambda function to add boundary particles for a specific face + auto addBoundaryFace = [&](int axis, bool positive) { + for (int layer = 0; layer < boundaryLayers; layer++) { + float positionOffset = layer * spacing; + + int loop_i_count, loop_j_count; + // Determine which loops correspond to each axis + if (axis == 0) { + // X-axis walls: vary Y and Z + loop_i_count = ny_inner; + loop_j_count = nz_inner; + } else if (axis == 1) { + // Y-axis walls: vary X and Z + loop_i_count = nx_inner; + loop_j_count = nz_inner; + } else { + // Z-axis walls: vary X and Y + loop_i_count = nx_inner; + loop_j_count = ny_inner; } - } - } - /* - // Y-max boundary (ceiling) - for (int layer = 0; layer < boundaryLayers; layer++) { - float y = h_BOUND_Y_MAX + (layer+1)*spacing; - for (int ix = 0; ix < nx; ix++) { - for (int iz = 0; iz < nz; iz++) { - float x = h_BOUND_X_MIN + ix*spacing; - float z = h_BOUND_Z_MIN + iz*spacing; - boundary_positions.push_back(make_float3(x, y, z)); - boundary_velocities.push_back(zeroVel); + for (int i = 0; i < loop_i_count; i++) { + for (int j = 0; j < loop_j_count; j++) { + float x, y, z, nx, ny, nz; + switch (axis) { + case 0: // X-axis (Left/Right walls) + x = positive ? (inner_max_x + positionOffset) : (inner_min_x - positionOffset); + y = inner_min_y + i * spacing; + z = inner_min_z + j * spacing; + nx = positive ? -1.0f : 1.0f; + ny = 0.0f; + nz = 0.0f; + break; + + case 1: // Y-axis (Floor/Ceiling) + y = positive ? (inner_max_y + positionOffset) : (inner_min_y - positionOffset); + x = inner_min_x + i * spacing; + z = inner_min_z + j * spacing; + nx = 0.0f; + ny = positive ? -1.0f : 1.0f; + nz = 0.0f; + break; + + case 2: // Z-axis (Front/Back walls) + z = positive ? (inner_max_z + positionOffset) : (inner_min_z - positionOffset); + x = inner_min_x + i * spacing; + y = inner_min_y + j * spacing; + nx = 0.0f; + ny = 0.0f; + nz = positive ? -1.0f : 1.0f; + break; + } + + boundary_positions.emplace_back(make_float3(x, y, z)); + boundary_normals.emplace_back(make_float3(nx, ny, nz)); // Assign normals + } } } - } - */ + }; - // X-min boundary (vertical wall on the left) - int ny_cells = ny; // from earlier computation - for (int layer = 0; layer < boundaryLayers; layer++) { - float x = h_BOUND_X_MIN - (layer+1)*spacing; - for (int iy = 0; iy < ny_cells; iy++) { - for (int iz = 0; iz < nz; iz++) { - float y = h_BOUND_Y_MIN + iy*spacing; - float z = h_BOUND_Z_MIN + iz*spacing; - boundary_positions.push_back(make_float3(x, y, z)); - boundary_velocities.push_back(zeroVel); - } - } - } + // Add boundary particles for all three axes + // Axis 0: X-axis (Left and Right Walls) + addBoundaryFace(0, false); // X-min (Left Wall) + addBoundaryFace(0, true); // X-max (Right Wall) - // X-max boundary (vertical wall on the right) - for (int layer = 0; layer < boundaryLayers; layer++) { - float x = h_BOUND_X_MAX + (layer+1)*spacing; - for (int iy = 0; iy < ny_cells; iy++) { - for (int iz = 0; iz < nz; iz++) { - float y = h_BOUND_Y_MIN + iy*spacing; - float z = h_BOUND_Z_MIN + iz*spacing; - boundary_positions.push_back(make_float3(x, y, z)); - boundary_velocities.push_back(zeroVel); - } - } - } + // Axis 1: Y-axis (Floor and Ceiling) + addBoundaryFace(1, false); // Y-min (Floor) + addBoundaryFace(1, true); // Y-max (Ceiling) - // Z-min boundary (front wall) - for (int layer = 0; layer < boundaryLayers; layer++) { - float z = h_BOUND_Z_MIN - (layer+1)*spacing; - for (int ix = 0; ix < nx; ix++) { - for (int iy = 0; iy < ny_cells; iy++) { - float x = h_BOUND_X_MIN + ix*spacing; - float y = h_BOUND_Y_MIN + iy*spacing; - boundary_positions.push_back(make_float3(x, y, z)); - boundary_velocities.push_back(zeroVel); - } - } - } - - // Z-max boundary (back wall) - for (int layer = 0; layer < boundaryLayers; layer++) { - float z = h_BOUND_Z_MAX + (layer+1)*spacing; - for (int ix = 0; ix < nx; ix++) { - for (int iy = 0; iy < ny_cells; iy++) { - float x = h_BOUND_X_MIN + ix*spacing; - float y = h_BOUND_Y_MIN + iy*spacing; - boundary_positions.push_back(make_float3(x, y, z)); - boundary_velocities.push_back(zeroVel); - } - } - } + // Axis 2: Z-axis (Front and Back Walls) + addBoundaryFace(2, false); // Z-min (Front Wall) + addBoundaryFace(2, true); // Z-max (Back Wall) } } \ No newline at end of file