diff --git a/src/FluidSimulation.cpp b/src/FluidSimulation.cpp index 09752c8cd1a5ef031a92f3ae7c35285450115cc4..df4cc16814210cfd875a510b20fca2f77c6b096b 100644 --- a/src/FluidSimulation.cpp +++ b/src/FluidSimulation.cpp @@ -67,7 +67,7 @@ namespace FluidSimulation { // Initialize fluid particles std::vector<float3> h_positions(m_numParticles); std::vector<float3> h_velocities(m_numParticles); - float gridSpacing = 0.5 * m_smoothingRadius; + float gridSpacing = 2.f * h_particleRadius; initParticles(h_positions, h_velocities, m_numParticles, gridSpacing, m_mode); // Initialize particle device arrays @@ -84,7 +84,7 @@ namespace FluidSimulation { std::vector<float3> boundary_positions; std::vector<float3> boundary_normals; int boundaryLayers = 1; - float spacing = 0.5 * m_smoothingRadius; // Using 0.01 as per your code + float spacing = 2.f * h_particleRadius; // Using 0.01 as per your code createBoundaryParticles(boundary_positions, boundary_normals, boundaryLayers, spacing); m_numBoundaryParticles = boundary_positions.size(); @@ -101,6 +101,8 @@ namespace FluidSimulation { // Now that we have boundary particles set up and registered, run the boundary neighborhood search FluidSimulationGPU::boundaryNBSearch(m_numBoundaryParticles); + + FluidSimulationGPU::calculatePCISPHSSclaingFactor(); } void FluidSimulation::updateSimulation() { diff --git a/src/FluidSimulationApp.cpp b/src/FluidSimulationApp.cpp index db4d400f8ba78b67cb6c53b5f389faeba8a5171d..45773eb3fbe30350107bf3bcc45003cf81805f44 100644 --- a/src/FluidSimulationApp.cpp +++ b/src/FluidSimulationApp.cpp @@ -245,28 +245,23 @@ namespace FluidSimulation { break; case RenderStage::ThicknessBuffer: - printf("Render Thickness \n"); - printf("Thickness stage active \n"); m_fluidRenderer->renderThicknessFrame(particleCount, boundaryCount); m_fluidRenderer->visualizeBuffer(RenderStage::ThicknessBuffer); break; case RenderStage::FilteredThickness: - printf("Render Filtered Thickness \n"); m_fluidRenderer->renderThicknessFrame(particleCount, boundaryCount); m_fluidRenderer->renderThicknessFilterFrame(); m_fluidRenderer->visualizeBuffer(RenderStage::FilteredThickness); break; case RenderStage::LightAttenuation: - printf("Render Filtered Thickness \n"); m_fluidRenderer->renderThicknessFrame(particleCount, boundaryCount); m_fluidRenderer->renderThicknessFilterFrame(); m_fluidRenderer->renderThicknessAttenFrame(); break; case RenderStage::ScreenSpacedRender: - printf("Render Screen Spaced \n"); m_fluidRenderer->renderDepthFrame(particleCount, boundaryCount); m_fluidRenderer->renderDepthFilterFrame(); m_fluidRenderer->renderDepthNormalsFrame(); diff --git a/src/cuda/include/cuda_device_vars.h b/src/cuda/include/cuda_device_vars.h index 0d73f817fa9ed37534e495605330b9142675c07c..1742015a04bf5d13eedb40ae62bb8d9ca264b191 100644 --- a/src/cuda/include/cuda_device_vars.h +++ b/src/cuda/include/cuda_device_vars.h @@ -15,6 +15,8 @@ extern __constant__ float PI; extern __constant__ float H; extern __constant__ float H2; extern __constant__ float POLY6_CONST; +extern __constant__ float POLY6_GRAD_CONST; + extern __constant__ float SPIKY_GRAD_CONST; extern __constant__ float VISC_LAP_CONST; @@ -54,6 +56,7 @@ extern float3* d_force_other; extern float3* d_velocities_pred; extern float3* d_positions_pred; extern float* d_sum_gradW2; // For computing A +extern __constant__ float d_PCISPH_density_scaling_factor; // Boundary particles extern float3* d_boundary_positions; diff --git a/src/cuda/include/float3_helper_math.h b/src/cuda/include/float3_helper_math.h index 521a6950730af7a8ea683713e485e572aead864a..c64b489b510f798c0df02de26001b07c14467bde 100644 --- a/src/cuda/include/float3_helper_math.h +++ b/src/cuda/include/float3_helper_math.h @@ -229,5 +229,23 @@ inline __host__ __device__ float3 cross(float3 a, float3 b) return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); } +inline __host__ __device__ float3 clamp(const float3 v, const float minVal, const float maxVal) +{ + return make_float3( + fminf(fmaxf(v.x, minVal), maxVal), + fminf(fmaxf(v.y, minVal), maxVal), + fminf(fmaxf(v.z, minVal), maxVal) + ); +} + +inline __host__ __device__ float3 clamp_length(const float3 v, const float maxLength) +{ + float len = length(v); + if (len > maxLength) + { + return v * (maxLength / len); + } + return v; +} #endif \ No newline at end of file diff --git a/src/cuda/include/helper_macros.h b/src/cuda/include/helper_macros.h new file mode 100644 index 0000000000000000000000000000000000000000..9c18a79914af30754f46615d5e517a80f8a41740 --- /dev/null +++ b/src/cuda/include/helper_macros.h @@ -0,0 +1,34 @@ +// helper_macros.h + +#ifndef HELPER_MACROS_H +#define HELPER_MACROS_H + +#define FOREACH_NEIGHBOR_CELL_BEGIN(cellIndex, gridSizeX, gridSizeY, gridSizeZ, neighborCellIdx) \ + do { \ + /* Compute cell coordinates */ \ + int cellX = cellIndex % gridSizeX; \ + int cellY = (cellIndex / gridSizeX) % gridSizeY; \ + int cellZ = cellIndex / (gridSizeX * gridSizeY); \ + \ + /* Iterate over neighboring cells */ \ + for (int dz = -1; dz <= 1; ++dz) { \ + int nz = cellZ + dz; \ + if (nz < 0 || nz >= gridSizeZ) continue; \ + for (int dy = -1; dy <= 1; ++dy) { \ + int ny = cellY + dy; \ + if (ny < 0 || ny >= gridSizeY) continue; \ + for (int dx = -1; dx <= 1; ++dx) { \ + int nx = cellX + dx; \ + if (nx < 0 || nx >= gridSizeX) continue; \ + \ + /* Compute neighbor cell index */ \ + neighborCellIdx = nx + ny * gridSizeX + nz * gridSizeX * gridSizeY; \ + /* User-defined code starts here */ + +#define FOREACH_NEIGHBOR_CELL_END() \ + } \ + } \ + } \ + } while(0) + +#endif // HELPER_MACROS_H diff --git a/src/cuda/include/init_cuda.h b/src/cuda/include/init_cuda.h index a55477664ee353afc6a5198e76a61149978297f0..954f2e82a05ddaad1083f83f3d67a5ffd7b91b2e 100644 --- a/src/cuda/include/init_cuda.h +++ b/src/cuda/include/init_cuda.h @@ -37,6 +37,8 @@ void updateConstantsOnDevice( float smoothingRadius ); +void calculatePCISPHSSclaingFactor(); + } // namespace FluidSimulationGPU #endif // INIT_CUDA_H diff --git a/src/cuda/include/smoothing_kernels.h b/src/cuda/include/smoothing_kernels.h index ed48092705daed120ae6da26414c521deea73fce..76e22fc9729a1c9cbc3dd1343e573ce5dd224f89 100644 --- a/src/cuda/include/smoothing_kernels.h +++ b/src/cuda/include/smoothing_kernels.h @@ -6,7 +6,11 @@ namespace FluidSimulationGPU { __device__ float poly6Kernel(float r2); +__device__ float poly6GradKernel(float r2); + +__device__ float spikyKernel(float r); __device__ float spikyKernelGrad(float r); + __device__ float viscosityKernelLap(float r); __device__ float cubicSplineKernel(float r); diff --git a/src/cuda/init_cuda.cu b/src/cuda/init_cuda.cu index bc682835476d3ab1044bc9f78cc2fe77b8cc3b17..c471ff46dd75619e950406d502aea18fa22b61f4 100644 --- a/src/cuda/init_cuda.cu +++ b/src/cuda/init_cuda.cu @@ -4,220 +4,225 @@ #include "../settings/constants.h" #include "include/pcisph_kernels.h" +#include "include/smoothing_kernels.h" #include <cuda_runtime.h> #include <cuda_gl_interop.h> #include <device_launch_parameters.h> #include <cstdio> -namespace FluidSimulationGPU { - -// Global Variables -cudaGraphicsResource_t cuda_vbo_resource; -cudaGraphicsResource_t cuda_vbo_boundary_resource; - -// Constants -__constant__ float PI; -__constant__ float H; -__constant__ float H2; -__constant__ float POLY6_CONST; -__constant__ float SPIKY_GRAD_CONST; -__constant__ float VISC_LAP_CONST; - -__constant__ float PARTICLE_MASS; -__constant__ float REST_DENSITY; -__constant__ float VISCOSITY; +namespace FluidSimulationGPU +{ -// Boundary constants -__constant__ float BOUND_X_MIN; -__constant__ float BOUND_X_MAX; -__constant__ float BOUND_Y_MIN; -__constant__ float BOUND_Y_MAX; -__constant__ float BOUND_Z_MIN; -__constant__ float BOUND_Z_MAX; + // Global Variables + cudaGraphicsResource_t cuda_vbo_resource; + cudaGraphicsResource_t cuda_vbo_boundary_resource; + + // Constants + __constant__ float PI; + __constant__ float H; + __constant__ float H2; + __constant__ float POLY6_CONST; + __constant__ float POLY6_GRAD_CONST; + __constant__ float SPIKY_GRAD_CONST; + __constant__ float VISC_LAP_CONST; + + __constant__ float PARTICLE_MASS; + __constant__ float REST_DENSITY; + __constant__ float VISCOSITY; + + // Boundary constants + __constant__ float BOUND_X_MIN; + __constant__ float BOUND_X_MAX; + __constant__ float BOUND_Y_MIN; + __constant__ float BOUND_Y_MAX; + __constant__ float BOUND_Z_MIN; + __constant__ float BOUND_Z_MAX; + + // Device Arrays + float3 *d_positions = nullptr; + float3 *d_velocities = nullptr; + + unsigned int *d_cellIndices = nullptr; + unsigned int *d_particleIndices = nullptr; + + float3 *d_positions_sorted = nullptr; + float3 *d_velocities_sorted = nullptr; + + unsigned int *d_cellStart = nullptr; + unsigned int *d_cellEnd = nullptr; + + // Arrays for densities, pressures, accelerations + float *d_densities = nullptr; + float *d_density_errors = nullptr; + float *d_particle_pressure = nullptr; + float3 *d_pressure_force = nullptr; + float3 *d_force_other = nullptr; + + // Additional arrays for PCISPH + float3 *d_velocities_pred = nullptr; + float3 *d_positions_pred = nullptr; + float *d_sum_gradW2 = nullptr; + + float h_PCISPH_density_scaling_factor = 0.0f; + __constant__ float d_PCISPH_density_scaling_factor; + + // Boundary particles + float3 *d_boundary_positions = nullptr; + float3 *d_boundary_normals = nullptr; + + unsigned int *d_boundaryCellStart = nullptr; + unsigned int *d_boundaryCellEnd = nullptr; + + unsigned int *d_boundaryCellIndices = nullptr; + unsigned int *d_boundaryParticleIndices = nullptr; + + void initializeConstants() + { + // Host constants + const float h_H2 = h_H * h_H; + const float h_POLY6_CONST = 315.0f / (64.0f * M_PI * powf(h_H, 9)); + const float h_POLY6_GRAD_CONST = -945.0f / (32.0f * M_PI * pow(h_H, 9.0f)); // Check that this andn othere constants are correct!!! + const float h_SPIKY_GRAD_CONST = -45.0f / (M_PI * powf(h_H, 6)); + const float h_VISC_LAP_CONST = 45.0f / (M_PI * powf(h_H, 6)); + + // Copy constants to device + cudaMemcpyToSymbol(PI, &h_PI, sizeof(float)); + cudaMemcpyToSymbol(H, &h_H, sizeof(float)); + cudaMemcpyToSymbol(H2, &h_H2, sizeof(float)); + cudaMemcpyToSymbol(POLY6_CONST, &h_POLY6_CONST, sizeof(float)); + cudaMemcpyToSymbol(POLY6_GRAD_CONST, &h_POLY6_GRAD_CONST, sizeof(float)); + cudaMemcpyToSymbol(SPIKY_GRAD_CONST, &h_SPIKY_GRAD_CONST, sizeof(float)); + cudaMemcpyToSymbol(VISC_LAP_CONST, &h_VISC_LAP_CONST, sizeof(float)); + + cudaMemcpyToSymbol(PARTICLE_MASS, &h_PARTICLE_MASS, sizeof(float)); + cudaMemcpyToSymbol(REST_DENSITY, &h_REST_DENSITY, sizeof(float)); + cudaMemcpyToSymbol(VISCOSITY, &h_VISCOSITY, sizeof(float)); + + // Copy boundary constants to device + cudaMemcpyToSymbol(BOUND_X_MIN, &h_BOUND_X_MIN, sizeof(float)); + cudaMemcpyToSymbol(BOUND_X_MAX, &h_BOUND_X_MAX, sizeof(float)); + cudaMemcpyToSymbol(BOUND_Y_MIN, &h_BOUND_Y_MIN, sizeof(float)); + cudaMemcpyToSymbol(BOUND_Y_MAX, &h_BOUND_Y_MAX, sizeof(float)); + cudaMemcpyToSymbol(BOUND_Z_MIN, &h_BOUND_Z_MIN, sizeof(float)); + cudaMemcpyToSymbol(BOUND_Z_MAX, &h_BOUND_Z_MAX, sizeof(float)); + } -// Device Arrays -float3* d_positions = nullptr; -float3* d_velocities = nullptr; + // Register VBO with CUDA + void registerVBOWithCUDA(unsigned int vbo) + { + cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsNone); + } -unsigned int* d_cellIndices = nullptr; -unsigned int* d_particleIndices = nullptr; + void registerBoundaryVBOWithCUDA(unsigned int vbo) + { + cudaGraphicsGLRegisterBuffer(&cuda_vbo_boundary_resource, vbo, cudaGraphicsMapFlagsNone); + } -float3* d_positions_sorted = nullptr; -float3* d_velocities_sorted = nullptr; + // Unregister VBO from CUDA + void unregisterVBOFromCUDA() + { + cudaGraphicsUnregisterResource(cuda_vbo_resource); + } -unsigned int* d_cellStart = nullptr; -unsigned int* d_cellEnd = nullptr; + void unregisterBoundaryVBOFromCUDA() + { + cudaGraphicsUnregisterResource(cuda_vbo_boundary_resource); + } -// Arrays for densities, pressures, accelerations -float* d_densities = nullptr; -float* d_density_errors = nullptr; -float* d_particle_pressure = nullptr; -float3* d_pressure_force = nullptr; -float3* d_force_other = nullptr; + // Initialize device arrays + void initializeFluidDeviceArrays( + const float3 *h_positions, + const float3 *h_velocities, + int numParticles) + { + // Allocate device memory for velocities + cudaMalloc(&d_velocities, numParticles * sizeof(float3)); + cudaMemcpy(d_velocities, h_velocities, numParticles * sizeof(float3), cudaMemcpyHostToDevice); + + // Allocate device memory for predicted velocities and positions + cudaMalloc(&d_velocities_pred, numParticles * sizeof(float3)); + cudaMalloc(&d_positions_pred, numParticles * sizeof(float3)); + + // Allocate device memory for cell indices and particle indices + cudaMalloc(&d_cellIndices, numParticles * sizeof(unsigned int)); + cudaMalloc(&d_particleIndices, numParticles * sizeof(unsigned int)); + + // Allocate device memory for sorted positions and velocities + cudaMalloc(&d_positions_sorted, numParticles * sizeof(float3)); + cudaMalloc(&d_velocities_sorted, numParticles * sizeof(float3)); + + // Allocate cell start and end arrays + int gridSizeX = (int)ceil((h_BOUND_X_MAX - h_BOUND_X_MIN) / h_H); + int gridSizeY = (int)ceil((h_BOUND_Y_MAX - h_BOUND_Y_MIN) / h_H); + int gridSizeZ = (int)ceil((h_BOUND_Z_MAX - h_BOUND_Z_MIN) / h_H); + int maxCells = gridSizeX * gridSizeY * gridSizeZ; + + cudaMalloc(&d_cellStart, maxCells * sizeof(unsigned int)); + cudaMalloc(&d_cellEnd, maxCells * sizeof(unsigned int)); + + // Allocate device memory for densities, pressure increments, and accelerations + cudaMalloc(&d_densities, numParticles * sizeof(float)); + cudaMalloc(&d_density_errors, numParticles * sizeof(float)); + cudaMalloc(&d_particle_pressure, numParticles * sizeof(float)); + cudaMalloc(&d_pressure_force, numParticles * sizeof(float3)); + cudaMalloc(&d_force_other, numParticles * sizeof(float3)); + + // Allocate device memory for sum_gradW2 + cudaMalloc(&d_sum_gradW2, numParticles * sizeof(float)); + + // Initialize positions_pred to initial positions + cudaMemcpy(d_positions_pred, h_positions, numParticles * sizeof(float3), cudaMemcpyHostToDevice); + } -// Additional arrays for PCISPH -float3* d_velocities_pred = nullptr; -float3* d_positions_pred = nullptr; -float* d_sum_gradW2 = nullptr; + void initializeBoundryDeviceArrays( + const float3 *h_boundaryPositions, + const float3 *h_boundaryNormals, // Renamed parameter + int numBoundaryParticles) + { + // Allocate cell start and end arrays + int gridSizeX = (int)ceil((h_BOUND_X_MAX - h_BOUND_X_MIN) / h_H); + int gridSizeY = (int)ceil((h_BOUND_Y_MAX - h_BOUND_Y_MIN) / h_H); + int gridSizeZ = (int)ceil((h_BOUND_Z_MAX - h_BOUND_Z_MIN) / h_H); + int maxCells = gridSizeX * gridSizeY * gridSizeZ; + + // Allocate memory for boundary positions + // cudaMalloc(&d_boundary_positions, numBoundaryParticles * sizeof(float3)); + // cudaMemcpy(d_boundary_positions, h_boundaryPositions, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); + + // Allocate memory for boundary normals + cudaMalloc(&d_boundary_normals, numBoundaryParticles * sizeof(float3)); + cudaMemcpy(d_boundary_normals, h_boundaryNormals, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); + + cudaMalloc(&d_boundaryCellIndices, numBoundaryParticles * sizeof(unsigned int)); + cudaMalloc(&d_boundaryParticleIndices, numBoundaryParticles * sizeof(unsigned int)); + + cudaMalloc(&d_boundaryCellStart, maxCells * sizeof(unsigned int)); + cudaMalloc(&d_boundaryCellEnd, maxCells * sizeof(unsigned int)); + } -// Boundary particles -float3* d_boundary_positions = nullptr; -float3* d_boundary_normals = nullptr; + // NeighbourHoodSearch for boundary particles 1 time at start since they dont move + void boundaryNBSearch(int numBoundaryParticles) + { -unsigned int* d_boundaryCellStart = nullptr; -unsigned int* d_boundaryCellEnd = nullptr; + // Map the VBO resource to get a device pointer + size_t num_bytes; + cudaGraphicsMapResources(1, &cuda_vbo_boundary_resource, 0); + cudaGraphicsResourceGetMappedPointer((void **)&d_boundary_positions, &num_bytes, cuda_vbo_boundary_resource); -unsigned int* d_boundaryCellIndices = nullptr; -unsigned int* d_boundaryParticleIndices = nullptr; + int gsX, gsY, gsZ; -void initializeConstants() -{ - // Host constants - const float h_H2 = h_H * h_H; - const float h_POLY6_CONST = 315.0f / (64.0f * h_PI * powf(h_H, 9)); - const float h_SPIKY_GRAD_CONST = -45.0f / (h_PI * powf(h_H, 6)); - const float h_VISC_LAP_CONST = 45.0f / (h_PI * powf(h_H, 6)); - - // Copy constants to device - cudaMemcpyToSymbol(PI, &h_PI, sizeof(float)); - cudaMemcpyToSymbol(H, &h_H, sizeof(float)); - cudaMemcpyToSymbol(H2, &h_H2, sizeof(float)); - cudaMemcpyToSymbol(POLY6_CONST, &h_POLY6_CONST, sizeof(float)); - cudaMemcpyToSymbol(SPIKY_GRAD_CONST, &h_SPIKY_GRAD_CONST, sizeof(float)); - cudaMemcpyToSymbol(VISC_LAP_CONST, &h_VISC_LAP_CONST, sizeof(float)); - - cudaMemcpyToSymbol(PARTICLE_MASS, &h_PARTICLE_MASS, sizeof(float)); - cudaMemcpyToSymbol(REST_DENSITY, &h_REST_DENSITY, sizeof(float)); - cudaMemcpyToSymbol(VISCOSITY, &h_VISCOSITY, sizeof(float)); - - // Copy boundary constants to device - cudaMemcpyToSymbol(BOUND_X_MIN, &h_BOUND_X_MIN, sizeof(float)); - cudaMemcpyToSymbol(BOUND_X_MAX, &h_BOUND_X_MAX, sizeof(float)); - cudaMemcpyToSymbol(BOUND_Y_MIN, &h_BOUND_Y_MIN, sizeof(float)); - cudaMemcpyToSymbol(BOUND_Y_MAX, &h_BOUND_Y_MAX, sizeof(float)); - cudaMemcpyToSymbol(BOUND_Z_MIN, &h_BOUND_Z_MIN, sizeof(float)); - cudaMemcpyToSymbol(BOUND_Z_MAX, &h_BOUND_Z_MAX, sizeof(float)); -} - -// Register VBO with CUDA -void registerVBOWithCUDA(unsigned int vbo) -{ - cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsNone); -} + int threadsPerBlock = 256; + int blocksPerGrid = (numBoundaryParticles + threadsPerBlock - 1) / threadsPerBlock; -void registerBoundaryVBOWithCUDA(unsigned int vbo) -{ - cudaGraphicsGLRegisterBuffer(&cuda_vbo_boundary_resource, vbo, cudaGraphicsMapFlagsNone); -} + float3 *d_boundary_positions_sorted; + float3 *d_boundary_normals_sorted; -// Unregister VBO from CUDA -void unregisterVBOFromCUDA() -{ - cudaGraphicsUnregisterResource(cuda_vbo_resource); -} + cudaMalloc(&d_boundary_positions_sorted, numBoundaryParticles * sizeof(float3)); + cudaMalloc(&d_boundary_normals_sorted, numBoundaryParticles * sizeof(float3)); -void unregisterBoundaryVBOFromCUDA() -{ - cudaGraphicsUnregisterResource(cuda_vbo_boundary_resource); -} - -// Initialize device arrays -void initializeFluidDeviceArrays( - const float3* h_positions, - const float3* h_velocities, - int numParticles -) -{ - // Allocate device memory for velocities - cudaMalloc(&d_velocities, numParticles * sizeof(float3)); - cudaMemcpy(d_velocities, h_velocities, numParticles * sizeof(float3), cudaMemcpyHostToDevice); - - // Allocate device memory for predicted velocities and positions - cudaMalloc(&d_velocities_pred, numParticles * sizeof(float3)); - cudaMalloc(&d_positions_pred, numParticles * sizeof(float3)); - - // Allocate device memory for cell indices and particle indices - cudaMalloc(&d_cellIndices, numParticles * sizeof(unsigned int)); - cudaMalloc(&d_particleIndices, numParticles * sizeof(unsigned int)); - - // Allocate device memory for sorted positions and velocities - cudaMalloc(&d_positions_sorted, numParticles * sizeof(float3)); - cudaMalloc(&d_velocities_sorted, numParticles * sizeof(float3)); - - // Allocate cell start and end arrays - int gridSizeX = (int)ceil((h_BOUND_X_MAX - h_BOUND_X_MIN) / h_H); - int gridSizeY = (int)ceil((h_BOUND_Y_MAX - h_BOUND_Y_MIN) / h_H); - int gridSizeZ = (int)ceil((h_BOUND_Z_MAX - h_BOUND_Z_MIN) / h_H); - int maxCells = gridSizeX * gridSizeY * gridSizeZ; - - cudaMalloc(&d_cellStart, maxCells * sizeof(unsigned int)); - cudaMalloc(&d_cellEnd, maxCells * sizeof(unsigned int)); - - // Allocate device memory for densities, pressure increments, and accelerations - cudaMalloc(&d_densities, numParticles * sizeof(float)); - cudaMalloc(&d_density_errors, numParticles * sizeof(float)); - cudaMalloc(&d_particle_pressure, numParticles * sizeof(float)); - cudaMalloc(&d_pressure_force, numParticles * sizeof(float3)); - cudaMalloc(&d_force_other, numParticles * sizeof(float3)); - - // Allocate device memory for sum_gradW2 - cudaMalloc(&d_sum_gradW2, numParticles * sizeof(float)); - - // Initialize positions_pred to initial positions - cudaMemcpy(d_positions_pred, h_positions, numParticles * sizeof(float3), cudaMemcpyHostToDevice); -} - -void initializeBoundryDeviceArrays( - const float3* h_boundaryPositions, - const float3* h_boundaryNormals, // Renamed parameter - int numBoundaryParticles -) -{ - // Allocate cell start and end arrays - int gridSizeX = (int)ceil((h_BOUND_X_MAX - h_BOUND_X_MIN) / h_H); - int gridSizeY = (int)ceil((h_BOUND_Y_MAX - h_BOUND_Y_MIN) / h_H); - int gridSizeZ = (int)ceil((h_BOUND_Z_MAX - h_BOUND_Z_MIN) / h_H); - int maxCells = gridSizeX * gridSizeY * gridSizeZ; - - // Allocate memory for boundary positions - //cudaMalloc(&d_boundary_positions, numBoundaryParticles * sizeof(float3)); - //cudaMemcpy(d_boundary_positions, h_boundaryPositions, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); - - // Allocate memory for boundary normals - cudaMalloc(&d_boundary_normals, numBoundaryParticles * sizeof(float3)); - cudaMemcpy(d_boundary_normals, h_boundaryNormals, numBoundaryParticles * sizeof(float3), cudaMemcpyHostToDevice); - - cudaMalloc(&d_boundaryCellIndices, numBoundaryParticles * sizeof(unsigned int)); - cudaMalloc(&d_boundaryParticleIndices, numBoundaryParticles * sizeof(unsigned int)); - - cudaMalloc(&d_boundaryCellStart, maxCells * sizeof(unsigned int)); - cudaMalloc(&d_boundaryCellEnd, maxCells * sizeof(unsigned int)); -} - - -// NeighbourHoodSearch for boundary particles 1 time at start since they dont move -void boundaryNBSearch(int numBoundaryParticles) { - - // Map the VBO resource to get a device pointer - size_t num_bytes; - cudaGraphicsMapResources(1, &cuda_vbo_boundary_resource, 0); - cudaGraphicsResourceGetMappedPointer((void**)&d_boundary_positions, &num_bytes, cuda_vbo_boundary_resource); - - int gsX, gsY, gsZ; - - int threadsPerBlock = 256; - int blocksPerGrid = (numBoundaryParticles + threadsPerBlock - 1) / threadsPerBlock; - - float3* d_boundary_positions_sorted; - float3* d_boundary_normals_sorted; - - - cudaMalloc(&d_boundary_positions_sorted, numBoundaryParticles * sizeof(float3)); - cudaMalloc(&d_boundary_normals_sorted, numBoundaryParticles * sizeof(float3)); - - neigbourHoodSearch( - numBoundaryParticles, + neigbourHoodSearch( + numBoundaryParticles, d_boundary_positions, d_boundary_normals, d_boundary_positions_sorted, @@ -230,112 +235,235 @@ void boundaryNBSearch(int numBoundaryParticles) { threadsPerBlock, gsX, gsY, - gsZ); - - // Unmap the VBO resource - cudaGraphicsUnmapResources(1, &cuda_vbo_boundary_resource, 0); - - cudaFree(d_boundary_positions_sorted); - cudaFree(d_boundary_normals_sorted); - - // Move some of above init code for boundary to seeparate function -} - -void resetSimulation( - const float3* h_positions, - const float3* h_velocities, - int numParticles -) -{ - cudaMemcpy(d_velocities, h_velocities, numParticles * sizeof(float3), cudaMemcpyHostToDevice); - - // Initialize positions_pred to initial positions - cudaMemcpy(d_positions_pred, h_positions, numParticles * sizeof(float3), cudaMemcpyHostToDevice); - - // Map the VBO resource - cudaError_t err = cudaGraphicsMapResources(1, &cuda_vbo_resource, 0); - if (err != cudaSuccess) { - fprintf(stderr, "Failed to map VBO resource: %s\n", cudaGetErrorString(err)); - return; + gsZ); + + // Unmap the VBO resource + cudaGraphicsUnmapResources(1, &cuda_vbo_boundary_resource, 0); + + cudaFree(d_boundary_positions_sorted); + cudaFree(d_boundary_normals_sorted); + + // Move some of above init code for boundary to seeparate function } - // Check if d_positions is already assigned - if (d_positions == nullptr) { - size_t num_bytes = 0; + void calculatePCISPHSSclaingFactor() + { + + // We should only have: + // delta_t, rest_density, particle_radius, viscosity + // As fluid params we set. The rest we should deduce from them like: + + // params.kernel_radius = 4.f * params.particle_radius; + // params.kernel_radius2 = std::pow(params.kernel_radius, 2.f); + // params.particle_mass = params.rest_density / std::pow(1.f / (2.f * params.particle_radius), 3); + + // // -> grid (bucket count needs to be a multiple of 64 to make the hash keys locally unique + // params.bucket_count = params.fluid_count / 2; + // params.bucket_count -= params.bucket_count % 64; + // params.bucket_count = std::max(64U, params.bucket_count); + + // params.cell_size = params.kernel_radius; + + // // -> smoothing kernels + // params.poly6_normalization = 315.f / (64.f * utils::PI * std::pow(params.kernel_radius, 9.f)); + // params.poly6_d1_normalization = -945.f / (32.f * utils::PI * std::pow(params.kernel_radius, 9.f)); + // params.viscosity_d2_normalization = 45.f / (utils::PI * std::pow(params.kernel_radius, 6.0f)); + // params.spiky_d1_normalization = -45.f / (utils::PI * std::pow(params.kernel_radius, 6.0f)); + // params.surface_tension_normalization = 32.f / (utils::PI * std::pow(params.kernel_radius, 9.f)); + + float kernel_radius = 4.0f * h_particleRadius; + float kernel_radius2 = pow(kernel_radius, 2.0f); // check taht pow() func is correct!! + float particle_mass = h_REST_DENSITY / pow(1.0f / (2.0f * h_particleRadius), 3.0f) / 1.15f; + + // MAke sure constannts are correct!!! + float poly6__const = 315.f / (64.f * M_PI * pow(kernel_radius, 9.f)); + float poly6_grad_const = -945.f / (32.f * M_PI * pow(kernel_radius, 9.f)); + float viscosity_d2_normalization = 45.f / (M_PI * pow(kernel_radius, 6.0f)); + float spiky_d1_normalization = -45.f / (M_PI * pow(kernel_radius, 6.0f)); + float surface_tension_normalization = 32.f / (M_PI * pow(kernel_radius, 9.f)); + + cudaMemcpyToSymbol(H, &kernel_radius, sizeof(float)); + cudaMemcpyToSymbol(H2, &kernel_radius2, sizeof(float)); + cudaMemcpyToSymbol(POLY6_CONST, &poly6__const, sizeof(float)); + cudaMemcpyToSymbol(POLY6_GRAD_CONST, &poly6_grad_const, sizeof(float)); + cudaMemcpyToSymbol(SPIKY_GRAD_CONST, &spiky_d1_normalization, sizeof(float)); + cudaMemcpyToSymbol(VISC_LAP_CONST, &viscosity_d2_normalization, sizeof(float)); + + cudaMemcpyToSymbol(PARTICLE_MASS, &particle_mass, sizeof(float)); + + // 1) Beta + float timeStep = 0.01f; + float dtm_rho = (timeStep * particle_mass) / h_REST_DENSITY; + float beta = 2.0f * dtm_rho * dtm_rho; + + double sumDot = 0.0; + double sum_val[3] = {0.0, 0.0, 0.0}; + double sum_spiky[3] = {0.0, 0.0, 0.0}; + + float step = 2.0f * h_particleRadius; + float R = kernel_radius + h_particleRadius; + + // 2) Loop over sample points + for (float z = -R; z <= R; z += step) + { + for (float y = -R; y <= R; y += step) + { + for (float x = -R; x <= R; x += step) + { + float r2 = x * x + y * y + z * z; + float r = sqrtf(r2); + + if (r < h_H && r > 1e-9f) + { + float spikyMag = spiky_d1_normalization * pow(kernel_radius - r, 2.f) * (1.0f / r); // should it not match spky grad kernel?? r/r_norm?? + float3 spiky_grad = { + spikyMag * x, + spikyMag * y, + spikyMag * z}; + + float poly6Mag = poly6_grad_const * pow(kernel_radius2 - r2, 2.f); // Check ttaht kernel is correect!!! + float3 poly6_grad = { + poly6Mag * x, + poly6Mag * y, + poly6Mag * z}; + + sum_spiky[0] += spiky_grad.x; + sum_spiky[1] += spiky_grad.y; + sum_spiky[2] += spiky_grad.z; + + sum_val[0] += poly6_grad.x; + sum_val[1] += poly6_grad.y; + sum_val[2] += poly6_grad.z; + + double dotSV = (double)spiky_grad.x * poly6_grad.x + (double)spiky_grad.y * poly6_grad.y + (double)spiky_grad.z * poly6_grad.z; + sumDot += dotSV; + } + } + } + } - // Get the mapped pointer for positions - err = cudaGraphicsResourceGetMappedPointer((void**)&d_positions, &num_bytes, cuda_vbo_resource); - if (err != cudaSuccess) { - fprintf(stderr, "Failed to get mapped pointer: %s\n", cudaGetErrorString(err)); - cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); - return; + double sum_spiky_dot_val = sum_spiky[0] * sum_val[0] + sum_spiky[1] * sum_val[1] + sum_spiky[2] * sum_val[2]; + + double bigTerm = -sum_spiky_dot_val - sumDot; + if (fabs(bigTerm) < 1e-12) + { + bigTerm = 1e-12; // Avoid zero } - } - // Copy new positions to the mapped pointer - err = cudaMemcpy(d_positions, h_positions, numParticles * sizeof(float3), cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - fprintf(stderr, "Failed to copy new positions: %s\n", cudaGetErrorString(err)); + double alpha = -1.0 / ((double)beta * bigTerm); + + // Store final + h_PCISPH_density_scaling_factor = (float)alpha; + + printf("PCISPH density scaling factor: alpha = %g\n", + h_PCISPH_density_scaling_factor); + + // Optionally copy to device: + cudaMemcpyToSymbol(d_PCISPH_density_scaling_factor, + &h_PCISPH_density_scaling_factor, + sizeof(float), + 0, + cudaMemcpyHostToDevice); } - // Unmap the VBO resource - err = cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); - if (err != cudaSuccess) { - fprintf(stderr, "Failed to unmap VBO resource: %s\n", cudaGetErrorString(err)); + void resetSimulation( + const float3 *h_positions, + const float3 *h_velocities, + int numParticles) + { + cudaMemcpy(d_velocities, h_velocities, numParticles * sizeof(float3), cudaMemcpyHostToDevice); + + // Initialize positions_pred to initial positions + cudaMemcpy(d_positions_pred, h_positions, numParticles * sizeof(float3), cudaMemcpyHostToDevice); + + // Map the VBO resource + cudaError_t err = cudaGraphicsMapResources(1, &cuda_vbo_resource, 0); + if (err != cudaSuccess) + { + fprintf(stderr, "Failed to map VBO resource: %s\n", cudaGetErrorString(err)); + return; + } + + // Check if d_positions is already assigned + if (d_positions == nullptr) + { + size_t num_bytes = 0; + + // Get the mapped pointer for positions + err = cudaGraphicsResourceGetMappedPointer((void **)&d_positions, &num_bytes, cuda_vbo_resource); + if (err != cudaSuccess) + { + fprintf(stderr, "Failed to get mapped pointer: %s\n", cudaGetErrorString(err)); + cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); + return; + } + } + + // Copy new positions to the mapped pointer + err = cudaMemcpy(d_positions, h_positions, numParticles * sizeof(float3), cudaMemcpyHostToDevice); + if (err != cudaSuccess) + { + fprintf(stderr, "Failed to copy new positions: %s\n", cudaGetErrorString(err)); + } + + // Unmap the VBO resource + err = cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); + if (err != cudaSuccess) + { + fprintf(stderr, "Failed to unmap VBO resource: %s\n", cudaGetErrorString(err)); + } } -} - -void updateConstantsOnDevice(float particleMass, float restDensity, float viscosity, float smoothingRadius) { - // Recalculate derived constants - float h_H2 = smoothingRadius * smoothingRadius; - float h_POLY6_CONST = 315.0f / (64.0f * h_PI * powf(smoothingRadius, 9)); - float h_SPIKY_GRAD_CONST = -45.0f / (h_PI * powf(smoothingRadius, 6)); - float h_VISC_LAP_CONST = 45.0f / (h_PI * powf(smoothingRadius, 6)); - - // Send constants to CUDA device - cudaMemcpyToSymbol(PARTICLE_MASS, &particleMass, sizeof(float)); - cudaMemcpyToSymbol(REST_DENSITY, &restDensity, sizeof(float)); - cudaMemcpyToSymbol(VISCOSITY, &viscosity, sizeof(float)); - cudaMemcpyToSymbol(H, &smoothingRadius, sizeof(float)); - cudaMemcpyToSymbol(H2, &h_H2, sizeof(float)); - cudaMemcpyToSymbol(POLY6_CONST, &h_POLY6_CONST, sizeof(float)); - cudaMemcpyToSymbol(SPIKY_GRAD_CONST, &h_SPIKY_GRAD_CONST, sizeof(float)); - cudaMemcpyToSymbol(VISC_LAP_CONST, &h_VISC_LAP_CONST, sizeof(float)); -} - -// Cleanup device arrays -void cleanupDeviceArrays() -{ - cudaFree(d_velocities); - cudaFree(d_velocities_pred); - cudaFree(d_positions_pred); - cudaFree(d_cellIndices); - cudaFree(d_particleIndices); + void updateConstantsOnDevice(float particleMass, float restDensity, float viscosity, float smoothingRadius) + { + // Recalculate derived constants + float h_H2 = smoothingRadius * smoothingRadius; + float h_POLY6_CONST = 315.0f / (64.0f * h_PI * powf(smoothingRadius, 9)); + float h_SPIKY_GRAD_CONST = -45.0f / (h_PI * powf(smoothingRadius, 6)); + float h_VISC_LAP_CONST = 45.0f / (h_PI * powf(smoothingRadius, 6)); + + // Send constants to CUDA device + cudaMemcpyToSymbol(PARTICLE_MASS, &particleMass, sizeof(float)); + cudaMemcpyToSymbol(REST_DENSITY, &restDensity, sizeof(float)); + cudaMemcpyToSymbol(VISCOSITY, &viscosity, sizeof(float)); + cudaMemcpyToSymbol(H, &smoothingRadius, sizeof(float)); + cudaMemcpyToSymbol(H2, &h_H2, sizeof(float)); + cudaMemcpyToSymbol(POLY6_CONST, &h_POLY6_CONST, sizeof(float)); + cudaMemcpyToSymbol(SPIKY_GRAD_CONST, &h_SPIKY_GRAD_CONST, sizeof(float)); + cudaMemcpyToSymbol(VISC_LAP_CONST, &h_VISC_LAP_CONST, sizeof(float)); + } - cudaFree(d_positions_sorted); - cudaFree(d_velocities_sorted); + // Cleanup device arrays + void cleanupDeviceArrays() + { + cudaFree(d_velocities); + cudaFree(d_velocities_pred); + cudaFree(d_positions_pred); - cudaFree(d_cellStart); - cudaFree(d_cellEnd); + cudaFree(d_cellIndices); + cudaFree(d_particleIndices); - cudaFree(d_densities); - cudaFree(d_density_errors); - cudaFree(d_particle_pressure); - cudaFree(d_pressure_force); - cudaFree(d_force_other); + cudaFree(d_positions_sorted); + cudaFree(d_velocities_sorted); - cudaFree(d_boundary_positions); - cudaFree(d_boundary_normals); + cudaFree(d_cellStart); + cudaFree(d_cellEnd); - cudaFree(d_boundaryCellIndices); - cudaFree(d_boundaryParticleIndices); - cudaFree(d_boundaryCellEnd); - cudaFree(d_boundaryCellStart); + cudaFree(d_densities); + cudaFree(d_density_errors); + cudaFree(d_particle_pressure); + cudaFree(d_pressure_force); + cudaFree(d_force_other); - cudaFree(d_sum_gradW2); + cudaFree(d_boundary_positions); + cudaFree(d_boundary_normals); -} + cudaFree(d_boundaryCellIndices); + cudaFree(d_boundaryParticleIndices); + cudaFree(d_boundaryCellEnd); + cudaFree(d_boundaryCellStart); + cudaFree(d_sum_gradW2); + } } // namespace FluidSimulationGPU diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu index 8ca0dec615a01118dba1d9c2976f281fa3900d24..4e1a5e278d077263f2fc9a19ea5a56aac3f5ce45 100644 --- a/src/cuda/pcisph_kernels.cu +++ b/src/cuda/pcisph_kernels.cu @@ -3,6 +3,7 @@ #include "include/smoothing_kernels.h" #include "include/float3_helper_math.h" +#include "include/helper_macros.h" #include "include/cuda_device_vars.h" #include "../settings/constants.h" @@ -119,7 +120,7 @@ __global__ void computeInitialDensity( } } - densities[idx] = density_i + b_density_i; + densities[idx] = REST_DENSITY; //density_i + b_density_i; if (sum_gradW2 != nullptr) { sum_gradW2[idx] = sumGradW2_local; @@ -208,10 +209,10 @@ __global__ void computeDensityError( } } - float density_variation = density_i + b_density_i - REST_DENSITY; + float density_variation = max(density_i + b_density_i - REST_DENSITY, 0.f); //densities[idx] = density; density_errors[idx] = density_variation; - particle_pressure[idx] += -density_variation * K_PCI; + particle_pressure[idx] += density_variation * d_PCISPH_density_scaling_factor; } @@ -299,9 +300,9 @@ __global__ void computePressureForces( if (distSq < H2 && distSq > 0.0001f) { float dist = sqrtf(distSq); float gradW = spikyKernelGrad(dist); - float3 gradWij = gradW * (r / dist); + float3 gradWij = gradW * (r / dist); // fix ^ to get r with sign //float factor = - (P_i + P_j) / (REST_DENSITY * REST_DENSITY); - float pfc = pfc_i + pfc_j; + float pfc = pfc_i + pfc_j; fPressure += -pfc * gradWij * PARTICLE_MASS * PARTICLE_MASS; } } @@ -327,7 +328,7 @@ __global__ void computePressureForces( if (distSq < H2 && distSq > 0.0001f) { float dist = sqrtf(distSq); float gradW = spikyKernelGrad(dist); - float3 gradWij = gradW * (r / dist); + float3 gradWij = gradW * (r/dist); // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 //float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); float pfc = pfc_i + pfc_j; @@ -341,7 +342,8 @@ __global__ void computePressureForces( } } - pressure_force[idx] = fPressure + b_fPressure;} + pressure_force[idx] = fPressure + b_fPressure; +} __global__ void correctBoundaryPenetrations( float3* positions_pred, @@ -607,11 +609,15 @@ __global__ void updateVelocitiesPositions( if (idx < numParticles) { // Update predicted velocity + float3 vel = velocities[idx]; + vel = clamp(vel, -50, 50); float3 acc = (force_other[idx] + pressure_force[idx]) / PARTICLE_MASS; - velocities[idx] += acc * timeStep; + vel += acc * timeStep; + + velocities[idx] = vel; // Update predicted position - positions[idx] += acc * (timeStep * timeStep); + positions[idx] += vel * timeStep; } } diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu index 9051e606c3504726edf215f6f77d6bff4621500c..5e9203f14ea26955ac709d9bce13cf1eb7dbd73e 100644 --- a/src/cuda/pcisph_update.cu +++ b/src/cuda/pcisph_update.cu @@ -9,120 +9,193 @@ #include <thrust/device_ptr.h> #include <thrust/reduce.h> -namespace FluidSimulationGPU { - -void updateParticles(float deltaTime) +namespace FluidSimulationGPU { - // Map the VBO resource to get a device pointer - size_t num_bytes; - cudaGraphicsMapResources(1, &cuda_vbo_resource, 0); - cudaGraphicsResourceGetMappedPointer((void**)&d_positions, &num_bytes, cuda_vbo_resource); - - int threadsPerBlock = 256; - int blocksPerGrid = (NUM_PARTICLES + threadsPerBlock - 1) / threadsPerBlock; - - // Time step should be small but we can run updateParticles multiple times during display - float timeStep = 0.0025f; - - // -------------------- Neighbor Search -------------------- - int gridSizeX, gridSizeY, gridSizeZ; - neigbourHoodSearch( - NUM_PARTICLES, - d_positions, - d_velocities, - d_positions_sorted, - d_velocities_sorted, - d_cellIndices, - d_particleIndices, - d_cellEnd, - d_cellStart, - blocksPerGrid, - threadsPerBlock, - gridSizeX, - gridSizeY, - gridSizeZ); - - // -------------------- Prediction Step -------------------- - - computeViscosityAndExternalForces<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_velocities, - d_force_other, - d_cellIndices, - d_cellStart, - d_cellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ); - cudaDeviceSynchronize(); - - // predictVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( - // d_velocities, - // d_velocities_pred, - // d_positions, - // d_positions_pred, - // d_force_other, - // timeStep, - // NUM_PARTICLES); - // cudaDeviceSynchronize(); - - // Initialize pressure increments to zero - cudaMemset(d_pressure_force, 0, NUM_PARTICLES * sizeof(float)); - cudaMemset(d_particle_pressure, 0, NUM_PARTICLES * sizeof(float)); - - // Compute densities and density errors, and per-particle sumGradW2 - computeInitialDensity<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_densities, - d_density_errors, - d_sum_gradW2, - d_cellIndices, - d_cellStart, - d_cellEnd, - d_boundary_positions, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ); - cudaDeviceSynchronize(); - - // Reduce sum_gradW2 over all particles - thrust::device_ptr<float> dev_sumGradW2_ptr(d_sum_gradW2); - float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>()); - - float r0 = 0.5f * h_H; // Penetration threshold - float epsilon = 1.0f; // Moderate to high friction - float delta = 0.5f; // Low to moderate elasticity - float normalDamping = 0.5f; // Adjust as needed - - - // Compute A - float m = h_PARTICLE_MASS; - float rho0 = h_REST_DENSITY; - float B = 2.0f * timeStep * timeStep * m * m / (rho0 * rho0); - float K_PCI = -1 / (B * totalSumGradW2); - - // ------------ Iterative Pressure Correction Loop -------------------- - - const int maxIterations = 5; - - for (int iter = 0; iter < maxIterations; ++iter) { - // Launch the boundary correction kernel - - predictVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( + + void updateParticles(float deltaTime) + { + // Map the VBO resource to get a device pointer + size_t num_bytes; + cudaGraphicsMapResources(1, &cuda_vbo_resource, 0); + cudaGraphicsResourceGetMappedPointer((void **)&d_positions, &num_bytes, cuda_vbo_resource); + + int threadsPerBlock = 256; + int blocksPerGrid = (NUM_PARTICLES + threadsPerBlock - 1) / threadsPerBlock; + + // Time step should be small but we can run updateParticles multiple times during display + float timeStep = 0.01f; + + // -------------------- Neighbor Search -------------------- + int gridSizeX, gridSizeY, gridSizeZ; + neigbourHoodSearch( + NUM_PARTICLES, + d_positions, + d_velocities, + d_positions_sorted, + d_velocities_sorted, + d_cellIndices, + d_particleIndices, + d_cellEnd, + d_cellStart, + blocksPerGrid, + threadsPerBlock, + gridSizeX, + gridSizeY, + gridSizeZ); + + // -------------------- Prediction Step -------------------- + + computeViscosityAndExternalForces<<<blocksPerGrid, threadsPerBlock>>>( + d_positions, d_velocities, - d_velocities_pred, + d_force_other, + d_cellIndices, + d_cellStart, + d_cellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ); + cudaDeviceSynchronize(); + + // predictVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( + // d_velocities, + // d_velocities_pred, + // d_positions, + // d_positions_pred, + // d_force_other, + // timeStep, + // NUM_PARTICLES); + // cudaDeviceSynchronize(); + + // Initialize pressure increments to zero + cudaMemset(d_pressure_force, 0, NUM_PARTICLES * sizeof(float)); + cudaMemset(d_particle_pressure, 0, NUM_PARTICLES * sizeof(float)); + + // Compute densities and density errors, and per-particle sumGradW2 + computeInitialDensity<<<blocksPerGrid, threadsPerBlock>>>( + d_positions, + d_densities, + d_density_errors, + d_sum_gradW2, + d_cellIndices, + d_cellStart, + d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ); + cudaDeviceSynchronize(); + + // Reduce sum_gradW2 over all particles + thrust::device_ptr<float> dev_sumGradW2_ptr(d_sum_gradW2); + float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>()); + + float r0 = 0.5f * h_H; // Penetration threshold + float epsilon = 1.0f; // Moderate to high friction + float delta = 0.5f; // Low to moderate elasticity + float normalDamping = 0.5f; // Adjust as needed + + // Compute A + float m = h_PARTICLE_MASS; + float rho0 = h_REST_DENSITY; + float B = 2.0f * timeStep * timeStep * m * m / (rho0 * rho0); + float K_PCI = -1 / (B * totalSumGradW2); + + // ------------ Iterative Pressure Correction Loop -------------------- + + const int maxIterations = 5; + + for (int iter = 0; iter < maxIterations; ++iter) + { + // Launch the boundary correction kernel + + predictVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( + d_velocities, + d_velocities_pred, + d_positions, + d_positions_pred, + d_pressure_force, + d_force_other, + timeStep, + NUM_PARTICLES); + cudaDeviceSynchronize(); + + correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_velocities_pred, + d_boundary_positions, + d_boundary_normals, + d_cellIndices, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ, + r0, + normalDamping, + epsilon, + delta); + cudaDeviceSynchronize(); + + // Compute desnity and update pressure + computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_density_errors, + d_particle_pressure, + d_cellIndices, + d_cellStart, + d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + K_PCI, + gridSizeX, gridSizeY, gridSizeZ); + cudaDeviceSynchronize(); + + // updatePressures<<<blocksPerGrid, threadsPerBlock>>>( + // d_particle_pressure, + // d_density_errors, + // K_PCI, + // NUM_PARTICLES); + // cudaDeviceSynchronize(); + + computePressureForces<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_densities, + d_particle_pressure, + d_pressure_force, + d_cellIndices, + d_cellStart, + d_cellEnd, + d_boundary_positions, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ); + cudaDeviceSynchronize(); + + // updateVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( + // d_velocities_pred, + // d_positions_pred, + // d_pressure_force, + // timeStep, + // NUM_PARTICLES); + // cudaDeviceSynchronize(); + } + + updateVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( d_positions, - d_positions_pred, + d_velocities, d_pressure_force, d_force_other, timeStep, NUM_PARTICLES); - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( - d_positions_pred, - d_velocities_pred, + d_positions, + d_velocities, d_boundary_positions, d_boundary_normals, d_cellIndices, @@ -135,102 +208,26 @@ void updateParticles(float deltaTime) epsilon, delta ); - cudaDeviceSynchronize(); - // Compute desnity and update pressure - computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( - d_positions_pred, - d_density_errors, - d_particle_pressure, - d_cellIndices, - d_cellStart, - d_cellEnd, - d_boundary_positions, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - K_PCI, - gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); - // updatePressures<<<blocksPerGrid, threadsPerBlock>>>( - // d_particle_pressure, - // d_density_errors, - // K_PCI, - // NUM_PARTICLES); - // cudaDeviceSynchronize(); + // -------------------- Finalization -------------------- - computePressureForces<<<blocksPerGrid, threadsPerBlock>>>( - d_positions_pred, - d_densities, - d_particle_pressure, - d_pressure_force, - d_cellIndices, - d_cellStart, - d_cellEnd, - d_boundary_positions, - d_boundaryCellStart, - d_boundaryCellEnd, + // cudaMemcpy(d_velocities, d_velocities_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); + // cudaMemcpy(d_positions, d_positions_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); + + // Enforce boundary conditions + + float damping = 0.75f; + enforceBoundary<<<blocksPerGrid, threadsPerBlock>>>( + d_positions, + d_velocities, NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ); + damping); cudaDeviceSynchronize(); - // updateVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( - // d_velocities_pred, - // d_positions_pred, - // d_pressure_force, - // timeStep, - // NUM_PARTICLES); - // cudaDeviceSynchronize(); - + // Unmap the VBO resource + cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); } - updateVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_velocities, - d_pressure_force, - d_force_other, - timeStep, - NUM_PARTICLES); - cudaDeviceSynchronize(); - - correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_velocities, - d_boundary_positions, - d_boundary_normals, - d_cellIndices, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ, - r0, - normalDamping, - epsilon, - delta - ); - - cudaDeviceSynchronize(); - - // -------------------- Finalization -------------------- - - //cudaMemcpy(d_velocities, d_velocities_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); - //cudaMemcpy(d_positions, d_positions_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); - - // Enforce boundary conditions - /* - float damping = 0.75f; - enforceBoundary<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_velocities, - NUM_PARTICLES, - damping); - cudaDeviceSynchronize(); - */ - - - // Unmap the VBO resource - cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0); -} - } // namespace FluidSimulationGPU diff --git a/src/cuda/smoothing_kernels.cu b/src/cuda/smoothing_kernels.cu index 4bbb8346cef1a3fb59accc7a296b0cb5ff9f8f97..45580770ea11763885e21ea5d10526d0bc733f14 100644 --- a/src/cuda/smoothing_kernels.cu +++ b/src/cuda/smoothing_kernels.cu @@ -14,6 +14,26 @@ __device__ float poly6Kernel(float r2) } } +__device__ float poly6GradKernel(float r2) +{ + if (r2 < H2){ + float term = H2 - r2; + return POLY6_GRAD_CONST * term * term; + } else { + return 0.0f; + } +} + +__device__ float spikyKernel(float r) +{ + if (r >= 0.0f && r <= H) { + float term = H - r; + return term * term * term; + } else { + return 0.0f; + } +} + __device__ float spikyKernelGrad(float r) { if (r > 0.0f && r <= H) { diff --git a/src/include/FluidSimulationApp.h b/src/include/FluidSimulationApp.h index 0211cbf52d1ab6a9ac385e83103359a4679bc041..e9614d36f3c6dc793dfda4f43a82c5728f1d446f 100644 --- a/src/include/FluidSimulationApp.h +++ b/src/include/FluidSimulationApp.h @@ -23,7 +23,7 @@ namespace FluidSimulation FluidSimulation* m_fluidSimulation; bool m_updatePhysics; - RenderStage selectedStage = RenderStage::ScreenSpacedRender; + RenderStage selectedStage = RenderStage::ParticleView; void RenderControlPannel(); void RenderFluidView(); diff --git a/src/settings/constants.h b/src/settings/constants.h index 5098cfc5853206bdd83f19f3fa027073d9052e00..fbd5bc95fc2fe7822c1f1f0f0b54f6d67298fbb5 100644 --- a/src/settings/constants.h +++ b/src/settings/constants.h @@ -10,10 +10,10 @@ const float h_BOUND_Z_MIN = -4.0f; const float h_BOUND_Z_MAX = 4.0f; const float h_GAS_CONSTANT = 0.02f; -const float h_VISCOSITY = 0.04f; +const float h_VISCOSITY = 0.0035105f; const float h_PARTICLE_MASS = 0.04f; -const float h_REST_DENSITY = 400.0f; +const float h_REST_DENSITY = 998.29f; const float h_PI = 3.14159265358979323846f; const float h_H = 0.25f; // Smoothing radius @@ -23,4 +23,6 @@ const int NUM_PARTICLES = 50000; const float h_POINT_SCALE = 14.0f; const float h_POINT_RADIUS = 25.0f; +const float h_particleRadius = 0.008; + #endif // CONSTANTS_H