diff --git a/src/FluidSimulation.cpp b/src/FluidSimulation.cpp index 52882abb0b5d581c4f225f662278e757320ac2d2..63d337b2200590fe7d13d853da02c7d3d70d9f9a 100644 --- a/src/FluidSimulation.cpp +++ b/src/FluidSimulation.cpp @@ -83,7 +83,7 @@ namespace FluidSimulation { // Initialize boundary std::vector<float3> boundary_positions; std::vector<float3> boundary_normals; - int boundaryLayers = 1; + int boundaryLayers = 4; float spacing = 2.f * h_particleRadius; // Using 0.01 as per your code createBoundaryParticles(boundary_positions, boundary_normals, boundaryLayers, spacing); diff --git a/src/cuda/include/pcisph_kernels.h b/src/cuda/include/pcisph_kernels.h index 4615c3afdf92653ebbda46537e6debe8e5a1948f..cbe80c7a97fd3321212757795ea3ed5047ac3d14 100644 --- a/src/cuda/include/pcisph_kernels.h +++ b/src/cuda/include/pcisph_kernels.h @@ -116,6 +116,26 @@ __global__ void enforceBoundary( float damping ); +// Function to predict velocities and positions +__global__ void predictVelNPos( + float3* velocities, + float3* velocities_pred, + float3* positions, + float3* positions_pred, + float3* force_other, + float timeStep, + int numParticles +); + +// Function to update velocities and positions based on pressure forces +__global__ void updatePredVelNPos( + float3* velocities_pred, + float3* positions_pred, + float3* pressure_forces, + float timeStep, + int numParticles +); + // Function to predict velocities and positions __global__ void predictVelocitiesPositions( float3* velocities, diff --git a/src/cuda/init_cuda.cu b/src/cuda/init_cuda.cu index f9df13dace0912c267bee6c345efc83eec9d2f9b..50293da50d81c5f2e0022280e95e9b58b46813b4 100644 --- a/src/cuda/init_cuda.cu +++ b/src/cuda/init_cuda.cu @@ -2,6 +2,7 @@ #include "include/cuda_device_vars.h" #include "../settings/constants.h" +#include "include/float3_helper_math.h" #include "include/pcisph_kernels.h" #include "include/smoothing_kernels.h" @@ -282,8 +283,9 @@ namespace FluidSimulationGPU // params.surface_tension_normalization = 32.f / (utils::PI * std::pow(params.kernel_radius, 9.f)); float kernel_radius = 4.0f * h_particleRadius; - float kernel_radius2 = pow(kernel_radius, 2.0f); // check taht pow() func is correct!! - float particle_mass = h_REST_DENSITY / pow(1.0f / (2.0f * h_particleRadius), 3.0f); + float kernel_radius2 = powf(kernel_radius, 2.0f); // check taht pow() func is correct!! + //float particle_mass = h_REST_DENSITY / pow(1.0f / (2.0f * h_particleRadius), 3.0f) * 100.f; + float particle_mass = 0.04f; printf("particle_radius: %f\n", h_particleRadius); printf("kernel_radius: %f\n", kernel_radius); @@ -293,11 +295,11 @@ namespace FluidSimulationGPU // MAke sure constannts are correct!!! - float poly6__const = 315.f / (64.f * M_PI * pow(kernel_radius, 9.f)); - float poly6_grad_const = -945.f / (32.f * M_PI * pow(kernel_radius, 9.f)); - float viscosity_d2_normalization = 45.f / (M_PI * pow(kernel_radius, 6.0f)); - float spiky_d1_normalization = -45.f / (M_PI * pow(kernel_radius, 6.0f)); - float surface_tension_normalization = 32.f / (M_PI * pow(kernel_radius, 9.f)); + float poly6__const = 315.f / (64.f * M_PI * powf(kernel_radius, 9.f)); + float poly6_grad_const = -945.f / (32.f * M_PI * powf(kernel_radius, 9.f)); + float viscosity_d2_normalization = 45.f / (M_PI * powf(kernel_radius, 6.0f)); + float spiky_d1_normalization = -45.f / (M_PI * powf(kernel_radius, 6.0f)); + float surface_tension_normalization = 32.f / (M_PI * powf(kernel_radius, 9.f)); cudaMemcpyToSymbol(H, &kernel_radius, sizeof(float)); cudaMemcpyToSymbol(H2, &kernel_radius2, sizeof(float)); @@ -314,7 +316,7 @@ namespace FluidSimulationGPU cudaMemcpyToSymbol(SURFACE_TENS_TERM, &surface_tension_term, sizeof(float)); // 1) Beta - float timeStep = 0.001f; + float timeStep = 0.004f; float dtm_rho = (timeStep * particle_mass) / h_REST_DENSITY; float beta = 2.0f * dtm_rho * dtm_rho; @@ -322,6 +324,9 @@ namespace FluidSimulationGPU double sum_val[3] = {0.0, 0.0, 0.0}; double sum_spiky[3] = {0.0, 0.0, 0.0}; + float3 grad_sum = make_float3(0.f, 0.f, 0.f); + float sum_dot_grad = 0; + float step = 2.0f * h_particleRadius; float R = kernel_radius + h_particleRadius; @@ -332,11 +337,20 @@ namespace FluidSimulationGPU { for (float x = -R; x <= R; x += step) { - float r2 = x * x + y * y + z * z; - float r = sqrtf(r2); + float3 r = make_float3(x, y, z); + float r2 = dot(r, r); + float r_norm = sqrtf(r2); + // float r2 = x * x + y * y + z * z; + // float r_norm = sqrtf(r2); if (r2 < kernel_radius2) { + float3 grad = (r/r_norm) * spiky_d1_normalization * powf(kernel_radius - r_norm, 2.f); + + grad_sum += grad; + + sum_dot_grad += dot(grad, grad); + /* float spikyMag = spiky_d1_normalization * pow(kernel_radius - r, 2.f) * (1.0f / r); // should it not match spky grad kernel?? r/r_norm?? float3 spiky_grad = { spikyMag * x, @@ -359,11 +373,18 @@ namespace FluidSimulationGPU double dotSV = (double)spiky_grad.x * poly6_grad.x + (double)spiky_grad.y * poly6_grad.y + (double)spiky_grad.z * poly6_grad.z; sumDot += dotSV; + */ } } } } + float dot_sum_grad = dot(grad_sum, grad_sum); + + float grad_term = -dot_sum_grad - sum_dot_grad; + + h_PCISPH_density_scaling_factor = (float)-1.f / (beta * grad_term); + /* double sum_spiky_dot_val = sum_spiky[0] * sum_val[0] + sum_spiky[1] * sum_val[1] + sum_spiky[2] * sum_val[2]; double bigTerm = -sum_spiky_dot_val - sumDot; @@ -373,9 +394,10 @@ namespace FluidSimulationGPU } double alpha = -1.0 / ((double)beta * bigTerm); - // Store final h_PCISPH_density_scaling_factor = (float)alpha; + */ + printf("PCISPH density scaling factor: alpha = %g\n", h_PCISPH_density_scaling_factor); @@ -440,9 +462,9 @@ namespace FluidSimulationGPU { // Recalculate derived constants float h_H2 = smoothingRadius * smoothingRadius; - float h_POLY6_CONST = 315.0f / (64.0f * h_PI * powf(smoothingRadius, 9)); - float h_SPIKY_GRAD_CONST = -45.0f / (h_PI * powf(smoothingRadius, 6)); - float h_VISC_LAP_CONST = 45.0f / (h_PI * powf(smoothingRadius, 6)); + float h_POLY6_CONST = 315.0f / (64.0f * M_PI * powf(smoothingRadius, 9)); + float h_SPIKY_GRAD_CONST = -45.0f / (M_PI * powf(smoothingRadius, 6)); + float h_VISC_LAP_CONST = 45.0f / (M_PI * powf(smoothingRadius, 6)); // Send constants to CUDA device cudaMemcpyToSymbol(PARTICLE_MASS, &particleMass, sizeof(float)); diff --git a/src/cuda/pcisph_kernels.cu b/src/cuda/pcisph_kernels.cu index a552c0eda645c876bd84995df8b1382b604428f2..c7fff7ada52d51d650cce2c002256e7235d4bb1e 100644 --- a/src/cuda/pcisph_kernels.cu +++ b/src/cuda/pcisph_kernels.cu @@ -209,11 +209,11 @@ __global__ void computeDensityError( } } - float density_variation = density_i + b_density_i - REST_DENSITY; + float density_variation = (density_i + b_density_i) - REST_DENSITY; //float density_variation = max(0.0f, density_i + b_density_i - REST_DENSITY); //densities[idx] = density; density_errors[idx] = density_variation; - particle_pressure[idx] += density_variation * d_PCISPH_density_scaling_factor; + particle_pressure[idx] += -density_variation * K_PCI; } @@ -230,7 +230,7 @@ __global__ void updatePressures( if (idx < numParticles) { float deltaP = -density_errors[idx] * A; - pressures[idx] += deltaP; + pressures[idx] = deltaP; } } @@ -258,7 +258,8 @@ __global__ void computePressureForces( float P_i = pressure[idx]; float dens_i = densities[idx]; - float pfc_i = P_i / (dens_i* dens_i); + //float pfc_i = P_i / (dens_i * dens_i); + float pfc_i = P_i / (REST_DENSITY * REST_DENSITY); float3 fPressure = make_float3(0.0f, 0.0f, 0.0f); @@ -303,7 +304,8 @@ __global__ void computePressureForces( float gradW = spikyKernelGrad(dist); float3 gradWij = gradW * (r / dist); // fix ^ to get r with sign //float factor = - (P_i + P_j) / (REST_DENSITY * REST_DENSITY); - float pfc = pfc_i + pfc_j; + //float pfc = pfc_i + pfc_j; + float pfc = 2.f * pfc_i; fPressure += -pfc * gradWij * PARTICLE_MASS * PARTICLE_MASS; } } @@ -332,7 +334,8 @@ __global__ void computePressureForces( float3 gradWij = gradW * (r/dist); // Using Equation (85): p_i and p_b = p_i; densities: ρ_i and ρ_0 //float factor = - (deltaP_i + deltaP_j) / (REST_DENSITY * REST_DENSITY); - float pfc = pfc_i + pfc_j; + //float pfc = pfc_i + pfc_j; + float pfc = 2.f * pfc_i; b_fPressure += -pfc * gradWij * PARTICLE_MASS * PARTICLE_MASS; } } @@ -549,7 +552,7 @@ __global__ void computeViscosityAndExternalForcesSurfaceTension( float rho_i = densities[idx]; // Initialize the final “other” forces for particle i - float3 fGravity = make_float3(0.f, -0.f, 0.f) * PARTICLE_MASS; // e.g. (0, -9.8f, 0)*mass + float3 fGravity = make_float3(0.f, -5.f, 0.f) * PARTICLE_MASS; // e.g. (0, -9.8f, 0)*mass float3 fViscosity = make_float3(0.f, 0.f, 0.f); float3 fCohesion = make_float3(0.f, 0.f, 0.f); float3 fCurvature = make_float3(0.f, 0.f, 0.f); @@ -635,7 +638,7 @@ __global__ void computeViscosityAndExternalForcesSurfaceTension( float3 fSurface = (fCohesion + fCurvature); - float3 fOther = fGravity + fViscosity + fSurface; + float3 fOther = fGravity; //+ fViscosity; // + fSurface; force_other[idx] = fOther; } @@ -683,6 +686,62 @@ __global__ void enforceBoundary( } } + +// Function to predict velocities and positions +__global__ void predictVelNPos( + float3* velocities, + float3* velocities_pred, + float3* positions, + float3* positions_pred, + float3* force_other, + float timeStep, + int numParticles +) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= numParticles) return; + float max_vel = 50.f; + // Compute acceleration + float3 acc = force_other[idx] / PARTICLE_MASS; + + // Predict velocity with clamping + //float3 vel_pred = clamp_length(velocities[idx] + acc * timeStep, max_vel); + float3 vel_pred = velocities[idx] + acc * timeStep; + + // Predict position + float3 pos_pred = positions[idx] + vel_pred * timeStep; + + + velocities_pred[idx] = vel_pred; + positions_pred[idx] = pos_pred; +} + +// Function to update velocities and positions based on pressure forces +__global__ void updatePredVelNPos( + float3* velocities_pred, + float3* positions_pred, + float3* pressure_forces, + float timeStep, + int numParticles +) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= numParticles) return; + float max_vel = 50.f; + // Compute acceleration + float3 acc = pressure_forces[idx] / PARTICLE_MASS; + + // Predict velocity with clamping + //float3 vel_pred = clamp_length(velocities[idx] + acc * timeStep, max_vel); + float3 vel_pred = velocities_pred[idx] + acc * timeStep; + + // Predict position + float3 pos_pred = positions_pred[idx] + vel_pred * timeStep; + + velocities_pred[idx] = vel_pred; + positions_pred[idx] = pos_pred; +} + // Kernel to predict velocities and positions with boundary handling __global__ void predictVelocitiesPositions( float3* velocities, diff --git a/src/cuda/pcisph_update.cu b/src/cuda/pcisph_update.cu index 3ec48ca0777898ac8746a042785c79d74de8379b..0cd8244eda1118215c263523d781c3eba8de590d 100644 --- a/src/cuda/pcisph_update.cu +++ b/src/cuda/pcisph_update.cu @@ -23,7 +23,7 @@ namespace FluidSimulationGPU int blocksPerGrid = (NUM_PARTICLES + threadsPerBlock - 1) / threadsPerBlock; // Time step should be small but we can run updateParticles multiple times during display - float timeStep = 0.001f; + float timeStep = 0.004f; // -------------------- Neighbor Search -------------------- int gridSizeX, gridSizeY, gridSizeZ; @@ -57,7 +57,7 @@ namespace FluidSimulationGPU // cudaDeviceSynchronize(); // Initialize pressure increments to zero - cudaMemset(d_pressure_force, 0, NUM_PARTICLES * sizeof(float)); + cudaMemset(d_pressure_force, 0, NUM_PARTICLES * sizeof(float3)); cudaMemset(d_particle_pressure, 0, NUM_PARTICLES * sizeof(float)); // Compute densities and density errors, and per-particle sumGradW2 @@ -100,6 +100,17 @@ namespace FluidSimulationGPU gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); + predictVelNPos<<<blocksPerGrid, threadsPerBlock>>>( + d_velocities, + d_velocities_pred, + d_positions, + d_positions_pred, + d_force_other, + timeStep, + NUM_PARTICLES + ); + cudaDeviceSynchronize(); + // Reduce sum_gradW2 over all particles thrust::device_ptr<float> dev_sumGradW2_ptr(d_sum_gradW2); float totalSumGradW2 = thrust::reduce(dev_sumGradW2_ptr, dev_sumGradW2_ptr + NUM_PARTICLES, 0.0f, thrust::plus<float>()); @@ -113,43 +124,36 @@ namespace FluidSimulationGPU float m = h_PARTICLE_MASS; float rho0 = h_REST_DENSITY; float B = 2.0f * timeStep * timeStep * m * m / (rho0 * rho0); - float K_PCI = -1 / (B * totalSumGradW2); + // float K_PCI = -1 / (B * totalSumGradW2); + float K_PCI = -0.5f; + + printf("K_PCI: %f\n", K_PCI); + + correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_velocities_pred, + d_boundary_positions, + d_boundary_normals, + d_cellIndices, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ, + r0, + normalDamping, + epsilon, + delta); + cudaDeviceSynchronize(); + // ------------ Iterative Pressure Correction Loop -------------------- - const int maxIterations = 7; + const int maxIterations = 5; for (int iter = 0; iter < maxIterations; ++iter) { // Launch the boundary correction kernel - predictVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( - d_velocities, - d_velocities_pred, - d_positions, - d_positions_pred, - d_pressure_force, - d_force_other, - timeStep, - NUM_PARTICLES); - cudaDeviceSynchronize(); - - correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( - d_positions_pred, - d_velocities_pred, - d_boundary_positions, - d_boundary_normals, - d_cellIndices, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ, - r0, - normalDamping, - epsilon, - delta); - cudaDeviceSynchronize(); - // Compute desnity and update pressure computeDensityError<<<blocksPerGrid, threadsPerBlock>>>( d_positions_pred, @@ -188,46 +192,37 @@ namespace FluidSimulationGPU gridSizeX, gridSizeY, gridSizeZ); cudaDeviceSynchronize(); - // updateVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( - // d_velocities_pred, - // d_positions_pred, - // d_pressure_force, - // timeStep, - // NUM_PARTICLES); - // cudaDeviceSynchronize(); - } - - updateVelocitiesPositions<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_velocities, - d_pressure_force, - d_force_other, - timeStep, - NUM_PARTICLES); - - cudaDeviceSynchronize(); + updatePredVelNPos<<<blocksPerGrid, threadsPerBlock>>>( + d_velocities_pred, + d_positions_pred, + d_pressure_force, + timeStep, + NUM_PARTICLES); + cudaDeviceSynchronize(); + + correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( + d_positions_pred, + d_velocities_pred, + d_boundary_positions, + d_boundary_normals, + d_cellIndices, + d_boundaryCellStart, + d_boundaryCellEnd, + NUM_PARTICLES, + gridSizeX, gridSizeY, gridSizeZ, + r0, + normalDamping, + epsilon, + delta); - correctBoundaryPenetrations<<<blocksPerGrid, threadsPerBlock>>>( - d_positions, - d_velocities, - d_boundary_positions, - d_boundary_normals, - d_cellIndices, - d_boundaryCellStart, - d_boundaryCellEnd, - NUM_PARTICLES, - gridSizeX, gridSizeY, gridSizeZ, - r0, - normalDamping, - epsilon, - delta); + cudaDeviceSynchronize(); + } - cudaDeviceSynchronize(); // -------------------- Finalization -------------------- - // cudaMemcpy(d_velocities, d_velocities_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); - // cudaMemcpy(d_positions, d_positions_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); + cudaMemcpy(d_velocities, d_velocities_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); + cudaMemcpy(d_positions, d_positions_pred, NUM_PARTICLES * sizeof(float3), cudaMemcpyDeviceToDevice); // Enforce boundary conditions diff --git a/src/settings/constants.h b/src/settings/constants.h index 7b04799c4e0cf157aef724e4e63270c73a273709..daaa916f3df108e30ce3c10bf28ddbe63b53cf1b 100644 --- a/src/settings/constants.h +++ b/src/settings/constants.h @@ -2,28 +2,35 @@ #ifndef CONSTANTS_H #define CONSTANTS_H -const float h_BOUND_X_MIN = -1.0f; -const float h_BOUND_X_MAX = 1.0f; +// const float h_BOUND_X_MIN = -1.0f; +// const float h_BOUND_X_MAX = 1.0f; +// const float h_BOUND_Y_MIN = 0.0f; +// const float h_BOUND_Y_MAX = 2.5f; +// const float h_BOUND_Z_MIN = -1.0f; +// const float h_BOUND_Z_MAX = 1.0f; +const float h_BOUND_X_MIN = -6.0f; +const float h_BOUND_X_MAX = 12.0f; const float h_BOUND_Y_MIN = 0.0f; -const float h_BOUND_Y_MAX = 2.5f; -const float h_BOUND_Z_MIN = -1.0f; -const float h_BOUND_Z_MAX = 1.0f; +const float h_BOUND_Y_MAX = 10.0f; +const float h_BOUND_Z_MIN = -4.0f; +const float h_BOUND_Z_MAX = 4.0f; + const float h_GAS_CONSTANT = 0.02f; -const float h_VISCOSITY = 0.0035105f; +const float h_VISCOSITY = 0.35105f; const float h_PARTICLE_MASS = 0.04f; -const float h_REST_DENSITY = 1000.0f; +const float h_REST_DENSITY = 800.0f; const float h_PI = 3.14159265358979323846f; const float h_H = 0.25f; // Smoothing radius const int NUM_PARTICLES = 50000; -const float h_POINT_SCALE = 4.0f; -const float h_POINT_RADIUS = 10.0f; +const float h_POINT_SCALE = 18.0f; +const float h_POINT_RADIUS = 12.0f; -const float h_particleRadius = 0.008; +const float h_particleRadius = 0.0625; const float h_surcface_tension_coeff = 0.0032; const float h_surface_tension_term = 0.035; diff --git a/src/spawn_particles.cpp b/src/spawn_particles.cpp index b8decd4b17d3f1c2ca7a92fe41f6424b112fbb93..f63e9d052202a9499d59e010e0ad918ac9fa32f3 100644 --- a/src/spawn_particles.cpp +++ b/src/spawn_particles.cpp @@ -50,7 +50,7 @@ void initParticles( { case SimulationMode::OneLargeCube: particlesPerAxis = static_cast<int>(ceil(pow(NUM_PARTICLES, 1.0f / 3.0f))); - index = initializeCube(0, particlesPerAxis, 0.1f, 0.1f, 0.1f); + index = initializeCube(0, particlesPerAxis, 1.0f, 4.0f, 1.0f); break; case SimulationMode::TwoSmallCubes: