#include <stdio.h>
#include <cutil_inline.h> #include <cuda.h>
#include "vec.h"
const int WIDTH = 256; const int HEIGHT = 256; const int NSUBSAMPLES = 2; const int NAO_SAMPLES = 8;
__device__ const float M_PI = 3.1415926535; __device__ const float NoHit = 1e16;
typedef struct { vec center; float radius; } Sphere;
typedef struct { float a, b, c, d; } Plane;
typedef struct { float t; vec p; vec n; } Isect;
__device__ __host__ float sq(float x) { return x * x; }
__device__ __host__ void ray_sphere_intersect(Isect* isect, const vec* orig, const vec* dir, const vec* sphere_center, float sphere_radius) { vec v; vsub(&v, orig, sphere_center); float dd = sq(vdot(&v, dir)) - (vsqlength(&v) - sq(sphere_radius)); if (dd >= 0) { float sqdd = sqrt(dd); float t1 = -vdot(&v, dir) - sqdd; if (0 < t1 && t1 < isect->t) { isect->t = t1;
vec tmp; vadd(&isect->p, orig, vscale(&tmp, dir, t1));
vnormalize(vsub(&isect->n, &isect->p, sphere_center)); } } }
__device__ __host__ void ray_plane_intersect(Isect* isect, const vec* orig, const vec* dir, float a, float b, float c, float d) { float h = orig->x * a + orig->y * b + orig->z * c + d; if (h >= 0) { float dot = dir->x * a + dir->y * b + dir->z * c; if (dot < 0) { float t = h / -dot; if (t < isect->t) { isect->t = t;
vec tmp; vadd(&isect->p, orig, vscale(&tmp, dir, t));
isect->n.x = a; isect->n.y = b; isect->n.z = c; } } } }
__device__ __host__ void orthoBasis(vec* basis, const vec& n) { basis[2] = n; basis[1].x = basis[1].y = basis[1].z = 0;
if (n.x < 0.6 && n.x > -0.6) { basis[1].x = 1; } else if (n.y < 0.6 && n.y > -0.6) { basis[1].y = 1; } else if (n.z < 0.6 && n.z > -0.6) { basis[1].z = 1; } else { basis[1].x = 1; }
vnormalize(vcross(&basis[0], &basis[1], &basis[2])); vnormalize(vcross(&basis[1], &basis[2], &basis[0])); }
__device__ const Sphere spheres[3] = { { { -2.0, 0.0, -3.5 }, 0.5 }, { { -0.5, 0.0, -3.0 }, 0.5 }, { { 1.0, 0.0, -2.2 }, 0.5 }, };
__device__ const Plane plane = { 0, 1, 0, 0.5 };
__device__ __host__ void ambient_occlusion(vec* col, const Isect* isect, float* rands) { const float eps = 0.0001; const int ntheta = NAO_SAMPLES; const int nphi = NAO_SAMPLES;
vec p; vadd(&p, &isect->p, vscale(&p, &isect->n, eps));
vec basis[3]; orthoBasis(basis, isect->n);
int occludeCount = 0; for (int i=0; i < ntheta * nphi; ++i) { float theta = sqrt(rands[i*2+0]); float phi = 2 * M_PI * rands[i*2+1];
float x = cos(phi) * theta; float y = sin(phi) * theta; float z = sqrt(1 - theta * theta);
float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x; float ry = x * basis[0].y + y * basis[1].y + z * basis[2].y; float rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;
vec raydir = { rx, ry, rz }; Isect occIsect; occIsect.t = NoHit; ray_sphere_intersect(&occIsect, &p, &raydir, &spheres[0].center, spheres[0].radius); ray_sphere_intersect(&occIsect, &p, &raydir, &spheres[1].center, spheres[1].radius); ray_sphere_intersect(&occIsect, &p, &raydir, &spheres[2].center, spheres[2].radius); ray_plane_intersect(&occIsect, &p, &raydir, plane.a, plane.b, plane.c, plane.d); if (occIsect.t < NoHit) occludeCount += 1; }
float c = (float)(ntheta * nphi - occludeCount) / (ntheta * nphi); col->x = c; col->y = c; col->z = c; }
__global__ void render(float* fimg, float* rands) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x < WIDTH && y < HEIGHT) { vec col = { 0, 0, 0 }; for (int v=0; v<NSUBSAMPLES; ++v) { for (int u=0; u<NSUBSAMPLES; ++u) { vec raydir; raydir.x = (x + (u / (float)NSUBSAMPLES) - (WIDTH / 2.0)) / (WIDTH / 2.0); raydir.y = -(y + (v / (float)NSUBSAMPLES) - (HEIGHT / 2.0)) / (HEIGHT / 2.0); raydir.z = -1; vnormalize(&raydir);
const vec eyepos = { 0, 0, 0 };
Isect isect; isect.t = NoHit; ray_sphere_intersect(&isect, &eyepos, &raydir, &spheres[0].center, spheres[0].radius); ray_sphere_intersect(&isect, &eyepos, &raydir, &spheres[1].center, spheres[1].radius); ray_sphere_intersect(&isect, &eyepos, &raydir, &spheres[2].center, spheres[2].radius); ray_plane_intersect(&isect, &eyepos, &raydir, plane.a, plane.b, plane.c, plane.d); if (isect.t < NoHit) { vec c; ambient_occlusion(&c, &isect, rands); vadd(&col, &col, &c); } } }
fimg[(y*WIDTH+x)*3+0] = col.x * (1.0 / sq(NSUBSAMPLES)); fimg[(y*WIDTH+x)*3+1] = col.y * (1.0 / sq(NSUBSAMPLES)); fimg[(y*WIDTH+x)*3+2] = col.z * (1.0 / sq(NSUBSAMPLES)); } }
float clamp(float x, float min, float max) { return x < min ? min : x > max ? max : x; }
bool saveppm(const char* fn, int w, int h, const unsigned char* img) { FILE* fp = fopen(fn, "wb"); if (fp == NULL) { return false; } else { fprintf(fp,"P6\n%d %d\n%d\n", w, h, 255); fwrite(img, w * h * 3, 1, fp); return true; } }
int main() { float* d_fimg; float* d_rands;
cutilSafeCall(cudaMalloc((void**) &d_fimg, sizeof(float) * (WIDTH * HEIGHT) * 3)); cutilSafeCall(cudaMalloc((void**) &d_rands, sizeof(float) * NAO_SAMPLES * NAO_SAMPLES * 2));
{ float h_rands[NAO_SAMPLES * NAO_SAMPLES * 2]; for (int i=0; i<NAO_SAMPLES * NAO_SAMPLES * 2; ++i) h_rands[i] = (float)rand() / (float)RAND_MAX; cutilSafeCall( cudaMemcpy( d_rands, h_rands, sizeof(h_rands), cudaMemcpyHostToDevice) ); }
dim3 dimBlock(16, 16); dim3 dimGrid( (WIDTH + dimBlock.x - 1) / dimBlock.x, (HEIGHT + dimBlock.y - 1) / dimBlock.y); render<<<dimGrid, dimBlock>>>(d_fimg, d_rands);
cutilCheckMsg("Kernel execution failed");
{ const int N = WIDTH * HEIGHT * 3; float* result = new float[N]; cutilSafeCall( cudaMemcpy( result, d_fimg, sizeof(float) * N, cudaMemcpyDeviceToHost) );
unsigned char* img = new unsigned char[N]; for (int i=0; i<N; ++i) img[i] = (unsigned char)(clamp(result[i], 0, 1) * 255.99f); saveppm("ao.ppm", WIDTH, HEIGHT, img);
delete[] result; delete[] img; }
cutilSafeCall(cudaFree(d_fimg)); cutilSafeCall(cudaFree(d_rands)); }
|