diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fefb344 --- /dev/null +++ b/.gitignore @@ -0,0 +1,58 @@ +# Folders # +################### +PROJ1_WIN/ipch/* +PROJ1_WIN/Debug/* +PROJ1_WIN/Debug (v5.5)/* +PROJ1_WIN/Release/* +PROJ1_WIN/565Pathtracer/Debug/* +PROJ1_WIN/565Pathtracer/Debug (v5.5)/* +PROJ1_WIN/565Pathtracer/Release/* +PROJ1_WIN/565Pathtracer/Win32/* + +# Misc # +################### +*.pdf +*.suo +*.user + +# Compiled source # +################### +*.com +*.class +*.dll +*.exe +*.o +*.so + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.zip + +# Logs and databases # +###################### +*.log +*.sql +*.sqlite +*.sdf +*.pdb + +# OS generated files # +###################### +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db +*.opensdf +*.ipch diff --git a/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj b/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj index 4515c57..1ad0bec 100755 --- a/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj +++ b/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj @@ -44,6 +44,9 @@ + + + {FF21CA49-522E-4E86-B508-EE515B248FC4} Win32Proj @@ -202,4 +205,4 @@ - + \ No newline at end of file diff --git a/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.filters b/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.filters index d49ad9c..2a7dba3 100755 --- a/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.filters +++ b/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.filters @@ -1,42 +1,83 @@  - - - - - stb_image stb_image + + Source + + + Source + + + Source + + + Source + + + Source + - - - - - - - - - - stb_image stb_image - - - + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Source + + + Source + {011aa553-95e8-4e59-b7ff-1bb89aebe21d} + + {1235a944-4608-4364-a5d4-5fc5408f61ca} + + + {df94487e-df64-4170-b341-0725bfbb39a9} + + + + + Source + + + + \ No newline at end of file diff --git a/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.user b/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.user index dfd9f6c..7f6b7fb 100755 --- a/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.user +++ b/PROJ1_WIN/565Pathtracer/565Pathtracer.vcxproj.user @@ -16,4 +16,12 @@ scene=../../scenes/sampleScene.txt WindowsLocalDebugger - + + scene=../../scenes/sampleScene.txt + WindowsLocalDebugger + + + scene=../../scenes/sampleScene.txt + WindowsLocalDebugger + + \ No newline at end of file diff --git a/PROJ1_WIN/565Pathtracer/Mytry02.png b/PROJ1_WIN/565Pathtracer/Mytry02.png new file mode 100644 index 0000000..cf30251 Binary files /dev/null and b/PROJ1_WIN/565Pathtracer/Mytry02.png differ diff --git a/PROJ1_WIN/565Pathtracer/test.0.png b/PROJ1_WIN/565Pathtracer/test.0.png new file mode 100644 index 0000000..e1066aa Binary files /dev/null and b/PROJ1_WIN/565Pathtracer/test.0.png differ diff --git a/Project2-PerfAnalysis.docx b/Project2-PerfAnalysis.docx new file mode 100644 index 0000000..bd83059 Binary files /dev/null and b/Project2-PerfAnalysis.docx differ diff --git a/README.md b/README.md index 1e36dc5..9054cdf 100755 --- a/README.md +++ b/README.md @@ -1,147 +1,95 @@ ------------------------------------------------------------------------------- CIS565: Project 2: CUDA Pathtracer ------------------------------------------------------------------------------- -Fall 2013 -------------------------------------------------------------------------------- 
-Due Wednesday, 10/02/13
-------------------------------------------------------------------------------
+For Project 2, I extended my raytracer into a full-blown pathtracer. The effort was made
+easier by the fact that much of the work had already been done in the raytracer phase itself,
+including implementation of the geometrical intersection tests. Antialiasing and Motion Blur,
+which I had implemented for the raytracer (but which did not make it in time for submission),
+became default components of the pathtracer, and they satisfy the "two extra features" requirement.
-------------------------------------------------------------------------------
-NOTE:
+PATH TRACING:
-------------------------------------------------------------------------------
-This project requires an NVIDIA graphics card with CUDA capability! Any card after the Geforce 8xxx series will work. If you do not have an NVIDIA graphics card in the machine you are working on, feel free to use any machine in the SIG Lab or in Moore100 labs. All machines in the SIG Lab and Moore100 are equipped with CUDA capable NVIDIA graphics cards. If this too proves to be a problem, please contact Patrick or Liam as soon as possible.
+As in the case of the raytracer, rays are projected into the scene through the projection plane. Each ray bounces
+around in the scene, accumulating the colours of all the objects that it hits, eventually either hitting the light,
+flying off into the darkness behind the camera, or dying after a maximum number of bounces. If the ray hits the light,
+it contributes its colour to the pixel through which it was traced. Otherwise, it contributes noise (black). When
+we do this a sufficient number of times, we get a result close to what the ground truth would be.
-------------------------------------------------------------------------------
-INTRODUCTION:
+IMPLEMENTATION DETAILS:
-------------------------------------------------------------------------------
-In this project, you will extend your raytracer from Project 1 into a full CUDA based global illumination pathtracer.
-
-For this project, you may either choose to continue working off of your codebase from Project 1, or you may choose to use the included basecode in this repository. The basecode for Project 2 is the same as the basecode for Project 1, but with some missing components you will need filled in, such as the intersection testing and camera raycasting methods.
+Compared to the raytracer, there are two crucial differences:
+i. Rays are parallelized, not pixels. This means that a thread computes the pixel colour contribution of a
+single ray.
+ii. Rays can bounce around in the scene, up to a certain maximum number of bounces. Russian roulette is not employed
+to determine when a ray dies. This makes this pathtracer more heavily biased than others.
-How you choose to extend your raytracer into a pathtracer is a fairly open-ended problem; the supplied basecode is meant to serve as one possible set of guidelines for doing so, but you may choose any approach you want in your actual implementation, including completely scrapping the provided basecode in favor of your own from-scratch solution.
+Unlike the implementations of most of my peers, my pathtracer renders the image by accumulating
+the colours in the colour buffer as it goes, and it outputs a single, final image to the GLUT/GLFW window.
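+To make the per-ray parallelization and the accumulation buffer concrete, here is a minimal sketch of the idea.
+This is illustrative only: the `PathRay` struct and the `rayPool`/`accumBuffer` names are invented for this README
+and are not the identifiers used in the actual code (only `ray` and `glm::vec3` come from the project).
+
+```cuda
+// Assumes sceneStructs.h (for the ray struct) and GLM are included, and a Fermi-class
+// or newer card for float atomicAdd.
+struct PathRay {
+    ray r;                 // current ray segment (origin + direction)
+    glm::vec3 throughput;  // colour carried along the path so far
+    int pixelIndex;        // pixel this path was traced through
+    bool alive;            // false once the path hit a light, missed, or ran out of bounces
+};
+
+// One thread per *ray*, not per pixel: each thread owns one path and, once that path has
+// terminated, deposits its colour into the accumulation buffer. Atomics are used because
+// several samples of the same pixel may terminate in the same launch.
+__global__ void accumulateTerminatedRays(PathRay* rayPool, int numRays, glm::vec3* accumBuffer)
+{
+    int i = (blockIdx.x * blockDim.x) + threadIdx.x;
+    if (i >= numRays) return;
+
+    PathRay p = rayPool[i];
+    if (!p.alive) {
+        atomicAdd(&accumBuffer[p.pixelIndex].x, p.throughput.x);
+        atomicAdd(&accumBuffer[p.pixelIndex].y, p.throughput.y);
+        atomicAdd(&accumBuffer[p.pixelIndex].z, p.throughput.z);
+    }
+}
+
+// After the last iteration, the displayed image is simply the accumulation buffer divided
+// by the total number of samples taken per pixel (iterations x samples per pixel).
+```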
+Because of this, effects like Antialiasing, Motion Blur, Depth of Field or any other effect where the pixel +colour results from averaging together distinct values from many iterations are "free" for me, just like in +the case of the raytracer. -------------------------------------------------------------------------------- -CONTENTS: -------------------------------------------------------------------------------- -The Project2 root directory contains the following subdirectories: - -* src/ contains the source code for the project. Both the Windows Visual Studio solution and the OSX makefile reference this folder for all source; the base source code compiles on OSX and Windows without modification. -* scenes/ contains an example scene description file. -* renders/ contains two example renders: the raytraced render from Project 1 (GI_no.bmp), and the same scene rendered with global illumination (GI_yes.bmp). -* PROJ1_WIN/ contains a Windows Visual Studio 2010 project and all dependencies needed for building and running on Windows 7. -* PROJ1_OSX/ contains a OSX makefile, run script, and all dependencies needed for building and running on Mac OSX 10.8. -* PROJ1_NIX/ contains a Linux makefile for building and running on Ubuntu - 12.04 LTS. Note that you will need to set the following environment - variables: - - - PATH=$PATH:/usr/local/cuda-5.5/bin - - LD_LIBRARY_PATH=/usr/local/cuda-5.5/lib64:/lib - -The projects build and run exactly the same way as in Project0 and Project1. +For antialiasing, I'm sampling the scene at a rate of 8 samples per pixel. ------------------------------------------------------------------------------- -REQUIREMENTS: +FEATURES: ------------------------------------------------------------------------------- -In this project, you are given code for: - -* All of the basecode from Project 1, plus: -* Intersection testing code for spheres and cubes -* Code for raycasting from the camera +Current +------- +As with the raytracer, the pathtracer supports sphere and cube primitives. It implements all of the required +features and the following optional features: -You will need to implement the following features. A number of these required features you may have already implemented in Project 1. If you have, you are ahead of the curve and have less work to do! +* Antialiasing (Supersampled at 8x) +* Refraction with Fresnel reflectance +* Motion blur (Translational) +* My own stream compactor (a CPU/GPU hybrid; more on this below) +* Diffuse reflectance -* Full global illumination (including soft shadows, color bleeding, etc.) by pathtracing rays through the scene. -* Properly accumulating emittance and colors to generate a final image -* Supersampled antialiasing -* Parallelization by ray instead of by pixel via stream compaction -* Perfect specular reflection - -You are also required to implement at least two of the following features. Some of these features you may have already implemented in Project 1. If you have, you may NOT resubmit those features and instead must pick two new ones to implement. - -* Additional BRDF models, such as Cook-Torrance, Ward, etc. Each BRDF model may count as a separate feature. -* Texture mapping -* Bump mapping -* Translational motion blur -* Fresnel-based Refraction, i.e. 
glass
-* OBJ Mesh loading and rendering without KD-Tree
-* Interactive camera
-* Integrate an existing stackless KD-Tree library, such as CUKD (https://github.com/unvirtual/cukd)
-* Depth of field
-
-Alternatively, implementing just one of the following features can satisfy the "pick two" feature requirement, since these are correspondingly more difficult problems:
-
-* Physically based subsurface scattering and transmission
-* Implement and integrate your own stackless KD-Tree from scratch.
-* Displacement mapping
-* Deformational motion blur
-
-As yet another alternative, if you have a feature or features you really want to implement that are not on this list, let us know, and we'll probably say yes!
+In addition, the submission includes code for Texture Mapping. However, this code isn't ready for prime time yet.
-------------------------------------------------------------------------------
-NOTES ON GLM:
+STREAM COMPACTION
-------------------------------------------------------------------------------
-This project uses GLM, the GL Math library, for linear algebra. You need to know two important points on how GLM is used in this project:
+I implemented stream compaction on my own using the Naive Parallel Scan method discussed by Patrick in class.
+However, even though there is nothing apparently wrong with my implementation, the scan fails for indices beyond
+65536. It so happens that log2 (65536) = 16, which is the size of a half-warp, and the algorithm involves a thread
+accessing a location 2^d - 1 spaces before its own index. I highly suspect something is amiss here. But it could
+even turn out to be a problem with the lab machines (which have old cards with ancient drivers).
-* In this project, indices in GLM vectors (such as vec3, vec4), are accessed via swizzling. So, instead of v[0], v.x is used, and instead of v[1], v.y is used, and so on and so forth.
-* GLM Matrix operations work fine on NVIDIA Fermi cards and later, but pre-Fermi cards do not play nice with GLM matrices. As such, in this project, GLM matrices are replaced with a custom matrix struct, called a cudaMat4, found in cudaMat4.h. A custom function for multiplying glm::vec4s and cudaMat4s is provided as multiplyMV() in intersections.h.
+This failure produced an incorrect render when the GPU performed the entire stream compaction
+(see the screenshot below). This necessitated a hybrid approach, in which the CPU does the exclusive scan and
+the GPU performs the actual compaction. As noted in the performance analysis document, this causes a slight drop
+in performance.
-------------------------------------------------------------------------------
-README
-------------------------------------------------------------------------------
-All students must replace or augment the contents of this Readme.md in a clear
-manner with the following:
-
-* A brief description of the project and the specific features you implemented.
-* At least one screenshot of your project running.
-* A 30 second or longer video of your project running. To create the video you
- can use http://www.microsoft.com/expression/products/Encoder4_Overview.aspx
-* A performance evaluation (described in detail below).
+Not having Nsight installed on the lab machines makes the whole process equivalent to shooting in the dark: if it hits,
+well and good; otherwise, hard luck. So far, it has been the latter.
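+For reference, the hybrid scan-then-scatter pattern described above looks roughly like the sketch below. This is
+only an illustration, not the project's code: the function names and the int flag arrays are assumptions, and the
+project's `ray` struct merely stands in for whatever per-ray state is actually being compacted.
+
+```cuda
+// CPU: exclusive prefix sum over the "alive" flags (1 = keep the ray, 0 = drop it).
+// indices[i] becomes the compacted slot that a surviving ray i will be written to.
+void exclusiveScanCPU(const int* flags, int* indices, int n)
+{
+    int running = 0;
+    for (int i = 0; i < n; ++i) {
+        indices[i] = running;
+        running += flags[i];
+    }
+}
+
+// GPU: scatter the surviving rays into their compacted slots.
+__global__ void scatterAlive(const ray* in, ray* out, const int* flags, const int* indices, int n)
+{
+    int i = (blockIdx.x * blockDim.x) + threadIdx.x;
+    if (i < n && flags[i]) {
+        out[indices[i]] = in[i];
+    }
+}
+
+// The surviving-ray count is indices[n-1] + flags[n-1]. The flag and index arrays have to be
+// copied between host and device on every bounce, which is most likely where the slight
+// performance drop mentioned above comes from.
+```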
------------------------------------------------------------------------------- -PERFORMANCE EVALUATION +SCREENSHOTS ------------------------------------------------------------------------------- -The performance evaluation is where you will investigate how to make your CUDA -programs more efficient using the skills you've learned in class. You must have -performed at least one experiment on your code to investigate the positive or -negative effects on performance. - -One such experiment would be to investigate the performance increase involved -with adding a spatial data-structure to your scene data. - -Another idea could be looking at the change in timing between various block -sizes. - -A good metric to track would be number of rays per second, or frames per -second, or number of objects displayable at 60fps. - -We encourage you to get creative with your tweaks. Consider places in your code -that could be considered bottlenecks and try to improve them. - -Each student should provide no more than a one page summary of their -optimizations along with tables and or graphs to visually explain any -performance differences. +This is the best image I have, rendered with 5000 iterations. Unfortunately, it has got a lot of artifacts:
+
+Artifacts aside, the glass looks remarkably realistic, and the diffuse reflectance on the gold sphere in the centre also looks good.
+The same scene with 2000 iterations:
+
+
+Incorrect rendering of the same scene with full-GPU stream compaction:
+
+The above two images were rendered at around 3000 iterations. As is evident, when the iteration count goes up, so do the
+artifacts, which has led me to believe that these are some sort of floating-point accumulation errors.
-------------------------------------------------------------------------------
-THIRD PARTY CODE POLICY
+VIDEO
-------------------------------------------------------------------------------
-* Use of any third-party code must be approved by asking on the Google group. If it is approved, all students are welcome to use it. Generally, we approve use of third-party code that is not a core part of the project. For example, for the ray tracer, we would approve using a third-party library for loading models, but would not approve copying and pasting a CUDA function for doing refraction.
-* Third-party code must be credited in README.md.
-* Using third-party code without its approval, including using another student's code, is an academic integrity violation, and will result in you receiving an F for the semester.
+Unfortunately, I wasn't able to use the laptop I used to make a video for the raytracer to run the pathtracer,
+because some of my cudaMalloc calls were erroring out without rhyme or reason. The SIG and Moore Lab machines don't
+have any video capturing/encoding software, and my own laptop has an AMD graphics card. So, a video could not be
+prepared for this project.
-------------------------------------------------------------------------------
-SELF-GRADING
-------------------------------------------------------------------------------
-* On the submission date, email your grade, on a scale of 0 to 100, to Liam, liamboone+cis565@gmail.com, with a one paragraph explanation. Be concise and realistic. Recall that we reserve 30 points as a sanity check to adjust your grade. Your actual grade will be (0.7 * your grade) + (0.3 * our grade). We hope to only use this in extreme cases when your grade does not realistically reflect your work - it is either too high or too low. In most cases, we plan to give you the exact grade you suggest.
-* Projects are not weighted evenly, e.g., Project 0 doesn't count as much as the path tracer. We will determine the weighting at the end of the semester based on the size of each project.
-
+PERFORMANCE ANALYSIS
-------------------------------------------------------------------------------
-SUBMISSION
-------------------------------------------------------------------------------
-As with the previous project, you should fork this project and work inside of your fork. Upon completion, commit your finished project back to your fork, and make a pull request to the master repository.
-You should include a README.md file in the root directory detailing the following
-
-* A brief description of the project and specific features you implemented
-* At least one screenshot of your project running, and at least one screenshot of the final rendered output of your pathtracer
-* Instructions for building and running your project if they differ from the base code
-* A link to your blog post detailing the project
-* A list of all third-party code used
+A performance analysis was performed for this project and can be found in the root folder under the
+name Project2-PerfAnalysis. It is a Word document.
diff --git a/perfcomp.txt b/perfcomp.txt new file mode 100644 index 0000000..72834e1 --- /dev/null +++ b/perfcomp.txt @@ -0,0 +1 @@ +Compaction on CPU - 770s (1000 iterations; 1+5 bounces) \ No newline at end of file diff --git a/renders/CPUStreamCompaction.png b/renders/CPUStreamCompaction.png new file mode 100644 index 0000000..c01744a Binary files /dev/null and b/renders/CPUStreamCompaction.png differ diff --git a/renders/FinalRender.png b/renders/FinalRender.png new file mode 100644 index 0000000..44d7aa0 Binary files /dev/null and b/renders/FinalRender.png differ diff --git a/renders/FinalRender_2000Iter.png b/renders/FinalRender_2000Iter.png new file mode 100644 index 0000000..2ba6e79 Binary files /dev/null and b/renders/FinalRender_2000Iter.png differ diff --git a/renders/GPUStreamCompaction.png b/renders/GPUStreamCompaction.png new file mode 100644 index 0000000..c3508b7 Binary files /dev/null and b/renders/GPUStreamCompaction.png differ diff --git a/scenes/sampleScene.old.txt b/scenes/sampleScene.old.txt new file mode 100644 index 0000000..8ee66a6 --- /dev/null +++ b/scenes/sampleScene.old.txt @@ -0,0 +1,225 @@ +MATERIAL 0 //white diffuse +RGB 0.9 0.9 0.9 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 1 //red diffuse +RGB .63 .06 .04 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 2 //green diffuse +RGB .15 .48 .09 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 3 //red glossy +RGB .63 .26 .24 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 2 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 4 //white glossy +RGB 1 1 1 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 2 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 5 //glass +RGB 0 0 0 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 1 +REFRIOR 2.2 +SCATTER 0 +ABSCOEFF .02 5.1 5.7 +RSCTCOEFF 13 +EMITTANCE 0 + +MATERIAL 6 //green glossy +RGB .35 .48 .29 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 2.6 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 7 //light +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 1 + +MATERIAL 8 //light +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 15 + +CAMERA +RES 600 600 +FOVY 25 +ITERATIONS 1 +FILE test.png +frame 0 +EYE 0 4.5 12 +VIEW 0 0 -1 +UP 0 1 0 + +OBJECT 0 +cube +material 0 +frame 0 +TRANS 0 0 0 +ROTAT 0 0 90 +SCALE .01 10 10 +frame 1 +TRANS 0 0 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +OBJECT 1 +cube +material 0 +frame 0 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 +frame 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +OBJECT 2 +cube +material 0 +frame 0 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 +frame 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +OBJECT 3 +cube +material 1 +frame 0 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 +frame 1 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +OBJECT 4 +cube +material 2 +frame 0 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 +frame 1 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +OBJECT 5 +sphere +material 4 +frame 0 +TRANS 0 2 0 +ROTAT 0 180 0 +SCALE 3 3 3 +frame 1 +TRANS 0 2 0 +ROTAT 0 180 0 +SCALE 3 3 3 + +OBJECT 6 +sphere +material 3 +frame 0 +TRANS 2 5 2 +ROTAT 0 180 0 +SCALE 2.5 2.5 2.5 +frame 1 +TRANS 2 5 2 +ROTAT 0 180 0 +SCALE 2.5 2.5 2.5 + +OBJECT 7 +sphere 
+material 6 +frame 0 +TRANS -2 5 -2 +ROTAT 0 180 0 +SCALE 3 3 3 +frame 1 +TRANS -2 5 -2 +ROTAT 0 180 0 +SCALE 3 3 3 + +OBJECT 8 +cube +material 8 +frame 0 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .3 3 3 +frame 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .3 3 3 diff --git a/scenes/sampleScene.txt b/scenes/sampleScene.txt index 52d079e..d663250 100755 --- a/scenes/sampleScene.txt +++ b/scenes/sampleScene.txt @@ -34,11 +34,11 @@ ABSCOEFF 0 0 0 RSCTCOEFF 0 EMITTANCE 0 -MATERIAL 3 //red glossy +MATERIAL 3 //red glossy - specular reflective RGB .63 .26 .24 SPECEX 0 SPECRGB 1 1 1 -REFL 0 +REFL 1 REFR 0 REFRIOR 2 SCATTER 0 @@ -64,7 +64,7 @@ SPECEX 0 SPECRGB 1 1 1 REFL 0 REFR 1 -REFRIOR 2.2 +REFRIOR 2.8 SCATTER 0 ABSCOEFF .02 5.1 5.7 RSCTCOEFF 13 @@ -106,17 +106,25 @@ ABSCOEFF 0 0 0 RSCTCOEFF 0 EMITTANCE 15 +MATERIAL 9 //gold diffuse reflective +RGB 1.0 0.84 0 +SPECEX 0.85 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + CAMERA -RES 800 800 +RES 600 600 FOVY 25 -ITERATIONS 5000 -FILE test.bmp +ITERATIONS 2000 +FILE test.png frame 0 -EYE 0 4.5 12 -VIEW 0 0 -1 -UP 0 1 0 -frame 1 -EYE 0 4.5 12 +EYE 0 4.5 25 VIEW 0 0 -1 UP 0 1 0 @@ -126,11 +134,7 @@ material 0 frame 0 TRANS 0 0 0 ROTAT 0 0 90 -SCALE .01 10 10 -frame 1 -TRANS 0 0 0 -ROTAT 0 0 90 -SCALE .01 10 10 +SCALE .01 10 10 OBJECT 1 cube @@ -138,11 +142,7 @@ material 0 frame 0 TRANS 0 5 -5 ROTAT 0 90 0 -SCALE .01 10 10 -frame 1 -TRANS 0 5 -5 -ROTAT 0 90 0 -SCALE .01 10 10 +SCALE .01 10 10 OBJECT 2 cube @@ -151,10 +151,6 @@ frame 0 TRANS 0 10 0 ROTAT 0 0 90 SCALE .01 10 10 -frame 1 -TRANS 0 10 0 -ROTAT 0 0 90 -SCALE .01 10 10 OBJECT 3 cube @@ -163,10 +159,6 @@ frame 0 TRANS -5 5 0 ROTAT 0 0 0 SCALE .01 10 10 -frame 1 -TRANS -5 5 0 -ROTAT 0 0 0 -SCALE .01 10 10 OBJECT 4 cube @@ -175,55 +167,51 @@ frame 0 TRANS 5 5 0 ROTAT 0 0 0 SCALE .01 10 10 -frame 1 -TRANS 5 5 0 -ROTAT 0 0 0 -SCALE .01 10 10 -OBJECT 5 +OBJECT 5 // white diffuse motion blur sphere sphere material 4 frame 0 -TRANS 0 2 0 -ROTAT 0 180 0 -SCALE 3 3 3 -frame 1 -TRANS 0 2 0 +TRANS 0 2 1.5 ROTAT 0 180 0 SCALE 3 3 3 -OBJECT 6 +OBJECT 6 // mirror sphere. 
sphere material 3 frame 0 -TRANS 2 5 2 -ROTAT 0 180 0 -SCALE 2.5 2.5 2.5 -frame 1 -TRANS 2 5 2 +TRANS 3.3 5 0 ROTAT 0 180 0 SCALE 2.5 2.5 2.5 -OBJECT 7 -sphere -material 6 -frame 0 -TRANS -2 5 -2 -ROTAT 0 180 0 -SCALE 3 3 3 -frame 1 -TRANS -2 5 -2 -ROTAT 0 180 0 -SCALE 3 3 3 - -OBJECT 8 +OBJECT 7 // light cube material 8 frame 0 TRANS 0 10 0 ROTAT 0 0 90 SCALE .3 3 3 -frame 1 -TRANS 0 10 0 -ROTAT 0 0 90 -SCALE .3 3 3 + +OBJECT 8 // gold diffuse reflective +sphere +material 9 +frame 0 +TRANS 0 4 -1 +ROTAT 0 180 0 +SCALE 2 2 2 + +OBJECT 9 // red diffuse sphere +sphere +material 1 +frame 0 +TRANS -3.1 5.5 1.2 +ROTAT 0 180 0 +SCALE 2 2 2 + +OBJECT 10 // refractive glass +sphere +material 5 +frame 0 +TRANS -2.5 2 0 +ROTAT 0 0 0 +SCALE 2.6 2.6 2.6 diff --git a/scenes/sampleScene2.txt b/scenes/sampleScene2.txt new file mode 100644 index 0000000..a48e4a9 --- /dev/null +++ b/scenes/sampleScene2.txt @@ -0,0 +1,210 @@ +MATERIAL 0 //white diffuse +RGB 0.9 0.9 0.9 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 1 //red diffuse +RGB .63 .06 .04 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 2 //green diffuse +RGB .15 .48 .09 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 3 //red glossy - specular reflective +RGB .63 .26 .24 +SPECEX 0 +SPECRGB 1 1 1 +REFL 1 +REFR 0 +REFRIOR 2 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 4 //white glossy +RGB 1 1 1 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 2 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 5 //glass +RGB 0 0 0 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 1 +REFRIOR 2.8 +SCATTER 0 +ABSCOEFF .02 5.1 5.7 +RSCTCOEFF 13 +EMITTANCE 0 + +MATERIAL 6 //green glossy +RGB .35 .48 .29 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 2.6 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 7 //light +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 1 + +MATERIAL 8 //light +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 15 + +MATERIAL 9 //gold diffuse reflective +RGB 1.0 0.84 0 +SPECEX 0.85 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +CAMERA +RES 600 600 +FOVY 25 +ITERATIONS 2000 +FILE test.png +frame 0 +EYE 0 4.5 25 +VIEW 0 0 -1 +UP 0 1 0 + +OBJECT 0 +cube +material 0 +frame 0 +TRANS 0 0 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +OBJECT 1 +cube +material 0 +frame 0 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +OBJECT 2 +cube +material 0 +frame 0 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +OBJECT 3 +cube +material 1 +frame 0 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +OBJECT 4 +cube +material 2 +frame 0 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + + +OBJECT 6 // mirror sphere. 
+sphere +material 3 +frame 0 +TRANS 3.3 5 0 +ROTAT 0 180 0 +SCALE 2.5 2.5 2.5 + +OBJECT 7 // light +cube +material 8 +frame 0 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .3 3 3 + +OBJECT 8 // gold diffuse reflective +sphere +material 9 +frame 0 +TRANS 0 4 -1 +ROTAT 0 180 0 +SCALE 2 2 2 + +OBJECT 9 // red diffuse sphere +sphere +material 1 +frame 0 +TRANS -3.1 5.5 1.2 +ROTAT 0 180 0 +SCALE 2 2 2 + +OBJECT 10 // refractive glass +sphere +material 5 +frame 0 +TRANS -2.5 2 0 +ROTAT 0 0 0 +SCALE 2.6 2.6 2.6 diff --git a/scenes/sampleScene_rt.txt b/scenes/sampleScene_rt.txt new file mode 100644 index 0000000..edf39ae --- /dev/null +++ b/scenes/sampleScene_rt.txt @@ -0,0 +1,190 @@ +MATERIAL 0 //white diffuse +RGB 1 1 1 +SPECEX 30 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 1 //red diffuse +RGB .63 .06 .04 +SPECEX 30 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 2 //green diffuse +RGB .15 .48 .09 +SPECEX 30 +SPECRGB 1 1 1 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 3 //red glossy +RGB .63 .06 .04 +SPECEX 15 +SPECRGB 1 1 1 +REFL 1 +REFR 0 +REFRIOR 2 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 4 //white glossy +RGB 1 1 1 +SPECEX 15 +SPECRGB 1 1 1 +REFL 1 +REFR 0 +REFRIOR 2 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 5 //glass +RGB 0 0 0 +SPECEX 0 +SPECRGB 1 1 1 +REFL 0 +REFR 1 +REFRIOR 2.2 +SCATTER 0 +ABSCOEFF .02 5.1 5.7 +RSCTCOEFF 13 +EMITTANCE 0 + +MATERIAL 6 //green glossy +RGB .15 .48 .09 +SPECEX 15 +SPECRGB 1 1 1 +REFL 1 +REFR 0 +REFRIOR 2.6 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 0 + +MATERIAL 7 //light +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 10 + +MATERIAL 8 //light +RGB 1 1 1 +SPECEX 10 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +SCATTER 0 +ABSCOEFF 0 0 0 +RSCTCOEFF 0 +EMITTANCE 15 + +CAMERA +RES 600 600 +FOVY 25 +ITERATIONS 1 +FILE test.png +frame 0 +EYE 0 4.5 25 +VIEW 0 0 -1 +UP 0 1 0 + +OBJECT 0 +cube +material 0 +frame 0 +TRANS 0 0 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +OBJECT 1 +cube +material 0 +frame 0 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +OBJECT 2 +cube +material 0 +frame 0 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +OBJECT 3 +cube +material 1 +frame 0 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +OBJECT 4 +cube +material 2 +frame 0 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +OBJECT 5 +sphere +material 4 +frame 0 +TRANS 0 2 0 +ROTAT 0 180 0 +SCALE 3 3 3 + +OBJECT 6 +sphere +material 3 +frame 0 +TRANS 2 5 2 +ROTAT 0 180 0 +SCALE 2.5 2.5 2.5 + +OBJECT 7 +sphere +material 6 +frame 0 +TRANS -2 5 -2 +ROTAT 0 180 0 +SCALE 3 3 3 + + +OBJECT 8 +cube +material 8 +frame 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 1 3 \ No newline at end of file diff --git a/src/image.cpp b/src/image.cpp index 67bf157..46b6235 100755 --- a/src/image.cpp +++ b/src/image.cpp @@ -38,7 +38,7 @@ image::~image(){ //------------------------ float image::applyGamma(float f){ - //apply gamma correction, use simple power law gamma for now. + //apply gamma correction, use simple power law gamma for now. 
TODO: sRGB return pow(f/float(gamma.divisor), gamma.gamma); } diff --git a/src/interactions.h b/src/interactions.h index 6561796..29171ea 100755 --- a/src/interactions.h +++ b/src/interactions.h @@ -8,10 +8,56 @@ #include "intersections.h" +struct Fresnel { + float reflectionCoefficient; + float transmissionCoefficient; +}; + +struct AbsorptionAndScatteringProperties{ + glm::vec3 absorptionCoefficient; + float reducedScatteringCoefficient; +}; + //forward declaration +__host__ __device__ bool calculateScatterAndAbsorption(ray& r, float& depth, AbsorptionAndScatteringProperties& currentAbsorptionAndScattering, glm::vec3& unabsorbedColor, material m, float randomFloatForScatteringDistance, float randomFloat2, float randomFloat3); __host__ __device__ glm::vec3 getRandomDirectionInSphere(float xi1, float xi2); +__host__ __device__ glm::vec3 calculateTransmission(glm::vec3 absorptionCoefficient, float distance); +__host__ __device__ glm::vec3 calculateTransmissionDirection(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR); +__host__ __device__ glm::vec3 calculateReflectionDirection(glm::vec3 normal, glm::vec3 incident); +__host__ __device__ Fresnel calculateFresnel(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR, glm::vec3 reflectionDirection, glm::vec3 transmissionDirection); __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 normal, float xi1, float xi2); +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ glm::vec3 calculateTransmission(glm::vec3 absorptionCoefficient, float distance) { + return glm::vec3(0,0,0); +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ bool calculateScatterAndAbsorption(ray& r, float& depth, AbsorptionAndScatteringProperties& currentAbsorptionAndScattering, + glm::vec3& unabsorbedColor, material m, float randomFloatForScatteringDistance, float randomFloat2, float randomFloat3){ + return false; +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ glm::vec3 calculateTransmissionDirection(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR) { + return glm::vec3(0,0,0); +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ glm::vec3 calculateReflectionDirection(glm::vec3 normal, glm::vec3 incident) { + //nothing fancy here + return glm::vec3(0,0,0); +} + +//TODO (OPTIONAL): IMPLEMENT THIS FUNCTION +__host__ __device__ Fresnel calculateFresnel(glm::vec3 normal, glm::vec3 incident, float incidentIOR, float transmittedIOR, glm::vec3 reflectionDirection, glm::vec3 transmissionDirection) { + Fresnel fresnel; + + fresnel.reflectionCoefficient = 1; + fresnel.transmissionCoefficient = 0; + return fresnel; +} + //LOOK: This function demonstrates cosine weighted random direction generation in a sphere! __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 normal, float xi1, float xi2) { @@ -40,11 +86,126 @@ __host__ __device__ glm::vec3 calculateRandomDirectionInHemisphere(glm::vec3 nor } -//TODO: IMPLEMENT THIS FUNCTION +// Given the refractive indices of the materials at intersection, cosine of the incident angle and +// a random number uniformly distributed between 0 and 1, this function returns true if the Fresnel +// reflectance term is greater than or equal to the random number, signifying reflection. Otherwise, +// it will return false, signifying refraction/transmittance. 
+__host__ __device__ bool calculateFresnelReflectance (float outsideRefIndex, float insideRefIndex, float cosineIncidentAngle, + float uniformRandomBetween01) +{ + float RF0 = (insideRefIndex - outsideRefIndex) / (insideRefIndex + outsideRefIndex); + RF0 = RF0 * RF0; + + //if (cosineIncidentAngle < 0) // External Reflection + //{ + float fresnelRefl = RF0 + (1-RF0)*pow ((1-abs(cosineIncidentAngle)), 5); + + if (uniformRandomBetween01 <= fresnelRefl) + return true; // reflectance + return false; // refraction + //} + //else // Internal Reflection. + //{ + // float sinCritAngle = insideRefIndex / outsideRefIndex; + // float sinIncidentAngle = sqrt (1 - (cosineIncidentAngle * cosineIncidentAngle)); + // if (sinIncidentAngle > sinCritAngle) + // return true; // reflection + // return false; // refraction + //} +} + +//TODO: Done! //Now that you know how cosine weighted direction generation works, try implementing non-cosine (uniform) weighted random direction generation. //This should be much easier than if you had to implement calculateRandomDirectionInHemisphere. __host__ __device__ glm::vec3 getRandomDirectionInSphere(float xi1, float xi2) { - return glm::vec3(0,0,0); + + float cosTheta = 2*xi1 - 1; // Spread out xi1 in [0,1] to [-1, 1]. + float sinTheta = sqrt (1 - cosTheta*cosTheta); + float phi = TWO_PI * xi2; + + return glm::vec3(sinTheta*cos(phi), sinTheta*sin(phi), cosTheta); } +__host__ __device__ glm::vec3 calculateDirectionInLobeAroundNormal (glm::vec3 normal, thrust::default_random_engine rng) +{ + float piBy16 = PI / 16.0; + thrust::uniform_real_distribution angleTheta(0, TWO_PI); + thrust::uniform_real_distribution anglePhi(-piBy16, piBy16); + + glm::vec3 someDirNotNormal; + if ((normal.x < normal.y) && (normal.x < normal.z)) + someDirNotNormal = glm::vec3(1, 0, 0); + else if (normal.y < normal.z) + someDirNotNormal = glm::vec3(0, 1, 0); + else + someDirNotNormal = glm::vec3(0, 0, 1); + + //Use not-normal direction to generate two perpendicular directions + glm::vec3 perpendicularDirection1 = glm::normalize(glm::cross(normal, someDirNotNormal)); + glm::vec3 perpendicularDirection2 = glm::normalize(glm::cross(normal, perpendicularDirection1)); + + return ( cos(anglePhi(rng)) * normal ) + ( sin(anglePhi(rng))*cos(angleTheta(rng)) * perpendicularDirection1 ) + + ( sin(anglePhi(rng))*sin(angleTheta(rng)) * perpendicularDirection2 ); +} + +//TODO (PARTIALLY OPTIONAL): IMPLEMENT THIS FUNCTION +//returns 0 if diffuse scatter, 1 if reflected, 2 if transmitted. +__host__ __device__ int calculateBSDF(ray& r, glm::vec3 intersect, glm::vec3 normal, glm::vec3 emittedColor, + AbsorptionAndScatteringProperties& currentAbsorptionAndScattering, + float randomSeed, glm::vec3& color, glm::vec3& unabsorbedColor, + material m) +{ + int retVal = 0; + r.origin = intersect-0.01f*r.direction; //slightly perturb along normal to avoid self-intersection. + thrust::default_random_engine rng(hash(randomSeed)); + thrust::uniform_real_distribution u01(0, 1); + thrust::uniform_real_distribution u02(0, 1); + + if (m.hasReflective >= 1.0) // specular reflectance + { + r.direction = glm::normalize (reflectRay (r.direction, normal)); + retVal = 1; + } + else if (m.hasRefractive) // Fresnel refractance. + { + float cosIncidentAngle = glm::dot (r.direction, normal); + float insideRefIndex = m.indexOfRefraction; float outsideRefIndex = 1.0; + if (cosIncidentAngle > 0) // If ray going from inside to outside. 
+ { + outsideRefIndex = m.indexOfRefraction; + insideRefIndex = 1.0; + normal = -normal; + } + + if (calculateFresnelReflectance (outsideRefIndex, insideRefIndex, cosIncidentAngle, u01(rng))) + { +// if (cosIncidentAngle > 0) // If ray going from inside to outside. +// normal = -normal; // Flip the normal for reflection. + r.direction = glm::normalize (reflectRay (r.direction, normal)); + retVal = 1; + } + else + { + // As given in Real-Time Rendering, Third Edition, pp. 396. + /*float w = (outsideRefIndex / insideRefIndex) * glm::dot (lightDir, normal); + float k = sqrt (1 + ((w + (outsideRefIndex / insideRefIndex)) * (w - (outsideRefIndex / insideRefIndex)))); + r.direction = (w - k)*normal - (outsideRefIndex / insideRefIndex)*lightDir;*/ + r.direction = glm::normalize (glm::refract (r.direction, normal, outsideRefIndex/insideRefIndex)); + r.origin = intersect+0.01f*r.direction; + retVal = 2; + } + } + else if (m.hasReflective) // m.hasReflective between 0 and 1 signifies diffuse reflectance. + { + r.direction = glm::normalize (calculateDirectionInLobeAroundNormal (normal, rng)); + retVal = 1; + } + else + { + float xi1, xi2; + r.direction = glm::normalize (calculateRandomDirectionInHemisphere (normal, u01 (rng), u02 (rng))); + } + return retVal; +}; + #endif diff --git a/src/intersections.h b/src/intersections.h index a6b9469..500c433 100755 --- a/src/intersections.h +++ b/src/intersections.h @@ -12,16 +12,59 @@ #include "utilities.h" #include -//Some forward declarations +//Self explanatory __host__ __device__ glm::vec3 getPointOnRay(ray r, float t); + +//LOOK: This is a custom function for multiplying cudaMat4 4x4 matrixes with vectors. +//This is a workaround for GLM matrix multiplication not working properly on pre-Fermi NVIDIA GPUs. +//Multiplies a cudaMat4 matrix and a vec4 and returns a vec3 clipped from the vec4 __host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v); +//Overload for doing the same with a cudaMat4 and a vec3. +__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec3 v, bool isVector = false); + +// Component-wise vector multiply function. Multiplies two vec3s. +__host__ __device__ glm::vec3 multiplyVV(glm::vec3 a, glm::vec3 b); + +// Component-wise vector multiply. Multiplies two vec4s. +__host__ __device__ glm::vec4 multiplyVV(glm::vec4 a, glm::vec4 b); + +//Gets sign of each component of a ray's inverse direction __host__ __device__ glm::vec3 getSignOfRay(ray r); + +//Gets 1/direction for a ray __host__ __device__ glm::vec3 getInverseDirectionOfRay(ray r); + +//TODO: Done! +//Cube intersection test, return -1 if no intersection, otherwise, distance to intersection __host__ __device__ float boxIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); -__host__ __device__ float boxIntersectionTest(glm::vec3 boxMin, glm::vec3 boxMax, staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); + +//LOOK: Here's an intersection test example from a sphere. Now you just need to figure out cube and, optionally, triangle. +//Sphere intersection test, return -1 if no intersection, otherwise, distance to intersection __host__ __device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal); + +//LOOK: Example for generating a random point on an object using thrust. 
+//Generates a random point on a given cube __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float randomSeed); +//Handy dandy little hashing function that provides seeds for random number generation +__host__ __device__ unsigned int hash(unsigned int a); + +//Quick and dirty epsilon check +__host__ __device__ bool epsilonCheck(float a, float b); + +//returns x,y,z half-dimensions of tightest bounding box +__host__ __device__ glm::vec3 getRadiuses(staticGeom geom); + +//TODO: Done! +//Generates a random point on a given sphere +__host__ __device__ glm::vec3 getRandomPointOnSphere(staticGeom sphere, float randomSeed); + + +//------------------------------------------------------------------------------------------------ +// IMPLEMENTATIONS +//------------------------------------------------------------------------------------------------ + + //Handy dandy little hashing function that provides seeds for random number generation __host__ __device__ unsigned int hash(unsigned int a){ a = (a+0x7ed55d16) + (a<<12); @@ -47,9 +90,6 @@ __host__ __device__ glm::vec3 getPointOnRay(ray r, float t){ return r.origin + float(t-.0001)*glm::normalize(r.direction); } -//LOOK: This is a custom function for multiplying cudaMat4 4x4 matrixes with vectors. -//This is a workaround for GLM matrix multiplication not working properly on pre-Fermi NVIDIA GPUs. -//Multiplies a cudaMat4 matrix and a vec4 and returns a vec3 clipped from the vec4 __host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){ glm::vec3 r(1,1,1); r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w); @@ -58,113 +98,48 @@ __host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){ return r; } -//Gets 1/direction for a ray -__host__ __device__ glm::vec3 getInverseDirectionOfRay(ray r){ - return glm::vec3(1.0/r.direction.x, 1.0/r.direction.y, 1.0/r.direction.z); +__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec3 v, bool isVector) +{ + float w; + if (isVector) + w = 0; + else + w = 1; + glm::vec3 r(1,1,1); + r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*w); + r.y = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*w); + r.z = (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*w); + return r; } -//Gets sign of each component of a ray's inverse direction -__host__ __device__ glm::vec3 getSignOfRay(ray r){ - glm::vec3 inv_direction = getInverseDirectionOfRay(r); - return glm::vec3((int)(inv_direction.x < 0), (int)(inv_direction.y < 0), (int)(inv_direction.z < 0)); +__host__ __device__ glm::vec3 multiplyVV(glm::vec3 a, glm::vec3 b) +{ + glm::vec3 r(0,0,0); + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; + return r; } -//Wrapper for cube intersection test for testing against unit cubes -__host__ __device__ float boxIntersectionTest(staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ - return boxIntersectionTest(glm::vec3(-.5,-.5,-.5), glm::vec3(.5,.5,.5), box, r, intersectionPoint, normal); +__host__ __device__ glm::vec4 multiplyVV(glm::vec4 a, glm::vec4 b) +{ + glm::vec4 r(0,0,0,0); + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; + r.w = a.w * b.w; + return r; } -//Cube intersection test, return -1 if no intersection, otherwise, distance to intersection -__host__ __device__ float boxIntersectionTest(glm::vec3 boxMin, glm::vec3 boxMax, staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ - glm::vec3 currentNormal = glm::vec3(0,0,0); - - ray ro = r; - - glm::vec3 iP0 = multiplyMV(box.inverseTransform,glm::vec4(r.origin, 1.0f)); - glm::vec3 iP1 = 
multiplyMV(box.inverseTransform,glm::vec4(r.origin+r.direction, 1.0f)); - glm::vec3 iV0 = iP1 - iP0; - - r.origin = iP0; - r.direction = glm::normalize(iV0); - - float tmin, tmax, tymin, tymax, tzmin, tzmax; - - glm::vec3 rsign = getSignOfRay(r); - glm::vec3 rInverseDirection = getInverseDirectionOfRay(r); - - if((int)rsign.x==0){ - tmin = (boxMin.x - r.origin.x) * rInverseDirection.x; - tmax = (boxMax.x - r.origin.x) * rInverseDirection.x; - }else{ - tmin = (boxMax.x - r.origin.x) * rInverseDirection.x; - tmax = (boxMin.x - r.origin.x) * rInverseDirection.x; - } - - if((int)rsign.y==0){ - tymin = (boxMin.y - r.origin.y) * rInverseDirection.y; - tymax = (boxMax.y - r.origin.y) * rInverseDirection.y; - }else{ - tymin = (boxMax.y - r.origin.y) * rInverseDirection.y; - tymax = (boxMin.y - r.origin.y) * rInverseDirection.y; - } - - if ( (tmin > tymax) || (tymin > tmax) ){ - return -1; - } - if (tymin > tmin){ - tmin = tymin; - } - if (tymax < tmax){ - tmax = tymax; - } - - if((int)rsign.z==0){ - tzmin = (boxMin.z - r.origin.z) * rInverseDirection.z; - tzmax = (boxMax.z - r.origin.z) * rInverseDirection.z; - }else{ - tzmin = (boxMax.z - r.origin.z) * rInverseDirection.z; - tzmax = (boxMin.z - r.origin.z) * rInverseDirection.z; - } - - if ( (tmin > tzmax) || (tzmin > tmax) ){ - return -1; - } - if (tzmin > tmin){ - tmin = tzmin; - } - if (tzmax < tmax){ - tmax = tzmax; - } - if(tmin<0){ - return -1; - } - - glm::vec3 osintersect = r.origin + tmin*r.direction; - - if(abs(osintersect.x-abs(boxMax.x))<.001){ - currentNormal = glm::vec3(1,0,0); - }else if(abs(osintersect.y-abs(boxMax.y))<.001){ - currentNormal = glm::vec3(0,1,0); - }else if(abs(osintersect.z-abs(boxMax.z))<.001){ - currentNormal = glm::vec3(0,0,1); - }else if(abs(osintersect.x+abs(boxMin.x))<.001){ - currentNormal = glm::vec3(-1,0,0); - }else if(abs(osintersect.y+abs(boxMin.y))<.001){ - currentNormal = glm::vec3(0,-1,0); - }else if(abs(osintersect.z+abs(boxMin.z))<.001){ - currentNormal = glm::vec3(0,0,-1); - } - - intersectionPoint = multiplyMV(box.transform, glm::vec4(osintersect, 1.0)); - - +__host__ __device__ glm::vec3 getInverseDirectionOfRay(ray r){ + return glm::vec3(1.0/r.direction.x, 1.0/r.direction.y, 1.0/r.direction.z); +} - normal = multiplyMV(box.transform, glm::vec4(currentNormal,0.0)); - return glm::length(intersectionPoint-ro.origin); +__host__ __device__ glm::vec3 getSignOfRay(ray r){ + glm::vec3 inv_direction = getInverseDirectionOfRay(r); + return glm::vec3((int)(inv_direction.x < 0), (int)(inv_direction.y < 0), (int)(inv_direction.z < 0)); } -//LOOK: Here's an intersection test example from a sphere. Now you just need to figure out cube and, optionally, triangle. -//Sphere intersection test, return -1 if no intersection, otherwise, distance to intersection __host__ __device__ float sphereIntersectionTest(staticGeom sphere, ray r, glm::vec3& intersectionPoint, glm::vec3& normal){ float radius = .5; @@ -215,8 +190,6 @@ __host__ __device__ glm::vec3 getRadiuses(staticGeom geom){ return glm::vec3(xradius, yradius, zradius); } -//LOOK: Example for generating a random point on an object using thrust. 
-//Generates a random point on a given cube __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float randomSeed){ thrust::default_random_engine rng(hash(randomSeed)); @@ -261,20 +234,145 @@ __host__ __device__ glm::vec3 getRandomPointOnCube(staticGeom cube, float random } -//Generates a random point on a given sphere -__host__ __device__ glm::vec3 getRandomPointOnSphere(staticGeom sphere, float randomSeed){ - float radius=.5f; - thrust::default_random_engine rng(hash(randomSeed)); - thrust::uniform_real_distribution u01(-1,1); - thrust::uniform_real_distribution u02(0,TWO_PI); - - float theta = (float)u02(rng); - float cosphi = (float)u01(rng); - float sinphi = sqrt(1 - cosphi*cosphi); - glm::vec3 point = radius*glm::vec3(sinphi*cos(theta),sinphi*sin(theta),cosphi); - glm::vec3 randPoint = multiplyMV(sphere.transform, glm::vec4(point,1.0f)); - - return randPoint; +__host__ __device__ glm::vec3 getRandomPointOnSphere(staticGeom sphere, float randomSeed) +{ + thrust::default_random_engine rng(hash(randomSeed)); + thrust::uniform_real_distribution u (0, 1); + thrust::uniform_real_distribution v (0, 1); + float radius = getRadiuses (sphere).x; + return glm::vec3 (radius*sin (PI*u(rng))*cos (TWO_PI*v(rng)), radius*sin (PI*u(rng))*sin (TWO_PI*v(rng)), radius*cos (PI*u(rng))); +} + +__host__ __device__ float boxIntersectionTest(staticGeom box, ray r, glm::vec3& intersectionPoint, glm::vec3& normal, glm::vec2 &UVcoords) +{ + // Uses the slab method to check for intersection. + // Refer http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm for details. + + // Define the constants. tnear = -INFINITY ; tfar = +INFINITY (+/- 1e6 for practical purposes) + float tnear = -1e6, tfar = 1e6; + float epsilon = 1e-3; + + // Body space extremities. + float lowerLeftBack [3] = {-0.5, -0.5, -0.5}; + float upperRightFront [3] = {0.5, 0.5, 0.5}; + + ray transformedRay; + // Transform the ray from global to model space. + transformedRay.origin = multiplyMV (box.inverseTransform, glm::vec4 (r.origin, 1.0)); + transformedRay.direction = glm::normalize (multiplyMV (box.inverseTransform, glm::vec4 (r.direction, 0.0))); + + float transRayOrigArr [3]; + transRayOrigArr [0] = transformedRay.origin.x; + transRayOrigArr [1] = transformedRay.origin.y; + transRayOrigArr [2] = transformedRay.origin.z; + + float transRayDirArr [3]; + transRayDirArr [0] = transformedRay.direction.x; + transRayDirArr [1] = transformedRay.direction.y; + transRayDirArr [2] = transformedRay.direction.z; + + // For each X, Y and Z, check for intersections using the slab method as described above. + for (int loopVar = 0; loopVar < 3; loopVar ++) + { + if (fabs (transRayDirArr [loopVar]) < epsilon) + { + if ((transRayOrigArr [loopVar] < lowerLeftBack [loopVar]-epsilon) || (transRayOrigArr [loopVar] > upperRightFront [loopVar]+epsilon)) + return -1; + } + else + { + float t1 = (lowerLeftBack [loopVar] - transRayOrigArr [loopVar]) / transRayDirArr [loopVar]; + float t2 = (upperRightFront [loopVar] - transRayOrigArr [loopVar]) / transRayDirArr [loopVar]; + + if (t1 > t2+epsilon) + { + t2 += t1; + t1 = t2 - t1; + t2 -= t1; + } + + if (tnear < t1-epsilon) + tnear = t1; + + if (tfar > t2-epsilon) + tfar = t2; + + if (tnear > tfar+epsilon) + return -1; + + if (tfar < 0-epsilon) + return -1; + } + } + + // Get the intersection point in model space. 
+ glm::vec4 intersectionPointInBodySpace = glm::vec4 (getPointOnRay (transformedRay, tnear), 1.0); + + UVcoords = glm::vec2 (0, 0); + if ( (isApproximate (intersectionPointInBodySpace.x, 0.5)) || // YZ Faces + (isApproximate (intersectionPointInBodySpace.x, -0.5)) ) + { + UVcoords.x = intersectionPointInBodySpace.y; + UVcoords.y = intersectionPointInBodySpace.z; + } + else if ((isApproximate (intersectionPointInBodySpace.y, 0.5)) || // XZ Faces + (isApproximate (intersectionPointInBodySpace.y, -0.5))) + { + UVcoords.x = intersectionPointInBodySpace.x; + UVcoords.y = intersectionPointInBodySpace.z; + } + else if ((isApproximate (intersectionPointInBodySpace.z, 0.5)) || // XY Face + (isApproximate (intersectionPointInBodySpace.z, -0.5))) + { + UVcoords.x = intersectionPointInBodySpace.x; + UVcoords.y = intersectionPointInBodySpace.y; + } + + UVcoords.x += 0.5; // u + UVcoords.y += 0.5; // v + + glm::vec4 bodySpaceOrigin = glm::vec4 (0,0,0,1); + + normal = glm::vec3 (0, 0, 0); + + float normalArr [3]; + normalArr [0] = normal.x; + normalArr [1] = normal.y; + normalArr [2] = normal.z; + + float intrPtBodySpaceArr [3]; + intrPtBodySpaceArr [0] = intersectionPointInBodySpace.x; + intrPtBodySpaceArr [1] = intersectionPointInBodySpace.y; + intrPtBodySpaceArr [2] = intersectionPointInBodySpace.z; + + float bodySpaceOrigArr [3]; + bodySpaceOrigArr [0] = bodySpaceOrigin.x; + bodySpaceOrigArr [1] = bodySpaceOrigin.y; + bodySpaceOrigArr [2] = bodySpaceOrigin.z; + + for (int loopVar = 0; loopVar < 3; loopVar ++) + { + float diff = intrPtBodySpaceArr [loopVar] - bodySpaceOrigArr [loopVar]; + float diffAbs = fabs (diff); + if ((diffAbs >= 0.5-epsilon) && (diffAbs <= 0.5+epsilon)) + { + normalArr [loopVar] = diff / diffAbs; + break; + } + } + + glm::vec4 normalTobeTransformed = glm::vec4 (normalArr [0], normalArr [1], normalArr [2], 0); + cudaMat4 transposeBoxInvTransform; + transposeBoxInvTransform.x.x = box.inverseTransform.x.x; transposeBoxInvTransform.x.y = box.inverseTransform.y.x; transposeBoxInvTransform.x.z = box.inverseTransform.z.x; transposeBoxInvTransform.x.w = box.inverseTransform.w.x; + transposeBoxInvTransform.y.x = box.inverseTransform.x.y; transposeBoxInvTransform.y.y = box.inverseTransform.y.y; transposeBoxInvTransform.y.z = box.inverseTransform.z.y; transposeBoxInvTransform.y.w = box.inverseTransform.w.y; + transposeBoxInvTransform.z.x = box.inverseTransform.x.z; transposeBoxInvTransform.z.y = box.inverseTransform.y.z; transposeBoxInvTransform.z.z = box.inverseTransform.z.z; transposeBoxInvTransform.z.w = box.inverseTransform.w.z; + transposeBoxInvTransform.w.x = box.inverseTransform.x.w; transposeBoxInvTransform.w.y = box.inverseTransform.y.w; transposeBoxInvTransform.w.z = box.inverseTransform.z.w; transposeBoxInvTransform.w.w = box.inverseTransform.w.w; + + // Transform the intersection point & the normal to world space. + intersectionPoint = multiplyMV (box.transform, intersectionPointInBodySpace); + normal = multiplyMV (transposeBoxInvTransform, normalTobeTransformed); + normal = glm::normalize (normal); + return glm::length (r.origin - intersectionPoint); } #endif diff --git a/src/main.cpp b/src/main.cpp index 81836b1..5e27888 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -52,7 +52,7 @@ int main(int argc, char** argv){ renderCam = &renderScene->renderCam; width = renderCam->resolution[0]; height = renderCam->resolution[1]; - +// renderCam-> if(targetFrame>=renderCam->frames){ cout << "Warning: Specified target frame is out of range, defaulting to frame 0." 
<< endl; targetFrame = 0; @@ -105,7 +105,7 @@ void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer - if(iterationsiterations){ + if(iterations<1/*renderCam->iterations*/){ uchar4 *dptr=NULL; iterations++; cudaGLMapBufferObject((void**)&dptr, pbo); @@ -113,20 +113,22 @@ void runCuda(){ //pack geom and material arrays geom* geoms = new geom[renderScene->objects.size()]; material* materials = new material[renderScene->materials.size()]; - + mytexture* textures = new mytexture [renderScene->textures.size ()]; for(int i=0; iobjects.size(); i++){ geoms[i] = renderScene->objects[i]; } for(int i=0; imaterials.size(); i++){ materials[i] = renderScene->materials[i]; } + for(int i=0; itextures.size(); i++){ + textures[i] = renderScene->textures[i]; + } - - // execute the kernel - cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(), geoms, renderScene->objects.size() ); - + // execute the kernel + cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(), geoms, renderScene->objects.size(), + textures, renderScene->textures.size ()); // unmap buffer object - cudaGLUnmapBufferObject(pbo); + cudaGLUnmapBufferObject(pbo); }else{ if(!finishedRender){ @@ -142,8 +144,8 @@ void runCuda(){ gammaSettings gamma; gamma.applyGamma = true; - gamma.gamma = 1.0; - gamma.divisor = 1.0; //renderCam->iterations; + gamma.gamma = 1.0;///2.2; + gamma.divisor = renderCam->iterations; outputImage.setGammaSettings(gamma); string filename = renderCam->imageName; string s; diff --git a/src/main.h b/src/main.h index 0bab7cb..b4b2828 100755 --- a/src/main.h +++ b/src/main.h @@ -29,6 +29,7 @@ #include "raytraceKernel.h" #include "utilities.h" #include "scene.h" +//#include "cuPrintf.cuh" #if CUDA_VERSION >= 5000 #include diff --git a/src/raytraceKernel.cu b/src/raytraceKernel.cu index 87a65a6..0471505 100755 --- a/src/raytraceKernel.cu +++ b/src/raytraceKernel.cu @@ -8,22 +8,57 @@ #include #include #include +#include +#include #include "sceneStructs.h" +#include "glm/glm.hpp" #include "utilities.h" #include "raytraceKernel.h" #include "intersections.h" #include "interactions.h" -#include -#include "glm/glm.hpp" + +#if CUDA_VERSION >= 5000 + #include +#else + #include +#endif + +const glm::vec3 bgColour = glm::vec3 (0.55, 0.25, 0); void checkCUDAError(const char *msg) { + cudaDeviceSynchronize(); cudaError_t err = cudaGetLastError(); if( cudaSuccess != err) { fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + std::cin.get (); exit(EXIT_FAILURE); } } +//Sets up the projection half vectors. +void setupProjection (projectionInfo &ProjectionParams, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov) +{ + //Set up the projection variables: + float degToRad = 3.1415926 / 180.0; + float radToDeg = 1.0 / degToRad; + + ProjectionParams.centreProj = eye+view; + glm::vec3 eyeToProjCentre = ProjectionParams.centreProj - eye; + glm::vec3 A = glm::cross (ProjectionParams.centreProj, up); + glm::vec3 B = glm::cross (A, ProjectionParams.centreProj); + float lenEyeToProjCentre = glm::length (eyeToProjCentre); + + ProjectionParams.halfVecH = glm::normalize (A) * lenEyeToProjCentre * (float)tan ((fov.x*degToRad)); + ProjectionParams.halfVecV = glm::normalize (B) * lenEyeToProjCentre * (float)tan ((fov.y*degToRad)); +} + +// Reflects the incidentRay around the normal. 
+__host__ __device__ glm::vec3 reflectRay (glm::vec3 incidentRay, glm::vec3 normal) +{ + glm::vec3 reflectedRay = incidentRay - (2.0f*glm::dot (incidentRay, normal))*normal; + return reflectedRay; +} + //LOOK: This function demonstrates how to use thrust for random number generation on the GPU! //Function that generates static. __host__ __device__ glm::vec3 generateRandomNumberFromThread(glm::vec2 resolution, float time, int x, int y){ @@ -35,41 +70,20 @@ __host__ __device__ glm::vec3 generateRandomNumberFromThread(glm::vec2 resolutio return glm::vec3((float) u01(rng), (float) u01(rng), (float) u01(rng)); } -//Kernel that does the initial raycast from the camera. -__host__ __device__ ray raycastFromCameraKernel(glm::vec2 resolution, float time, int x, int y, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov){ - - int index = x + (y * resolution.x); - - thrust::default_random_engine rng(hash(index*time)); - thrust::uniform_real_distribution u01(0,1); - - //standard camera raycast stuff - glm::vec3 E = eye; - glm::vec3 C = view; - glm::vec3 U = up; - float fovx = fov.x; - float fovy = fov.y; - - float CD = glm::length(C); - - glm::vec3 A = glm::cross(C, U); - glm::vec3 B = glm::cross(A, C); - glm::vec3 M = E+C; - glm::vec3 H = (A*float(CD*tan(fovx*(PI/180))))/float(glm::length(A)); - glm::vec3 V = (B*float(CD*tan(-fovy*(PI/180))))/float(glm::length(B)); - - float sx = (x)/(resolution.x-1); - float sy = (y)/(resolution.y-1); - - glm::vec3 P = M + (((2*sx)-1)*H) + (((2*sy)-1)*V); - glm::vec3 PmE = P-E; - glm::vec3 R = E + (float(200)*(PmE))/float(glm::length(PmE)); - - glm::vec3 direction = glm::normalize(R); - //major performance cliff at this point, TODO: find out why! +//Function that does the initial raycast from the camera +__host__ __device__ ray raycastFromCameraKernel(glm::vec2 resolution, float time, int x, int y, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov, glm::vec3 centreProj, + glm::vec3 halfVecH, glm::vec3 halfVecV) +{ ray r; r.origin = eye; - r.direction = direction; + r.direction = glm::vec3(0,0,-1); + + float normDeviceX = (float)x / (resolution.x-1); + float normDeviceY = 1 - ((float)y / (resolution.y-1)); + + glm::vec3 P = centreProj + (2*normDeviceX - 1)*halfVecH + (2*normDeviceY - 1)*halfVecV; + r.direction = glm::normalize (P - r.origin); + return r; } @@ -83,16 +97,16 @@ __global__ void clearImage(glm::vec2 resolution, glm::vec3* image){ } } -//Kernel that writes the image to the OpenGL PBO directly. -__global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* image){ +//Kernel that writes the image to the OpenGL PBO directly. +__global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* image, int nLights){ int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; int index = x + (y * resolution.x); if(x<=resolution.x && y<=resolution.y){ - - glm::vec3 color; + image [index] /= nLights; + glm::vec3 color; color.x = image[index].x*255.0; color.y = image[index].y*255.0; color.z = image[index].z*255.0; @@ -111,90 +125,445 @@ __global__ void sendImageToPBO(uchar4* PBOpos, glm::vec2 resolution, glm::vec3* // Each thread writes one pixel location in the texture (textel) PBOpos[index].w = 0; - PBOpos[index].x = color.x; + PBOpos[index].x = color.x; PBOpos[index].y = color.y; PBOpos[index].z = color.z; } } -//TODO: IMPLEMENT THIS FUNCTION +// Intersects the castRay with all the geometry in the scene (geoms) and returns the intercept information. 
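+// geoms is assumed to be packed with all cubes first, followed by all spheres (objectCountInfo holds
+// the per-type counts), and the closest positive intercept along castRay wins. Typical use from a
+// kernel looks like the following (illustrative only):
+//   interceptInfo hit = getIntercept (geoms, objectCountInfo, someRay, textureArray);
+//   if (hit.interceptVal > 0) { /* shade with hit.intrNormal, hit.intrMaterial and hit.UV */ }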
+__device__ interceptInfo getIntercept (staticGeom * geoms, sceneInfo objectCountInfo, ray castRay, material* textureArray) +{ + glm::vec3 intrPoint = glm::vec3 (0, 0, 0); + glm::vec3 intrNormal = glm::vec3 (0, 0, 0); + glm::vec2 UVcoords = glm::vec2 (0, 0); + + float interceptValue = -32767; + + material newMaterial; + newMaterial.color = glm::vec3 (0,0,0); + newMaterial.specularExponent = 1.0; + newMaterial.hasReflective = 0.0; + newMaterial.hasRefractive = 0.0; + + interceptInfo theRightIntercept; // Stores the lowest intercept. + theRightIntercept.interceptVal = interceptValue; // Initially, it is empty/invalid + theRightIntercept.intrNormal = intrNormal; // Intially, Normal - 0,0,0 + theRightIntercept.intrMaterial = newMaterial; + + float min = 1e6; + // Two different loops to intersect ray with cubes and spheres. + for (int i = 0; i < objectCountInfo.nCubes; ++i) + { + staticGeom currentGeom = geoms [i]; + + interceptValue = boxIntersectionTest(currentGeom, castRay, intrPoint, intrNormal, UVcoords); + if (interceptValue > 0) + { + if (interceptValue < min) + { + min = interceptValue; + + theRightIntercept.interceptVal = min; + theRightIntercept.intrNormal = intrNormal; + theRightIntercept.intrMaterial = textureArray [currentGeom.materialid]; + theRightIntercept.UV = UVcoords; + } + } + } + + for (int i = objectCountInfo.nCubes; i <= (objectCountInfo.nCubes+objectCountInfo.nSpheres); ++i) + { + staticGeom currentGeom = geoms [i]; + + interceptValue = sphereIntersectionTest(currentGeom, castRay, intrPoint, intrNormal); + if (interceptValue > 0) + { + if (interceptValue < min) + { + min = interceptValue; + + theRightIntercept.interceptVal = min; + theRightIntercept.intrNormal = intrNormal; + theRightIntercept.intrMaterial = textureArray [currentGeom.materialid]; + } + } + } + + return theRightIntercept; +} + +// Given MaxWidth of a 2D array, and the x and y co-ordinates or indices of an element, returns the equivalent 1D array index. +__device__ unsigned long getIndex (int x, int y, int MaxWidth) +{ return (unsigned long) y*MaxWidth + x ; } + +// Check for approximate equality. +__host__ __device__ bool isApproximate (float valToBeCompared, float valToBeCheckedAgainst) +{ if ((valToBeCompared >= valToBeCheckedAgainst-0.001) && (valToBeCompared <= valToBeCheckedAgainst+0.001)) return true; return false; } + +// Given the UV coordinates (UVcoords) and a Texture, this returns the bilinearly interpolated colour at that point. 
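+// With fractional offsets (xInterp, yInterp) inside the texel cell, the four neighbouring texels
+// T(i,j), T(i+1,j), T(i,j+1) and T(i+1,j+1) are blended as
+//   colour = (1-y) * [(1-x)*T(i,j) + x*T(i+1,j)] + y * [(1-x)*T(i,j+1) + x*T(i+1,j+1)]
+// UVcoords are assumed to lie in [0,1]; no clamping or wrapping is applied here.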
+__device__ glm::vec3 getColour (mytexture &Texture, glm::vec2 UVcoords) +{ + unsigned long texelXY, texelXPlusOneY, texelXYPlusOne, texelXPlusOneYPlusOne; + float xInterp = (Texture.texelWidth * UVcoords.x) - floor (Texture.texelWidth * UVcoords.x); + float yInterp = (Texture.texelHeight * UVcoords.y) - floor (Texture.texelHeight * UVcoords.y); + + texelXY = getIndex ((int)floor (Texture.texelWidth * UVcoords.x), (int)floor (Texture.texelHeight * UVcoords.y), Texture.texelWidth); + texelXPlusOneY = getIndex ((int)ceil (Texture.texelWidth * UVcoords.x), (int)floor (Texture.texelHeight * UVcoords.y), Texture.texelWidth); + texelXYPlusOne = getIndex ((int)floor (Texture.texelWidth * UVcoords.x), (int)ceil (Texture.texelHeight * UVcoords.y), Texture.texelWidth); + texelXPlusOneYPlusOne = getIndex ((int)ceil (Texture.texelWidth * UVcoords.x), (int)ceil (Texture.texelHeight * UVcoords.y), Texture.texelWidth); + + glm::vec3 xInterpedColour1, xInterpedColour2, finalColour; + xInterpedColour1 = xInterp * Texture.texels [texelXPlusOneY] + (1-xInterp)* Texture.texels [texelXY]; + xInterpedColour2 = xInterp * Texture.texels [texelXPlusOneYPlusOne] + (1-xInterp)* Texture.texels [texelXYPlusOne]; + finalColour = yInterp * xInterpedColour2 + (1-yInterp) * xInterpedColour1; + + return finalColour; +} + +// Calclates the direct lighting at a given point, which is calculated from castRay and interceptVal of theRightIntercept. +__device__ glm::vec3 calcShade (interceptInfo theRightIntercept, mytexture* textureArray) +{ + glm::vec3 shadedColour = glm::vec3 (0,0,0); + if ((theRightIntercept.interceptVal > 0)) + { + if ((theRightIntercept.intrMaterial.hasReflective >= 1.0) || + (theRightIntercept.intrMaterial.hasRefractive >= 1.0)) + shadedColour = theRightIntercept.intrMaterial.specularColor; +// else if (theRightIntercept.intrMaterial.hasTexture) +// shadedColour = getColour (textureArray [theRightIntercept.intrMaterial.textureid], theRightIntercept.UV); + else + shadedColour = theRightIntercept.intrMaterial.color; + } + + return shadedColour; +} + +//TODO: Done! //Core raytracer kernel -__global__ void raytraceRay(glm::vec2 resolution, float time, float bounce, cameraData cam, int rayDepth, glm::vec3* colors, - staticGeom* geoms, int numberOfGeoms, material* materials, int numberOfMaterials){ +__global__ void raytraceRay (float time, cameraData cam, int rayDepth, glm::vec3* colors, staticGeom* geoms, + material* textureArray, mytexture * Textures, sceneInfo objectCountInfo, + bool *primaryArrayOnDevice, ray *rayPoolOnDevice, int rayPoolLength) +{ + extern __shared__ glm::vec3 arrayPool []; + __shared__ glm::vec3 *colourBlock; + __shared__ bool *primArrayBlock; + __shared__ ray *rayPoolBlock; - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - int index = x + (y * resolution.x); + if ((threadIdx.x == 0) && (threadIdx.y == 0)) + { + colourBlock = arrayPool; + primArrayBlock = (bool *) &colourBlock [blockDim.x * blockDim.y]; + rayPoolBlock = (ray *) &primArrayBlock [blockDim.x * blockDim.y]; + } - ray r = raycastFromCameraKernel(resolution, time, x, y, cam.position, cam.view, cam.up, cam.fov); - - if((x<=resolution.x && y<=resolution.y)){ - - float MAX_DEPTH = 100000000000000000; - float depth = MAX_DEPTH; - - for(int i=0; i-EPSILON){ - MAX_DEPTH = depth; - colors[index] = materials[geoms[i].materialid].color; - } - } + __syncthreads (); // Block all threads until the colourBlock, rayPoolBlock + // and primArrayBlock pointers have been bound properly. 
+ // We have a 1-D array of blocks in the grid. From a thread's perspective, it is a 2-D array. + // Ray pool is a massive 1-D array, so we need to compute the index of the element of ray pool + // that each thread will handle. + int index = (blockIdx.x * blockDim.x) + threadIdx.x + // X-part: straightforward + (threadIdx.y * (int)(blockDim.x * ceil ((float)rayPoolLength / (float)(blockDim.x*blockDim.y)))); // Y-part: as below: + // No. of blocks in the grid = ceil (rayPoolLength / (blockDim.x*blockDim.y)) + // Multiplying that with the no. threads in a block gives the no. of threads in a single row of grid. + // Multiplying that with row number (threadIdx.y) and adding the x offset (X-part) gives the index. - //colors[index] = generateRandomNumberFromThread(resolution, time, x, y); - } + // threadID gives the index of the thread when the block of threads is flattened out into a 1D array. + // We need this because we're using shared memory. + int threadID = threadIdx.y*blockDim.x + threadIdx.x; + int colourIndex; + + glm::vec3 shadedColour = glm::vec3 (0); + + if (index < rayPoolLength) + { + primArrayBlock [threadID] = primaryArrayOnDevice [index]; + rayPoolBlock [threadID] = rayPoolOnDevice [index]; + // We compute the index for the colour array separately since it represents a frame + // and each index represents a pixel. If we don't, stream compaction would mess things up. + colourIndex = rayPoolBlock [threadID].y*cam.resolution.x + rayPoolBlock [threadID].x; + colourBlock [threadID] = colors [colourIndex]; + // colourBlock [threadID] therefore represents colour computed by ray through the pixel (x,y) + + interceptInfo theRightIntercept = getIntercept (geoms, objectCountInfo, rayPoolBlock [threadID], textureArray); + shadedColour += calcShade (theRightIntercept, Textures); + + if ((theRightIntercept.intrMaterial.emittance > 0) || (theRightIntercept.interceptVal < 0)) + primArrayBlock [threadID] = false; // Ray did not hit anything or it hit light, so kill it. + else + calculateBSDF (rayPoolBlock [threadID], + rayPoolBlock [threadID].origin + rayPoolBlock [threadID].direction * theRightIntercept.interceptVal, + theRightIntercept.intrNormal, glm::vec3 (0), AbsorptionAndScatteringProperties (), + index*time, theRightIntercept.intrMaterial.color, glm::vec3 (0), theRightIntercept.intrMaterial); + + if (glm::length (colourBlock [threadID]) > 0) + colourBlock [threadID] *= shadedColour; // Add computed shade to shadedColour. + else + colourBlock [threadID] = shadedColour; + } + + __syncthreads (); + + // Copy the rayPool, Colour and Primary arrays back to global memory. + if (index < rayPoolLength) + { + primaryArrayOnDevice [index] = primArrayBlock [threadID]; + rayPoolOnDevice [index] = rayPoolBlock [threadID]; + colors [colourIndex] = colourBlock [threadID]; + } +} + +// Kernel to create the initial pool of rays. 
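+// One ray is generated per pixel via raycastFromCameraKernel. Each ray also stores its own (x,y)
+// pixel coordinates, so that after stream compaction reshuffles the pool a ray can still write to
+// the correct slot of the colour buffer. primaryArray marks every ray as alive; secondaryArray is
+// the scratch array later used for the compaction scan.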
+__global__ void createRayPool (ray *rayPool, bool *primaryArray, int *secondaryArray, + cameraData cam, projectionInfo ProjectionParams) +{ + int x = (blockDim.x * blockIdx.x) + threadIdx.x; + int y = (blockDim.y * blockIdx.y) + threadIdx.y; + int threadID = x + + y * cam.resolution.y; + if (threadID < cam.resolution.x*cam.resolution.y) + { + rayPool [threadID] = raycastFromCameraKernel (cam.resolution, 0, x, y, cam.position, + cam.view, cam.up, cam.fov, ProjectionParams.centreProj, + ProjectionParams.halfVecH, ProjectionParams.halfVecV); + rayPool [threadID].x = (blockDim.x * blockIdx.x) + threadIdx.x; + rayPool [threadID].y = (blockDim.y * blockIdx.y) + threadIdx.y; + + primaryArray [threadID] = true; + secondaryArray [threadID] = 0; + } } +__global__ void copyArray (bool *from, int *to, int fromLength) +{ + int globalIndex = blockDim.x*blockIdx.x + threadIdx.x; + + if (globalIndex < fromLength) + to [globalIndex] = (int)from [globalIndex]; +} -//TODO: FINISH THIS FUNCTION +__global__ void copyArray (ray *from, ray *to, int fromLength) +{ + int globalIndex = blockDim.x*blockIdx.x + threadIdx.x; + + if (globalIndex < fromLength) + to [globalIndex] = from [globalIndex]; +} + +__global__ void copyArray (int *from, int *to, int fromLength) +{ + int globalIndex = blockDim.x*blockIdx.x + threadIdx.x; + + if (globalIndex < fromLength) + to [globalIndex] = from [globalIndex]; +} + +// Kernel to do inclusive scan. +// Do NOT copy the results back in the same kernel as threads in other blocks might be still accessing the same location in +// global memory, causing a read/write conflict. Use copyArray or cudaMemcpy. +__global__ void inclusiveScan (int *secondaryArray, int *tmpArray, int primArrayLength, int iteration) +{ + unsigned long curIndex = blockDim.x*blockIdx.x + threadIdx.x; + long prevIndex = curIndex - floor (pow ((float)2.0, (float)(iteration-1))); + + if (curIndex < primArrayLength) + { + if (/*curIndex >= floor (pow ((float)2.0, (float)(iteration-1)))*/prevIndex >= 0) + tmpArray [curIndex] = secondaryArray [curIndex] + secondaryArray [prevIndex]; + } +} + +// Kernel to shift all elements of Array to the right. +// The last element is thrown out in the process and the first element becomes 0. +// Can convert an inclusive scan result to an exclusive scan. +// Do NOT copy the results back in the same kernel as threads in other blocks might be still accessing the same location in +// global memory, causing a read/write conflict and erroneous values. Use copyArray or cudaMemcpy. +__global__ void shiftRight (int *Array, bool *primaryArray, int arrayLength) +{ + unsigned long curIndex = blockDim.x*blockIdx.x + threadIdx.x; + if (curIndex < arrayLength) + { + if (primaryArray [curIndex]) + Array [curIndex] = Array [curIndex] - 1; + } +} + + +// Kernel to do stream compaction. +__global__ void compactStream (ray *rayPoolOnDevice, ray *tempRayPool, bool *primaryArrayOnDevice, int *secondaryArray, int rayPoolLengthOnDevice) +{ + unsigned long curIndex = blockDim.x*blockIdx.x + threadIdx.x; + if (curIndex < rayPoolLengthOnDevice) + { + int secondArrayIndex = secondaryArray [curIndex]; + if (primaryArrayOnDevice [curIndex]) + tempRayPool [secondArrayIndex] = rayPoolOnDevice [curIndex]; + } +} + +// This kernel will accumulate all the colours calculated in an iteration into the actual colour array. 
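+// Each iteration renders into its own scratch buffer (iterationColour / cudaimage); this kernel adds
+// that buffer into the running accumulator (cudaFinalImage). sendImageToPBO later divides the
+// accumulator by the number of iterations, so the displayed pixel ends up as the running average
+// (1/N) * sum of the per-iteration colours.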
+__global__ void accumulateIterationColour (glm::vec3* accumulator, glm::vec3* iterationColour, glm::vec2 resolution) +{ + int index = (blockDim.y*blockIdx.y + threadIdx.y) * resolution.x + + (blockDim.x*blockIdx.x + threadIdx.x); + if (index < resolution.x*resolution.y) + accumulator [index] += iterationColour [index]; +} + +// This kernel replaces the colours of the respective pixels of all the rays in the ray pool with noise (0,0,0) +__global__ void addNoise (glm::vec3 *localColours, ray *rayPoolOnDevice, int rayPoolLength, glm::vec2 resolution) +{ + // Index calculation, as in raytraceRay + int index = (blockIdx.x * blockDim.x) + threadIdx.x + // X-part + (threadIdx.y * (int)(blockDim.x * ceil ((float)rayPoolLength / (float)(blockDim.x*blockDim.y)))); // Y-part + if (index < rayPoolLength) + { + // Index re-calculation for colour array, as in raytraceRay + ray currentRay = rayPoolOnDevice [index]; + int colourIndex = currentRay.y * resolution.x + currentRay.x; + localColours [colourIndex] = glm::vec3 (0); + } +} + +//TODO: Done! // Wrapper for the __global__ call that sets up the kernel calls and does a ton of memory management -void cudaRaytraceCore(uchar4* PBOpos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms){ +void cudaRaytraceCore(uchar4* PBOpos, camera* renderCam, int frame, int iterations, + material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms, + mytexture* textures, int numberOfTextures){ int traceDepth = 1; //determines how many bounces the raytracer traces + projectionInfo ProjectionParams; + float degToRad = 3.1415926 / 180.0; + + // Set up projection. + ProjectionParams.centreProj = renderCam->positions [frame]+renderCam->views [frame]; + glm::vec3 eyeToProjCentre = ProjectionParams.centreProj - renderCam->positions [frame]; + glm::vec3 A = glm::cross (eyeToProjCentre, renderCam->ups [frame]); + glm::vec3 B = glm::cross (A, eyeToProjCentre); + float lenEyeToProjCentre = glm::length (eyeToProjCentre); + + ProjectionParams.halfVecH = glm::normalize (A) * lenEyeToProjCentre * (float)tan ((renderCam->fov.x*degToRad) / 2.0); + ProjectionParams.halfVecV = glm::normalize (B) * lenEyeToProjCentre * (float)tan ((renderCam->fov.y*degToRad) / 2.0); // set up crucial magic int tileSize = 8; dim3 threadsPerBlock(tileSize, tileSize); - dim3 fullBlocksPerGrid((int)ceil(float(renderCam->resolution.x)/float(tileSize)), (int)ceil(float(renderCam->resolution.y)/float(tileSize))); + dim3 fullBlocksPerGrid((int)ceil(float(renderCam->resolution.x)/float(tileSize))); //send image to GPU - glm::vec3* cudaimage = NULL; - cudaMalloc((void**)&cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); - cudaMemcpy( cudaimage, renderCam->image, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyHostToDevice); - - //package geometry and materials and sent to GPU + glm::vec3* cudaFinalImage = NULL; + cudaMalloc((void**)&cudaFinalImage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); + cudaMemcpy( cudaFinalImage, renderCam->image, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyHostToDevice); + + // package geometry to be sent to GPU global memory staticGeom* geomList = new staticGeom[numberOfGeoms]; - for(int i=0; iups[frame]; cam.fov = renderCam->fov; - //kernel launches - for(int bounce = 1; bounce <= 1; ++bounce) + unsigned int nIterations = renderCam->iterations; + + 
time_t startTime = time (NULL); + std::default_random_engine randomNumGen (hash (startTime)); + std::uniform_real_distribution jitter ((float)0, (float)0.142); + + float movement = 3.0/nIterations; // For motion blur. + int nBounces = 6; + int oneEighthDivisor = nIterations / 8; // For antialiasing. + int errCount = 0; + // For each point sampled in the area light, launch the raytraceRay Kernel which will compute the diffuse, specular, ambient + // and shadow colours. It will also compute reflected colours for reflective surfaces. + for (int i = 0; i < nIterations; i ++) { - raytraceRay<<>>(renderCam->resolution, (float)iterations, (float)bounce, cam, traceDepth, cudaimage, cudageoms, numberOfGeoms, cudamaterials, numberOfMaterials); + glm::vec3* cudaimage = NULL; + cudaMalloc((void**)&cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); + cudaMemset (cudaimage, 0, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3)); + + + float zAdd = jitter (randomNumGen); + float xAdd = jitter (randomNumGen); + glm::vec3 curLightSamplePos = lightPosInBodySpace; + + if (!(i%oneEighthDivisor)) // Supersampling at 8x! + { + cam.position.y += zAdd*0.002; + cam.position.x += xAdd*0.002; + } + + if (!((i*4)/(3*nIterations))) + { + // Motion blur! + geomList [primCounts.nCubes].translation += glm::vec3 (movement, 0, 0); + glm::mat4 transform = utilityCore::buildTransformationMatrix(geomList [primCounts.nCubes].translation, + geomList [primCounts.nCubes].rotation, + geomList [primCounts.nCubes].scale); + geomList [primCounts.nCubes].transform = utilityCore::glmMat4ToCudaMat4(transform); + geomList [primCounts.nCubes].inverseTransform = utilityCore::glmMat4ToCudaMat4(glm::inverse(transform)); + } + // Now copy the geometry list to the GPU global memory. + cudaMemcpy( cudageoms, geomList, numberOfGeoms*sizeof(staticGeom), cudaMemcpyHostToDevice); + + // Create Ray Pool. + int rayPoolLength = cam.resolution.x * cam.resolution.y; + ray *rayPoolOnDevice = NULL; + cudaMalloc ((void **)&rayPoolOnDevice, rayPoolLength * sizeof (ray)); + + // Primary Array -> Array holding the true/false value specifying whether the ray is alive (true) or dead (false). + bool *primaryArrayOnHost = new bool [rayPoolLength]; + memset (primaryArrayOnHost, true, rayPoolLength * sizeof(bool)); + bool *primaryArrayOnDevice = NULL; + cudaMalloc ((void **)&primaryArrayOnDevice, rayPoolLength * sizeof (bool)); + + // Secondary Array -> Array that will hold the indices of rays that are alive. Used in stream compaction. + int *secondaryArrayOnDevice = NULL; + cudaMalloc ((void **)&secondaryArrayOnDevice, rayPoolLength * sizeof (int)); + int *secondaryArrayOnHost = new int [rayPoolLength]; + + // Launch createRayPool kernel to create the ray pool and populate the primary and secondary arrays. + fullBlocksPerGrid = dim3 ((int)ceil(float(cam.resolution.x)/threadsPerBlock.x), (int)ceil(float(cam.resolution.y)/threadsPerBlock.y)); + createRayPool<<>> (rayPoolOnDevice, primaryArrayOnDevice, secondaryArrayOnDevice, cam, ProjectionParams); + + dim3 threadsPerBlock1D (threadsPerBlock.x*threadsPerBlock.y); + // Iterate until nBounces: launch kernel to trace each ray bounce. 
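+    // Per bounce: trace every live ray, kill rays that hit a light or miss the scene, then compact
+    // the pool so later bounces only touch surviving rays. The exclusive scan over the alive flags
+    // gives each survivor its slot in the compacted pool; e.g. alive = [1,0,1,1,0] scans to
+    // [0,1,1,2,3], so the three survivors scatter to slots 0, 1 and 2 and the compacted length is 3.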
+ for (int j = 0; j < nBounces; ++j) + { + // The core raytraceRay kernel launch + fullBlocksPerGrid = dim3 ((int)ceil(float(rayPoolLength)/(threadsPerBlock.x*threadsPerBlock.y))); + raytraceRay<<>> + ((float)j+(i*nBounces), cam, j, cudaimage, cudageoms, materialColours, textureArray, primCounts, primaryArrayOnDevice, + rayPoolOnDevice, rayPoolLength); + + /// ----- CPU/GPU Hybrid Stream Compaction ----- /// + // Scan is done on the CPU, the actual compaction happens on the GPU. + // ------------------------------------------------------------------ + // Copy the primary array from device to host. + cudaMemcpy (primaryArrayOnHost, primaryArrayOnDevice, rayPoolLength * sizeof (bool), cudaMemcpyDeviceToHost); + + // Exclusive scan. + secondaryArrayOnHost [0] = 0; + for (int k = 1; k < rayPoolLength; ++ k) + secondaryArrayOnHost [k] = secondaryArrayOnHost [k-1] + primaryArrayOnHost [k-1]; + // This is because the compactStream kernel should run on the whole, uncompacted array. + // We'll set this to rayPoolLength once compactStream has done its job. + int compactedRayPoolLength = secondaryArrayOnHost [rayPoolLength-1] + primaryArrayOnHost [rayPoolLength-1]; + + // Stream compaction. Compact the ray pool into tmpRayPool. + ray *tmpRayPool = NULL; + cudaMalloc ((void **)&tmpRayPool, rayPoolLength * sizeof (ray)); + cudaMemcpy (secondaryArrayOnDevice, secondaryArrayOnHost, rayPoolLength * sizeof (int), cudaMemcpyHostToDevice); + compactStream<<>> (rayPoolOnDevice, tmpRayPool, primaryArrayOnDevice, secondaryArrayOnDevice, rayPoolLength); + + // Now set rayPoolLength to the compacted array size, compactedRayPoolLength. + rayPoolLength = compactedRayPoolLength; + + // Copy the ray pool from tmpRayPool back into rayPoolOnDevice. + copyArray<<>> (tmpRayPool, rayPoolOnDevice, rayPoolLength); + cudaFree (tmpRayPool); + + // Set the primary array to all trues because all rays in the ray pool are alive, + // now that stream compaction has already happened. + cudaMemset (primaryArrayOnDevice, true, rayPoolLength * sizeof (bool)); + } + checkCUDAError ("One or more of the raytrace/stream compaction kernels failed. "); + + // At this point, since stream compaction has already taken place, + // it means that rayPoolOnDevice contains only rays that are still alive. + fullBlocksPerGrid = dim3 ((int)ceil(float(rayPoolLength)/(threadsPerBlock.x*threadsPerBlock.y))); + addNoise<<>>(cudaimage, rayPoolOnDevice, rayPoolLength, cam.resolution); + + fullBlocksPerGrid = dim3 ((int)ceil(float(cam.resolution.x)/threadsPerBlock.x), (int)ceil(float(cam.resolution.y)/threadsPerBlock.y)); + accumulateIterationColour<<>>(cudaFinalImage, cudaimage, cam.resolution); + checkCUDAError("accumulateIterationColour Kernel failed!"); + + cudaFree (rayPoolOnDevice); + cudaFree (primaryArrayOnDevice); + cudaFree (secondaryArrayOnDevice); + cudaFree (cudaimage); + + rayPoolOnDevice = NULL; + primaryArrayOnDevice = NULL; + cudaimage = NULL; + + delete [] primaryArrayOnHost; + delete [] secondaryArrayOnHost; + + std::cout << "\rRendering.. " << ceil ((float)i/(nIterations-1) * 100) << " percent complete."; } - sendImageToPBO<<>>(PBOpos, renderCam->resolution, cudaimage); + // Accumulate all the colours in the cudaFinalImage memory block on the GPU, and divide + // by the no. of light samples to get the final colour. + sendImageToPBO<<>>(PBOpos, renderCam->resolution, cudaFinalImage, nIterations); + std::cout.precision (4); + std::cout << "\nRendered in " << difftime (time (NULL), startTime) << " seconds. 
\n\n"; + //retrieve image from GPU - cudaMemcpy( renderCam->image, cudaimage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyDeviceToHost); + cudaMemcpy( renderCam->image, cudaFinalImage, (int)renderCam->resolution.x*(int)renderCam->resolution.y*sizeof(glm::vec3), cudaMemcpyDeviceToHost); //free up stuff, or else we'll leak memory like a madman - cudaFree( cudaimage ); - cudaFree( cudageoms ); - cudaFree( cudamaterials ); - delete [] geomList; + if (cudaFinalImage) + cudaFree( cudaFinalImage ); + if (cudageoms) + cudaFree( cudageoms ); + if (materialColours) + { + cudaFree (materialColours); + } + if (textureArray) + { + cudaFree (textureArray); + } + + cudaFinalImage = NULL; + cudageoms = NULL; + materialColours = NULL; - // make certain the kernel has completed + // make certain the kernel has completed cudaThreadSynchronize(); + + delete [] geomList; checkCUDAError("Kernel failed!"); } diff --git a/src/raytraceKernel.h b/src/raytraceKernel.h index 5fcf5a3..c33da12 100755 --- a/src/raytraceKernel.h +++ b/src/raytraceKernel.h @@ -20,6 +20,16 @@ #include #endif -void cudaRaytraceCore(uchar4* pos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms); +void cudaRaytraceCore(uchar4* pos, camera* renderCam, int frame, int iterations, material* materials, int numberOfMaterials, geom* geoms, int numberOfGeoms, mytexture* textures, int numberOfTextures); +void setupProjection (projectionInfo &ProjectionParams, glm::vec3 eye, glm::vec3 view, glm::vec3 up, glm::vec2 fov); +void onDeviceErrorExit (cudaError_t errorCode, glm::vec3 *cudaimage, staticGeom *cudageom, material * materialColours, int numberOfMaterials); + +__host__ __device__ glm::vec3 reflectRay (glm::vec3 incidentRay, glm::vec3 normal); +__device__ bool isShadowRayBlocked (ray r, glm::vec3 lightPos, staticGeom *geomsList, sceneInfo objectCountInfo); +__host__ __device__ bool isApproximate (float valToBeCompared, float valToBeCheckedAgainst) ; +//{ if ((valToBeCompared >= valToBeCheckedAgainst-0.001) && (valToBeCompared <= valToBeCheckedAgainst+0.001)) return true; return false; } +__device__ unsigned long getIndex (int x, int y, int MaxWidth); +//{ return (unsigned long) y*MaxWidth + x ; } +__device__ glm::vec3 getColour (material Material, glm::vec2 UVcoords); #endif diff --git a/src/scene.cpp b/src/scene.cpp index 415d627..6398c1c 100755 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -7,6 +7,7 @@ #include #include "scene.h" #include +#include "stb_image/stb_image.h" scene::scene(string filename){ cout << "Reading scene from " << filename << " ..." << endl; @@ -29,11 +30,24 @@ scene::scene(string filename){ loadCamera(); cout << " " << endl; } + else if(strcmp(tokens[0].c_str(), "TEXTUREID")==0){ + loadTexture(tokens[1]); + cout << " " << endl; + } } } } } +scene::~scene () +{ + for (int i = 0; i < textures.size (); i++) + { + if (textures [i].texels) + delete [] textures [i].texels; + } +} + int scene::loadObject(string objectid){ int id = atoi(objectid.c_str()); if(id!=objects.size()){ @@ -227,7 +241,13 @@ int scene::loadMaterial(string materialid){ }else{ cout << "Loading Material " << id << "..." 
<< endl; material newMaterial; - + + newMaterial.hasTexture = false; + newMaterial.textureid = 0; + + newMaterial.hasNormalMap = false; + newMaterial.nmapid = 0; + //load static properties for(int i=0; i<10; i++){ string line; @@ -255,11 +275,57 @@ int scene::loadMaterial(string materialid){ }else if(strcmp(tokens[0].c_str(), "RSCTCOEFF")==0){ newMaterial.reducedScatterCoefficient = atof(tokens[1].c_str()); }else if(strcmp(tokens[0].c_str(), "EMITTANCE")==0){ - newMaterial.emittance = atof(tokens[1].c_str()); - + newMaterial.emittance = atof(tokens[1].c_str()); + } + else if (strcmp(tokens[0].c_str(), "TEXTURE")==0) + { + newMaterial.hasTexture = true; + newMaterial.textureid = atof(tokens[1].c_str()); + } + else if (strcmp(tokens[0].c_str(), "NMAP")==0) + { + newMaterial.hasNormalMap = true; + newMaterial.nmapid = atof(tokens[1].c_str()); + } } - } materials.push_back(newMaterial); return 1; + } + + + } + +int scene::loadTexture(string textureid){ + int id = atoi(textureid.c_str()); + if(id!=textures.size()){ + cout << "ERROR: TEXTURE ID does not match expected number of textures" << endl; + return -1; + }else{ + cout << "Loading Texture " << id << "..." << endl; + mytexture newTexture; + + //load static properties + string line; + utilityCore::safeGetline(fp_in,line); + vector tokens = utilityCore::tokenizeString(line); + if (strcmp(tokens[0].c_str(), "FILE")==0) + { + int nComps; + unsigned char *bytes = stbi_load(tokens [1].c_str (), &newTexture.texelWidth, &newTexture.texelHeight, &nComps, 3); + if (bytes) + { + newTexture.texels = new glm::vec3 [newTexture.texelWidth * newTexture.texelHeight]; + for (int i = 0; i < (newTexture.texelWidth * newTexture.texelHeight); i ++) + { + newTexture.texels [i].r = bytes [3*i] / 255.0; + newTexture.texels [i].g = bytes [3*i + 1] / 255.0; + newTexture.texels [i].b = bytes [3*i + 2] / 255.0; + } + } + stbi_image_free (bytes); + } + textures.push_back(newTexture); + return 1; + } + } -} diff --git a/src/scene.h b/src/scene.h index 9bfa71f..05f8dcb 100755 --- a/src/scene.h +++ b/src/scene.h @@ -21,6 +21,7 @@ class scene{ ifstream fp_in; int loadMaterial(string materialid); int loadObject(string objectid); + int loadTexture (string textureID); int loadCamera(); public: scene(string filename); @@ -28,6 +29,7 @@ class scene{ vector objects; vector materials; + vector textures; camera renderCam; }; diff --git a/src/sceneStructs.h b/src/sceneStructs.h index b10f1cf..1555f0a 100755 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -16,6 +16,7 @@ enum GEOMTYPE{ SPHERE, CUBE, MESH }; struct ray { glm::vec3 origin; glm::vec3 direction; + unsigned short x, y; }; struct geom { @@ -60,6 +61,14 @@ struct camera { std::string imageName; }; +struct mytexture +{ + unsigned int textureID; + int texelHeight; + int texelWidth; + glm::vec3 * texels; +}; + struct material{ glm::vec3 color; float specularExponent; @@ -68,9 +77,49 @@ struct material{ float hasRefractive; float indexOfRefraction; float hasScatter; + bool hasTexture; + unsigned int textureid; + bool hasNormalMap; + unsigned int nmapid; glm::vec3 absorptionCoefficient; float reducedScatterCoefficient; float emittance; }; +struct projectionInfo +{ + glm::vec3 centreProj; + glm::vec3 halfVecH; + glm::vec3 halfVecV; +}; + +struct interceptInfo +{ + float interceptVal; + glm::vec3 intrNormal; + material intrMaterial; + glm::vec2 UV; +}; + +struct sceneInfo +{ + int nCubes; + int nSpheres; + int nMeshes; +}; + +struct renderInfo +{ + float kd; + float ks; + float ka; + + int nLights; + int sqrtLights; + float 
lightStepSize; + glm::vec3 lightPos; + glm::vec3 lightCol; +}; + + #endif //CUDASTRUCTS_H diff --git a/src/utilities.h b/src/utilities.h index 84ec55f..e23ba1f 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -20,7 +20,7 @@ #define PI 3.1415926535897932384626422832795028841971 #define TWO_PI 6.2831853071795864769252867665590057683943 #define SQRT_OF_ONE_THIRD 0.5773502691896257645091487805019574556476 -#define NATURAL_E 2.7182818284590452353602874713526624977572 +#define E 2.7182818284590452353602874713526624977572 #define EPSILON .000000001 #define ZERO_ABSORPTION_EPSILON 0.00001 #define RAY_BIAS_AMOUNT 0.0002