Compatibility with GPU capability 70 and below #22

Merged · 3 commits · Jan 17, 2022
22 changes: 18 additions & 4 deletions .github/workflows/main.yml
@@ -21,6 +21,12 @@ jobs:
- os: ubuntu-18.04
cuda: "10.2"
arch: 75
+ - os: ubuntu-18.04
+ cuda: "10.2"
+ arch: 70
+ - os: ubuntu-18.04
+ cuda: "10.2"
+ arch: 60
env:
build_dir: "build"
config: "Release"
@@ -53,10 +59,18 @@ jobs:
visual_studio: "Visual Studio 16 2019"
cuda: "11.5.1"
arch: 86
- # - os: windows-2016
- #   visual_studio: "Visual Studio 15 2017"
- #   cuda: "10.2.89"
- #   arch: 75
+ - os: windows-2019
+ visual_studio: "Visual Studio 16 2019"
+ cuda: "11.5.1"
+ arch: 75
+ - os: windows-2019
+ visual_studio: "Visual Studio 16 2019"
+ cuda: "11.5.1"
+ arch: 70
+ - os: windows-2019
+ visual_studio: "Visual Studio 16 2019"
+ cuda: "11.5.1"
+ arch: 60
env:
build_dir: "build"
config: "Release"
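The new matrix entries extend CI coverage from compute capability 75 (Turing) down to 70 (Volta) and 60 (Pascal). To see which capability a local GPU reports, a minimal standalone check (a sketch for reference, not part of this PR) can query the device properties at runtime:

```cpp
#include <cstdio>
#include <cuda_runtime.h>

int main() {
	cudaDeviceProp prop;
	if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) {
		std::fprintf(stderr, "no CUDA device found\n");
		return 1;
	}
	// major and minor concatenate into the "arch" value used by the CI matrix,
	// e.g. 7.5 -> 75, 7.0 -> 70, 6.0 -> 60.
	std::printf("compute capability: %d%d\n", prop.major, prop.minor);
	return 0;
}
```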
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -181,6 +181,7 @@ set(TCNN_BUILD_EXAMPLES OFF)
add_subdirectory(dependencies/tiny-cuda-nn)
include_directories("dependencies/tiny-cuda-nn/include")
include_directories("dependencies/tiny-cuda-nn/dependencies")
+ add_definitions(${TCNN_DEFINITIONS})

##############
# tinylogger #
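The line added here forwards tiny-cuda-nn's compile definitions (notably the minimum targeted GPU architecture) to instant-ngp's own translation units. This matters because tiny-cuda-nn selects its network precision type from that architecture, so both sides of the library boundary must agree on it. Roughly, as a simplified sketch of the mechanism in `tiny-cuda-nn/common.h` (not the exact upstream code):

```cpp
// TCNN_MIN_GPU_ARCH arrives via TCNN_DEFINITIONS. If the consuming project
// does not see it, the two sides can disagree on the size of
// network_precision_t and corrupt data at the library boundary.
#if defined(TCNN_MIN_GPU_ARCH) && TCNN_MIN_GPU_ARCH >= 70
using network_precision_t = __half; // Volta and newer: fast fp16 arithmetic
#else
using network_precision_t = float;  // Pascal and older: fall back to fp32
#endif
```

This is also why the headers below switch from `__half` to `tcnn::network_precision_t`: on capability 60 the type resolves to `float`, and hard-coded `__half` signatures would no longer match.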
12 changes: 6 additions & 6 deletions README.md
@@ -32,7 +32,7 @@ sudo apt-get install build-essential git python3-dev python3-pip libopenexr-dev
libglfw3-dev libglew-dev libomp-dev libxinerama-dev libxcursor-dev
```

- We also recommend installing [CUDA](https://developer.nvidia.com/cuda-toolkit) and [OptiX](https://developer.nvidia.com/optix) in `/usr/local/` and adding the CUDA installation to your path.
+ We also recommend installing [CUDA](https://developer.nvidia.com/cuda-toolkit) and [OptiX](https://developer.nvidia.com/optix) in `/usr/local/` and adding the CUDA installation to your PATH.
For example, if you have CUDA 11.4, add the following to your `~/.bashrc`
```sh
export PATH="/usr/local/cuda-11.4/bin:$PATH"
@@ -128,7 +128,7 @@ instant-ngp$ ./build/testbed --mode volume --scene data/volume/wdas_cloud_quarte
Our NeRF implementation expects initial camera parameters to be provided in a `transforms.json` file in a format compatible with [the original NeRF codebase](https://www.matthewtancik.com/nerf).
We provide a script as a convenience, `scripts/colmap2nerf.py`, that can be used to process a video file or sequence of images, using the open source [COLMAP](https://colmap.github.io/) structure from motion software to extract the necessary camera data.

- Make sure that you have installed [COLMAP](https://colmap.github.io/) and that it is available in your PATH. If you are using a video file as input, also be sure to install [FFMPEG](https://www.ffmpeg.org/) and make sure that it is available in your PATH.
+ Make sure that you have installed [COLMAP](https://colmap.github.io/) and that it is available in your PATH. If you are using a video file as input, also be sure to install [FFmpeg](https://www.ffmpeg.org/) and make sure that it is available in your PATH.
To check that this is the case, from a terminal window, you should be able to run `colmap` and `ffmpeg -?` and see some help text from each.

If you are training from a video file, run the `colmap2nerf.py` script from the folder containing the video, with the following recommended parameters:
@@ -145,12 +145,12 @@ For training from images, place them in a subfolder called `images` and then use
data-folder$ python [path-to-instant-ngp]/scripts/colmap2nerf.py --colmap_matcher exhaustive --run_colmap --aabb_scale 16
```

- The script will run ffmpeg and/or COLMAP as needed, followed by a conversion step to the required `transforms.json` format, which will be written in the current directory.
+ The script will run FFmpeg and/or COLMAP as needed, followed by a conversion step to the required `transforms.json` format, which will be written in the current directory.

- By default, the script invokes colmap with the 'sequential matcher', which is suitable for images taken from a smoothly changing camera path, as in a video. The exhaustive matcher is more appropriate if the images are in no particular order, as shown in the image example above.
+ By default, the script invokes colmap with the "sequential matcher", which is suitable for images taken from a smoothly changing camera path, as in a video. The exhaustive matcher is more appropriate if the images are in no particular order, as shown in the image example above.
For more options, you can run the script with `--help`. For more advanced uses of COLMAP or for challenging scenes, please see the [COLMAP documentation](https://colmap.github.io/cli.html); you may need to modify the `scripts/colmap2nerf.py` script itself.

- The `aabb_scale` parameter is the most important `instant-ngp` specific parameter. It specifies the extent of the scene, defaulting to 1; that is, the scene is scaled such that the camera positions are at an average distance of 1 unit from the origin. For small synthetic scenes such as the original NeRF dataset, the default `aabb_scale` of 1 is ideal and leads to fastest training. The NeRF model makes the assumption that the training images can entirely be explained by a scene contained within this bounding box. However, for natural scenes where there is a background that extends beyond this bounding box, the NeRF model will struggle and may hallucinate 'floaters' at the boundaries of the box. By setting `aabb_scale` to a larger power of 2 (up to a maximum of 16), the NeRF model will extend rays to a much larger bounding box. Note that this can impact training speed slightly. If in doubt, for natural scenes, start with an `aabb_scale` of 16, and subsequently reduce it if possible. The value can be directly edited in the `transforms.json` output file, without re-running the `colmap2nerf` script.
+ The `aabb_scale` parameter is the most important `instant-ngp` specific parameter. It specifies the extent of the scene, defaulting to 1; that is, the scene is scaled such that the camera positions are at an average distance of 1 unit from the origin. For small synthetic scenes such as the original NeRF dataset, the default `aabb_scale` of 1 is ideal and leads to fastest training. The NeRF model makes the assumption that the training images can entirely be explained by a scene contained within this bounding box. However, for natural scenes where there is a background that extends beyond this bounding box, the NeRF model will struggle and may hallucinate "floaters" at the boundaries of the box. By setting `aabb_scale` to a larger power of 2 (up to a maximum of 16), the NeRF model will extend rays to a much larger bounding box. Note that this can impact training speed slightly. If in doubt, for natural scenes, start with an `aabb_scale` of 16, and subsequently reduce it if possible. The value can be directly edited in the `transforms.json` output file, without re-running the `colmap2nerf` script.
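If you would rather change the value programmatically than edit the file by hand, a minimal sketch using the nlohmann::json library (assumed available; only the `aabb_scale` key and the `transforms.json` file name come from the text above):

```cpp
#include <fstream>
#include <nlohmann/json.hpp>

int main() {
	std::ifstream in("transforms.json");
	nlohmann::json transforms = nlohmann::json::parse(in);
	in.close();

	// Natural scene with a distant background: start large, reduce if possible.
	transforms["aabb_scale"] = 16;

	std::ofstream out("transforms.json");
	out << transforms.dump(2) << "\n";
}
```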

Assuming success, you can now train your NeRF model as follows, starting in the `instant-ngp` folder:

@@ -162,7 +162,7 @@ instant-ngp$ ./build/testbed --mode nerf --scene [path to training data folder c

The NeRF model trains best with between 50-150 images which exhibit minimal scene movement, motion blur or other blurring artefacts. The quality of reconstruction is predicated on COLMAP being able to extract accurate camera parameters from the images.

- The `colmap2nerf.py` script assumes that the training images are all pointing approximately at a shared 'point of interest', which it places at the origin. This point is found by taking a weighted average of the closest points of approach between the rays through the central pixel of all pairs of training images. In practice, this means that the script works best when the training images have been captured 'pointing inwards' towards the object of interest, although they do not need to complete a full 360 view of it. Any background visible behind the object of interest will still be reconstructed if `aabb_scale` is set to a number larger than 1, as explained above.
+ The `colmap2nerf.py` script assumes that the training images are all pointing approximately at a shared point of interest, which it places at the origin. This point is found by taking a weighted average of the closest points of approach between the rays through the central pixel of all pairs of training images. In practice, this means that the script works best when the training images have been captured pointing inwards towards the object of interest, although they do not need to complete a full 360 view of it. Any background visible behind the object of interest will still be reconstructed if `aabb_scale` is set to a number larger than 1, as explained above.
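For intuition, the closest point of approach between two rays has a standard closed form; a minimal Eigen sketch of that sub-step (illustrative only, since the actual script does this in Python and the pairwise weighting is omitted here):

```cpp
#include <cmath>
#include <Eigen/Dense>

// Midpoint of the shortest segment between rays o1 + s*d1 and o2 + t*d2.
Eigen::Vector3f closest_point_of_approach(
	const Eigen::Vector3f& o1, const Eigen::Vector3f& d1,
	const Eigen::Vector3f& o2, const Eigen::Vector3f& d2
) {
	const Eigen::Vector3f w = o1 - o2;
	const float a = d1.dot(d1), b = d1.dot(d2), c = d2.dot(d2);
	const float d = d1.dot(w), e = d2.dot(w);
	const float denom = a * c - b * b; // approaches 0 for parallel rays
	if (std::fabs(denom) < 1e-8f) {
		return o1; // degenerate: no unique closest point
	}
	const float s = (b * e - c * d) / denom;
	const float t = (a * e - b * d) / denom;
	return 0.5f * ((o1 + s * d1) + (o2 + t * d2));
}
```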

## Python bindings

27 changes: 15 additions & 12 deletions include/neural-graphics-primitives/marching_cubes.h
@@ -14,6 +14,9 @@

#pragma once

+ #include <tiny-cuda-nn/common.h>


NGP_NAMESPACE_BEGIN

void marching_cubes_gpu(BoundingBox aabb, Eigen::Vector3i res_3d, float thresh, const tcnn::GPUMemory<float> &density, tcnn::GPUMemory<Eigen::Vector3f> &vert_out, tcnn::GPUMemory<uint32_t> &indices_out);
@@ -22,37 +25,37 @@ void marching_cubes_gpu(BoundingBox aabb, Eigen::Vector3i res_3d, float thresh,
void compute_mesh_1ring(const tcnn::GPUMemory<Eigen::Vector3f> &verts, const tcnn::GPUMemory<uint32_t> &indices, tcnn::GPUMemory<Eigen::Vector4f> &output_pos, tcnn::GPUMemory<Eigen::Vector3f> &output_normals);

void compute_mesh_opt_gradients(float thresh,
- const tcnn::GPUMemory<Eigen::Vector3f> &verts, const tcnn::GPUMemory<Eigen::Vector3f> &vert_normals,
- const tcnn::GPUMemory<Eigen::Vector4f> &verts_smoothed,
- uint32_t padded_output_width, const __half *densities,
+ const tcnn::GPUMemory<Eigen::Vector3f>& verts, const tcnn::GPUMemory<Eigen::Vector3f>& vert_normals,
+ const tcnn::GPUMemory<Eigen::Vector4f>& verts_smoothed,
+ uint32_t padded_output_width, const tcnn::network_precision_t* densities,
uint32_t input_gradient_width, const float *input_gradients,
tcnn::GPUMemory<Eigen::Vector3f> &verts_gradient_out,
float k_smooth_amount, float k_density_amount, float k_inflate_amount
);

- void save_mesh(tcnn::GPUMemory<Eigen::Vector3f> &verts,
- tcnn::GPUMemory<Eigen::Vector3f> &normals,
- tcnn::GPUMemory<uint32_t> &indices,
- const char *optional_outputname,
+ void save_mesh(tcnn::GPUMemory<Eigen::Vector3f>& verts,
+ tcnn::GPUMemory<Eigen::Vector3f>& normals,
+ tcnn::GPUMemory<uint32_t>& indices,
+ const char* optional_outputname,
bool unwrap_it,
float nerf_scale,
Eigen::Vector3f nerf_offset
);

#ifdef NGP_GUI
void draw_mesh_gl(
- const tcnn::GPUMemory<Eigen::Vector3f> &verts,
- const tcnn::GPUMemory<Eigen::Vector3f> &normals,
- const tcnn::GPUMemory<Eigen::Vector3f> &cols,
- const tcnn::GPUMemory<uint32_t> &indices,
+ const tcnn::GPUMemory<Eigen::Vector3f>& verts,
+ const tcnn::GPUMemory<Eigen::Vector3f>& normals,
+ const tcnn::GPUMemory<Eigen::Vector3f>& cols,
+ const tcnn::GPUMemory<uint32_t>& indices,
Eigen::Vector2i resolution, Eigen::Vector2f focal_length,
Eigen::Matrix<float, 3, 4> camera_matrix,
Eigen::Vector2f screen_center,
int mesh_render_mode
);
#endif

- void save_density_grid_to_png(const tcnn::GPUMemory<float> &density, const char *filename, Eigen::Vector3i res3d, float thresh, bool swap_y_z=true);
+ void save_density_grid_to_png(const tcnn::GPUMemory<float> &density, const char *filename, Eigen::Vector3i res3d, float thresh, bool swap_y_z = true);

NGP_NAMESPACE_END

29 changes: 15 additions & 14 deletions src/marching_cubes.cu
@@ -194,20 +194,20 @@ void main() {
glUniform2i(glGetUniformLocation(program, "res"), resolution.x(),resolution.y());
glUniform1i(glGetUniformLocation(program, "mode"), mesh_render_mode);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, els);
- GLuint posat = (GLuint)glGetAttribLocation(program, "pos");
+ GLuint posat = (GLuint)glGetAttribLocation(program, "pos");
GLuint norat = (GLuint)glGetAttribLocation(program, "nor");
GLuint colat = (GLuint)glGetAttribLocation(program, "col");
- glEnableVertexAttribArray(posat);
+ glEnableVertexAttribArray(posat);
glEnableVertexAttribArray(norat);
glEnableVertexAttribArray(colat);
- glBindBuffer(GL_ARRAY_BUFFER, VBO[0]);
- glVertexAttribPointer(posat, 3, GL_FLOAT, GL_FALSE, 3*4, 0);
+ glBindBuffer(GL_ARRAY_BUFFER, VBO[0]);
+ glVertexAttribPointer(posat, 3, GL_FLOAT, GL_FALSE, 3*4, 0);
glBindBuffer(GL_ARRAY_BUFFER, VBO[1]);
- glVertexAttribPointer(norat, 3, GL_FLOAT, GL_FALSE, 3*4, 0);
+ glVertexAttribPointer(norat, 3, GL_FLOAT, GL_FALSE, 3*4, 0);
glBindBuffer(GL_ARRAY_BUFFER, VBO[2]);
- glVertexAttribPointer(colat, 3, GL_FLOAT, GL_FALSE, 3*4, 0);
- glCullFace(GL_BACK);
- glDisable(GL_CULL_FACE);
+ glVertexAttribPointer(colat, 3, GL_FLOAT, GL_FALSE, 3*4, 0);
+ glCullFace(GL_BACK);
+ glDisable(GL_CULL_FACE);
glEnable(GL_DEPTH_TEST);
glDrawElements(GL_TRIANGLES, (GLsizei)indices.size(), GL_UNSIGNED_INT , (GLvoid*)0);
glDisable(GL_CULL_FACE);
@@ -231,7 +231,7 @@ with z=1

edges 8-11 go in +z direction from vertex 0-3
*/
- __global__ void gen_vertices(BoundingBox aabb, Eigen::Vector3i res_3d, const float* __restrict__ density, int*__restrict__ vertidx_grid, Eigen::Vector3f* verts_out, float thresh, uint32_t *__restrict__ counters) {
+ __global__ void gen_vertices(BoundingBox aabb, Eigen::Vector3i res_3d, const float* __restrict__ density, int*__restrict__ vertidx_grid, Eigen::Vector3f* verts_out, float thresh, uint32_t* __restrict__ counters) {
uint32_t x = blockIdx.x * blockDim.x + threadIdx.x;
uint32_t y = blockIdx.y * blockDim.y + threadIdx.y;
uint32_t z = blockIdx.z * blockDim.z + threadIdx.z;
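For context, this kernel assigns one thread per cell of the density grid, so the launch must tile `res_3d` in three dimensions. Schematically (a sketch only; the block size and buffer names here are illustrative, not this file's actual launch code):

```cpp
// One thread per grid cell; round each block count up so the grid is covered.
const dim3 threads = { 8, 8, 8 };
const dim3 blocks = {
	(unsigned int)(res_3d.x() + threads.x - 1) / threads.x,
	(unsigned int)(res_3d.y() + threads.y - 1) / threads.y,
	(unsigned int)(res_3d.z() + threads.z - 1) / threads.z,
};
gen_vertices<<<blocks, threads>>>(
	aabb, res_3d, density.data(), vertidx_grid.data(), verts_out.data(), thresh, counters.data()
);
```

An in-kernel bounds check against `res_3d` then discards the overhang threads from the rounded-up launch.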
@@ -678,9 +678,10 @@ void compute_mesh_1ring(const tcnn::GPUMemory<Eigen::Vector3f> &verts, const tcn
linear_kernel(accumulate_1ring, 0, nullptr, indices.size()/3, indices.data(), verts.data(), output_pos.data(), output_normals.data());
}

- __global__ void compute_mesh_opt_gradients_kernel(uint32_t n_verts, float thresh, const Eigen::Vector3f *verts, const Eigen::Vector3f *normals, const Eigen::Vector4f *verts_smoothed,
- uint32_t padded_output_width, const __half *densities,
- uint32_t input_gradient_width, const float *input_gradients, Eigen::Vector3f *verts_gradient_out,
+ __global__ void compute_mesh_opt_gradients_kernel(
+ uint32_t n_verts, float thresh, const Eigen::Vector3f* verts, const Eigen::Vector3f* normals, const Eigen::Vector4f* verts_smoothed,
+ uint32_t padded_output_width, const network_precision_t* densities,
+ uint32_t input_gradient_width, const float* input_gradients, Eigen::Vector3f* verts_gradient_out,
float k_smooth_amount, float k_density_amount, float k_inflate_amount
) {
uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -706,7 +707,7 @@ __global__ void compute_mesh_opt_gradients_kernel(uint32_t n_verts, float thresh
void compute_mesh_opt_gradients(float thresh,
const tcnn::GPUMemory<Eigen::Vector3f> &verts, const tcnn::GPUMemory<Eigen::Vector3f> &normals,
const tcnn::GPUMemory<Eigen::Vector4f> &verts_smoothed,
- uint32_t padded_output_width, const __half *densities,
+ uint32_t padded_output_width, const network_precision_t* densities,
uint32_t input_gradients_width, const float *input_gradients,
GPUMemory<Eigen::Vector3f> &verts_gradient_out,
float k_smooth_amount, float k_density_amount, float k_inflate_amount
@@ -731,7 +732,7 @@ void compute_mesh_opt_gradients(float thresh,
);
}

- void marching_cubes_gpu(BoundingBox aabb, Eigen::Vector3i res_3d, float thresh, const tcnn::GPUMemory<float> &density, tcnn::GPUMemory<Eigen::Vector3f> &verts_out, tcnn::GPUMemory<uint32_t> &indices_out) {
+ void marching_cubes_gpu(BoundingBox aabb, Eigen::Vector3i res_3d, float thresh, const tcnn::GPUMemory<float> &density, tcnn::GPUMemory<Eigen::Vector3f>& verts_out, tcnn::GPUMemory<uint32_t>& indices_out) {
GPUMemory<uint32_t> counters;

counters.enlarge(4);
Expand Down