Read: 3DGS | Debug Code

The 2 submodules are CUDA projects that should be debugged separately from the python project, because the python debugger can’t access the PyTorch CUDA extensions.

Debug Settings

(2023-11-08)

VSCode Intellisense

To enable intellisense, the configuration of “C/C++ extension” must be put in the “.vscode/” of (top-level) current working directory (.vscode/c_cpp_properties.json), not in submodule’s “.vscode/” (submodules/diff-gaussian-rasterization/.vscode/c_cpp_properties.json).

Otherwise, the settings won’t be loaded.

CUDA syntax (e.g., __global__) and headers (<cooperative_groups/reduce.h>, min) won’t be recognized if includePath and the compiler path are incorrect.

Doubtful Attempts

Put the include paths of torch before cuda’s include. Otherwise #include <cooperative_groups/reduce.h> in “forward.cu” can’t be found.

Set the includePath in “.vscode/c_cpp_properties.json” as:

1
2
3
4
5
6
7


"includePath": [
    "/home/yi/anaconda3/envs/AIkui/include/python3.10",
    "/home/yi/anaconda3/envs/AIkui/lib/python3.10/site-packages/torch/include",
    "/home/yi/anaconda3/envs/AIkui/lib/python3.10/site-packages/torch/include/torch/csrc/api/include",
    "${workspaceFolder}/**",
    "/usr/local/cuda-11.6/include"
],

Error in “rasterizer_impl.cu”: cannot open source file glm/glm.hpp (dependency of cub/cub.cuh)
1

sudo apt-get install libglm-dev
Why can’t C++ find GLM headers? - SO
Error in “backward.cu”: namespace "cooperative_groups" has no member "this_grid"

Change CMakelist: SO

Didn’t fix
Error: identifier "min" is undefinedC/C++(20)

Didn’t fix

Solution: Use nvcc as compiler. Verified “.vscode/c_cpp_properties.json”:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19


{
    "configurations": [
        {
            "name": "Linux",
            "includePath": [
                "${workspaceFolder}/**",
                "/home/yi/anaconda3/envs/gaussian_splatting/include/python3.7m",
                "/home/yi/anaconda3/envs/gaussian_splatting/lib/python3.7/site-packages/torch/include",
                "/home/yi/anaconda3/envs/gaussian_splatting/lib/python3.7/site-packages/torch/include/torch/csrc/api/include"
            ],
            "defines": [],
            "compilerPath": "/usr/local/cuda-11.6/bin/nvcc",
            "cStandard": "c17",
            "cppStandard": "gnu++14",
            "intelliSenseMode": "linux-gcc-x64"
        }
    ],
    "version": 4
}

Debug diffRast

(2023-11-18)

Action: Write a “main.cpp” that calls the methods of the compiled library “CudaRasterizer” for debugging. Repo for debugging DiffRast: debug_diff_rast

“CMakeLists.txt”

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


# Build script for MyApp: a small host executable (main.cpp) that links the
# CudaRasterizer library so the rasterizer can be driven from a native debugger.
cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
project(MyApp)    # ${PROJECT_NAME}

# LibTorch is located through CMAKE_PREFIX_PATH (passed at configure time).
find_package(Torch REQUIRED)
# Append the compile flags exported by the Torch package to the C++ flags.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

# Provides the CudaRasterizer target that is linked below.
add_subdirectory(external/diff-gaussian-rasterization-comments)

add_executable(MyApp main.cpp)
set_property(TARGET MyApp PROPERTY CXX_STANDARD 17)

# MyApp is an executable, so nothing consumes its usage requirements:
# use the keyword signature with PRIVATE instead of the legacy plain form.
target_link_libraries(MyApp
    PRIVATE
        "${TORCH_LIBRARIES}"
        CudaRasterizer
)

# Compile the rasterizer's entry-point source into the executable and make the
# headers next to it visible to main.cpp.
target_sources(MyApp
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/external/diff-gaussian-rasterization-comments/rasterize_points.cu
)
target_include_directories(MyApp
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/external/diff-gaussian-rasterization-comments
)

“launch.json”

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


{
    // Debug configuration that builds MyApp with CMake and then launches it
    // under the C/C++ extension's debugger ("cppdbg").
    "version": "0.2.0",
    "configurations": [
        {
            "name": "DiffRast Launch",
            "type": "cppdbg",
            "request": "launch",
            // Resolve the binary from the workspace root. ${fileDirname} is the
            // directory of whichever file is focused in the editor, so it only
            // pointed at build/MyApp when a root-level file happened to be open.
            "program": "${workspaceFolder}/build/MyApp",
            "args": [],
            // Must match the "label" of the compound task in tasks.json.
            "preLaunchTask": "Build with cmake",
            "cwd": "${workspaceFolder}"
        }
    ]
}

“tasks.json”

open

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44


{
  // Build tasks for MyApp: configure with CMake, then build, chained by the
  // compound task "Build with cmake" (used as preLaunchTask in launch.json).
  "version": "2.0.0",
  "tasks": [
    {
      // Generate the build system in ./build (created if it does not exist).
      "label": "cmake-configure",
      "type": "shell",
      "command": "cmake",
      "args": [
        "-S", ".",
        "-B", "build",
        "-DCMAKE_BUILD_TYPE=Debug",
        "-DCMAKE_PREFIX_PATH=/usr/local/libtorch"
      ],
      "options": {
        "cwd": "${workspaceFolder}"
      },
      "group": "build"
    },
    {
      // Compile the configured tree. For multi-config generators, append
      // "--config", "Debug" to the arguments.
      "label": "cmake-build",
      "type": "shell",
      "command": "cmake",
      "args": [
        "--build",
        "build"
      ],
      "options": {
        "cwd": "${workspaceFolder}"
      },
      "group": "build"
    },
    {
      "label": "Build with cmake",
      "dependsOn": ["cmake-configure", "cmake-build"],
      // Without this, dependsOn tasks are started in parallel, so the build
      // could run before configuration has finished.
      "dependsOrder": "sequence",
      // Only one task should be the default build task.
      "group": {
        "kind": "build",
        "isDefault": true
      }
    }
  ]
}

Enter CUDA Kernels

Building the project as above doesn’t allow stepping into CUDA kernels, e.g., preprocessCUDA.

(2024-01-20)

Enable CUDA language in CMAKE and set for debugging:

1
2
3
4
5


# Enable CUDA as a first-class language so .cu sources are compiled by nvcc.
project(MyApp LANGUAGES CXX CUDA)

# Default to a Debug build on single-config generators, but only when no build
# type was requested, so -DCMAKE_BUILD_TYPE=... on the command line still wins.
get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT is_multi_config AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
endif()

# -g adds host debug info and -G adds device debug info, which cuda-gdb needs to
# step into kernels. Appending to the per-config variable limits the flags to
# Debug builds without testing CMAKE_BUILD_TYPE. Keep this ahead of any
# add_subdirectory()/target definitions that should inherit the flags.
string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -G")

Build to binary file:

1
2
3


# cd ~/Downloads/debug_diff_rast
cmake -B ./build -DCMAKE_PREFIX_PATH=/usr/local/libtorch -GNinja
cmake --build ./build

Launch cuda-gdb: cuda-gdb ./build/MyApp. It’s convenient to use cuda-gdb in the terminal of vscode, where I can jump to the code by clicking the path.
Add breakpoint inside preprocessCUDA: (cuda-gdb) b forward.cu:182

Not sure if it worked.

(2024-01-26) Nsight (CUDA-GDB)

Generate Makefile with cmake:

1
2


cmake -B ./build -DCMAKE_PREFIX_PATH=/usr/local/libtorch -G"Unix Makefiles"
cmake --build ./build

To let cmake produce Makefile, do not use -GNinja. Why isn’t the command “cmake .” generating a makefile? - SO

(2024-04-23)

Error: “Caffe2: Cannot find cuDNN library”

Solved by installing cudnn-xxx.deb

Error Message

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46


(gaussian_splatting) yi@yi-Alien:~/Downloads/debug_diff_rast$ cmake -B ./build -DCMAKE_PREFIX_PATH=/usr/local/libtorch -G"Unix Makefiles"
-- The CXX compiler identification is GNU 9.4.0
-- The CUDA compiler identification is NVIDIA 11.6.55
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: /usr/local/cuda-11.6/bin/nvcc - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
-- Found CCache: /usr/local/bin/ccache
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE  
-- Found CUDA: /usr/local/cuda-11.6 (found version "11.6") 
-- Caffe2: CUDA detected: 11.6
-- Caffe2: CUDA nvcc is: /usr/local/cuda-11.6/bin/nvcc
-- Caffe2: CUDA toolkit directory: /usr/local/cuda-11.6
-- Caffe2: Header version is: 11.6
-- Could NOT find CUDNN (missing: CUDNN_LIBRARY_PATH CUDNN_INCLUDE_PATH) 
CMake Warning at /usr/local/libtorch/share/cmake/Caffe2/public/cuda.cmake:120 (message):
  Caffe2: Cannot find cuDNN library.  Turning the option off
Call Stack (most recent call first):
  /usr/local/libtorch/share/cmake/Caffe2/Caffe2Config.cmake:88 (include)
  /usr/local/libtorch/share/cmake/Torch/TorchConfig.cmake:68 (find_package)
  CMakeLists.txt:19 (find_package)


-- /usr/local/cuda-11.6/lib64/libnvrtc.so shorthash is 280a23f6
-- Autodetected CUDA architecture(s):  6.1
-- Added CUDA NVCC flags for: -gencode;arch=compute_61,code=sm_61
CMake Error at /usr/local/libtorch/share/cmake/Caffe2/Caffe2Config.cmake:96 (message):
  Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN
  libraries.  Please set the proper cuDNN prefixes and / or install cuDNN.
Call Stack (most recent call first):
  /usr/local/libtorch/share/cmake/Torch/TorchConfig.cmake:68 (find_package)
  CMakeLists.txt:19 (find_package)

-- Configuring incomplete, errors occurred!

I didn’t encounter this error before.

There is no libcudnn.so under directory: /usr/local/cuda-11.6/lib64/
libcudnn.so doesn’t appear in the output of python -m torch.utils.collect_env as shown in issue#30
Download cuDNN Library for CUDA11 Ubuntu 20.04. Referring to this answer: spconv - issues#277

1
2
3
4
5


wget https://developer.download.nvidia.com/compute/cudnn/9.1.0/local_installers/cudnn-local-repo-ubuntu2004-9.1.0_1.0-1_amd64.deb
sudo dpkg -i cudnn-local-repo-ubuntu2004-9.1.0_1.0-1_amd64.deb
sudo cp /var/cudnn-local-repo-ubuntu2004-9.1.0/cudnn-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cudnn-cuda-11

The libcudnn and cudnn are installed under /usr/lib/ and /usr/include:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


(gaussian_splatting) yi@yi:~/Downloads/debug_diff_rast$ whereis libcudnn
libcudnn: /usr/lib/x86_64-linux-gnu/libcudnn.so

(gaussian_splatting) yi@yi:~/Downloads/debug_diff_rast$ whereis cudnn
cudnn: /usr/include/cudnn.h

(gaussian_splatting) yi@yi:~/Downloads/debug_diff_rast$ dpkg -l | grep cudnn
ii  cudnn-local-repo-ubuntu2004-9.1.0  1.0-1        amd64  cudnn-local repository configuration files
ii  cudnn9-cuda-11                     9.1.0.70-1   amd64  NVIDIA cuDNN for CUDA 11
ii  cudnn9-cuda-11-8                   9.1.0.70-1   amd64  NVIDIA cuDNN for CUDA 11.8
ii  libcudnn9-cuda-11                  9.1.0.70-1   amd64  cuDNN runtime libraries for CUDA 11.8
ii  libcudnn9-dev-cuda-11              9.1.0.70-1   amd64  cuDNN development headers and symlinks for CUDA 11.8
ii  libcudnn9-static-cuda-11           9.1.0.70-1   amd64  cuDNN static libraries for CUDA 11.8

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


(base) yi@yi-Alien:~$ apt-cache search cudnn
cudnn9-cuda-11-8 - NVIDIA cuDNN for CUDA 11.8
cudnn9-cuda-11 - NVIDIA cuDNN for CUDA 11
cudnn9-cuda-12-4 - NVIDIA cuDNN for CUDA 12.4
cudnn9-cuda-12 - NVIDIA cuDNN for CUDA 12
cudnn9 - NVIDIA CUDA Deep Neural Network library (cuDNN)
cudnn - NVIDIA CUDA Deep Neural Network library (cuDNN)
libcudnn9-cuda-11 - cuDNN runtime libraries for CUDA 11.8
libcudnn9-cuda-12 - cuDNN runtime libraries for CUDA 12.4
libcudnn9-dev-cuda-11 - cuDNN development headers and symlinks for CUDA 11.8
libcudnn9-dev-cuda-12 - cuDNN development headers and symlinks for CUDA 12.4
libcudnn9-samples - cuDNN samples
libcudnn9-static-cuda-11 - cuDNN static libraries for CUDA 11.8
libcudnn9-static-cuda-12 - cuDNN static libraries for CUDA 12.4
cudnn-local-repo-ubuntu2004-9.1.0 - cudnn-local repository configuration files

After installation, the cmake configuration works fine.

I didn’t copy it to /usr/local/cuda/lib64 or /usr/local/cuda/include, like: CuDNN not found while compiling PyTorch C++ extension - Forum
I didn’t set any environment variable either.

(2024-05-01)

Lambda server met the same problem.

The current cudnn 9.1 doesn’t have a deb option for Ubuntu 18.04. I downloaded an older version, cudnn 8.9.7 (2023/12/05): Local Installer for Ubuntu18.04 x86_64 (Deb) (839MB)

wget cannot download it. Have to use browser.

1

wget https://developer.nvidia.com/downloads/compute/cudnn/secure/8.9.7/local_installers/11.x/cudnn-local-repo-ubuntu1804-8.9.7.29_1.0-1_amd64.deb/

Install the deb package with reference to cudnn 9.1:

1
2
3


sudo dpkg -i cudnn-local-repo-ubuntu1804-8.9.7.29_1.0-1_amd64.deb
sudo cp /var/cudnn-local-repo-ubuntu1804-8.9.7.29/cudnn-local-AE31B5F1-keyring.gpg /usr/share/keyrings/
sudo apt-get update

The package name may not match:

1
2
3
4
5


root@lambda-server:/data2/zi# sudo apt-get -y install cudnn-cuda-11
Reading package lists... Done
Building dependency tree       
Reading state information... Done
E: Unable to locate package cudnn-cuda-11

There are several cudnn packages that could be installed:

1
2
3
4
5
6
7


root@lambda-server:/data2/zi# apt-cache search cudnn
libcudnn8 - cuDNN runtime libraries
libcudnn8-dev - cuDNN development libraries and headers
libcudnn8-samples - cuDNN samples
libcudnn7-dev - cuDNN development libraries and headers
libcudnn7 - cuDNN runtime libraries
cudnn-local-repo-ubuntu1804-8.9.7.29 - cudnn-local repository configuration files

Search: How to install cudnn Local Installer for Ubuntu18.04 x86_64 (Deb) on DDG:

Found: NVIDIA cuDNN - NVIDIA Documentation Hub

Referring to the pdf docs, specify the versions:

1
2
3
4
5


root@lambda-server:/data2/zi# sudo apt-get install libcudnn8-dev=8.9.7.29-1+cuda11.6
Reading package lists... Done
Building dependency tree       
Reading state information... Done
E: Version '8.9.7.29-1+cuda11.6' for 'libcudnn8-dev' was not found

The installation succeeded without specifying a version:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22


root@lambda-server:/data2/zichen# sudo apt-get install libcudnn8-dev

The following additional packages will be installed:
  libcudnn8
The following NEW packages will be installed:
  libcudnn8 libcudnn8-dev
0 upgraded, 2 newly installed, 0 to remove and 337 not upgraded.
Need to get 0 B/878 MB of archives.
After this operation, 2,366 MB of additional disk space will be used.
Do you want to continue? [Y/n] y
Get:1 file:/var/cudnn-local-repo-ubuntu1804-8.9.7.29  libcudnn8 8.9.7.29-1+cuda11.8 [441 MB]
Get:2 file:/var/cudnn-local-repo-ubuntu1804-8.9.7.29  libcudnn8-dev 8.9.7.29-1+cuda11.8 [437 MB]                                                   
Selecting previously unselected package libcudnn8.                                                                                                 
(Reading database ... 231804 files and directories currently installed.)
Preparing to unpack .../libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb ...
Unpacking libcudnn8 (8.9.7.29-1+cuda11.8) ...
Selecting previously unselected package libcudnn8-dev.
Preparing to unpack .../libcudnn8-dev_8.9.7.29-1+cuda11.8_amd64.deb ...
Unpacking libcudnn8-dev (8.9.7.29-1+cuda11.8) ...
Setting up libcudnn8 (8.9.7.29-1+cuda11.8) ...
Setting up libcudnn8-dev (8.9.7.29-1+cuda11.8) ...
update-alternatives: using /usr/include/x86_64-linux-gnu/cudnn_v8.h to provide /usr/include/cudnn.h (libcudnn) in auto mode

The debugger can step into kernel functions. However, the variables are not visible in the panel: Cannot instantiate printer for default visualizer

Create tasks.json

Following this tutorial: Getting Started with the CUDA Debugger :: NVIDIA Nsight VSCE Documentation

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


{ 
  // Build task entry (one element of the "tasks" array in tasks.json) that
  // runs make from the build directory.
  "label": "CUDA Make",
  "type": "shell",
  // NOTE(review): dbg=1 is passed to make as a variable; whether the Makefile
  // in the build directory reads it is not shown here - confirm.
  "command": "make dbg=1",
  // Registers this task as the default build task.
  "group": {
    "kind": "build",
    "isDefault": true
  },
  // Parses compiler diagnostics from the task output; presumably "$nvcc" is
  // contributed by the Nsight extension - verify.
  "problemMatcher": [
  	"$nvcc"
  ],
  "options": {
    "cwd": "${workspaceFolder}/build"  // directory containing the Makefile
  },
},

And then click the menu bar in vscode “Terminal” -> “Run Build Task …” -> select “CUDA Make”.

Create launch.json:
1 2 3 4 5 6

// Launch configuration entry (one element of "configurations" in launch.json)
// that starts build/MyApp under the "cuda-gdb" debugger type.
{
    "name": "CUDA: Debug with CUDA-GDB",
    "type": "cuda-gdb",
    "request": "launch",
    "program": "${workspaceFolder}/build/MyApp",
},
Select the configuration: “CUDA: Debug with CUDA-GDB” beside the “Run” button. Then, click “Run” to debug.

The program can hit the breakpoint in the kernel function preprocessCUDA, for example: float3 p_view; at “diff-gaussian-rasterization/cuda_rasterizer/forward.cu#192”

(2024-04-24) On Ubuntu 20.04, CUDA-11.6, gcc 9.4.0

Re-verified practice: 2 steps enable debugging with breakpoints set inside kernel functions.

Compile with cmake:

1
2


(gaussian_splatting) yi@yi:~/Downloads/debug_diff_rast$ cmake -B ./build -DCMAKE_PREFIX_PATH=/usr/local/libtorch -G"Unix Makefiles"
(gaussian_splatting) yi@yi:~/Downloads/debug_diff_rast$ cmake --build ./build

without clicking: Terminal -> Run Build Task... -> CUDA Make

Launch debugger with the above launch.json: “CUDA: Debug with CUDA-GDB”

Table of contents

Debug Settings

VSCode Intellisense

Debug diffRast

Enter CUDA Kernels