1. nvidia-smi: command not found,先解除對nvidia內核模塊的佔用,將其使用次數由102降到0。
root@ubuntu:~# lsmod | grep nvidia
nvidia_modeset 1114112 2
nvidia_uvm 819200 0
nvidia 19046400 102 nvidia_uvm,nvidia_modeset
ipmi_msghandler 53248 2 ipmi_devintf,nvidia
root@ubuntu:~# rmmod nvidia_uvm
root@ubuntu:~# rmmod nvidia_modeset
rmmod: ERROR: Module nvidia_modeset is in use
root@ubuntu:~# lsof -n -w /dev/nvidia*
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
Xorg 15045 root mem CHR 195,255 478 /dev/nvidiactl
Xorg 15045 root mem CHR 195,1 533 /dev/nvidia1
Xorg 15045 root mem CHR 195,0 547 /dev/nvidia0
Xorg 15045 root 13u CHR 195,255 0t0 478 /dev/nvidiactl
Xorg 15045 root 15u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 16u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 17u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 18u CHR 195,254 0t0 546 /dev/nvidia-modeset
Xorg 15045 root 20u CHR 195,0 0t0 547 /dev/nvidia0
Xorg 15045 root 21u CHR 195,0 0t0 547 /dev/nvidia0
Xorg 15045 root 22u CHR 195,0 0t0 547 /dev/nvidia0
Xorg 15045 root 23u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 24u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 26u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 27u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 28u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 29u CHR 195,255 0t0 478 /dev/nvidiactl
Xorg 15045 root 30u CHR 195,254 0t0 546 /dev/nvidia-modeset
Xorg 15045 root 31u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 32u CHR 195,1 0t0 533 /dev/nvidia1
Xorg 15045 root 33u CHR 195,0 0t0 547 /dev/nvidia0
Xorg 15045 root 34u CHR 195,0 0t0 547 /dev/nvidia0
root@ubuntu:~# kill -9 15045
root@ubuntu:~# rmmod nvidia_modeset
root@ubuntu:~# lsmod | grep nvidia
nvidia 19046400 0
ipmi_msghandler 53248 2 ipmi_devintf,nvidia
2. /usr/bin/nvidia-smi: 是一個目錄,必須將其變成一個文件,再轉換爲軟鏈接
root@ubuntu:~# nvidia-smi
-bash: /usr/bin/nvidia-smi: 是一個目錄
root@ubuntu:~# cd /usr/bin/
root@ubuntu:/usr/bin# rm -rf nvidia-smi && touch nvidia-smi
root@ubuntu:/usr/bin# rm -rf nvidia-smi && ln -s /etc/alternatives/x86_64-linux-gnu_nvidia_smi nvidia-smi
root@ubuntu:/usr/bin# ll nvidia-smi
lrwxrwxrwx 1 root root 45 Jan 13 16:23 nvidia-smi -> /etc/alternatives/x86_64-linux-gnu_nvidia_smi*
root@ubuntu:/usr/bin# cd
root@ubuntu:~# nvidia-smi
Failed to initialize NVML: Driver/library version mismatch
3. Failed to initialize NVML: Driver/library version mismatch,驅動版本不匹配(我選擇重裝)
4. 禁用nouveau,若lsmod | grep nouveau無輸出,說明禁用成功
root@ubuntu:~# cat /etc/modprobe.d/blacklist-nouveau.conf
blacklist nouveau
options nouveau modeset=0
root@ubuntu:~# update-initramfs -u
root@ubuntu:~# lsmod | grep nouveau
5. 停止x-window服務(lightdm)
root@ubuntu:~# systemctl stop lightdm
6. 卸載nvidia,卸載後nvidia-smi又變成了一個目錄
root@ubuntu:~# apt-get autoremove --purge nvidia-*
root@ubuntu:~# nvidia-smi
-bash: /usr/bin/nvidia-smi: 是一個目錄
root@ubuntu:~# cd /usr/bin/
root@ubuntu:/usr/bin# rm -rf nvidia-smi && touch nvidia-smi
root@ubuntu:/usr/bin# nvidia-smi
-bash: /usr/bin/nvidia-smi: 權限不夠
7. 卸載cuda
root@ubuntu:~# apt-get autoremove --purge cuda-*
8. reboot
9. 下載NVIDIA安裝包,如:http://us.download.nvidia.com/XFree86/Linux-x86_64/418.88/NVIDIA-Linux-x86_64-418.88.run
10. 安裝,加--silent靜默安裝。
root@ubuntu:~# chmod +x NVIDIA-Linux-x86_64-418.88.run
root@ubuntu:~# ./NVIDIA-Linux-x86_64-418.88.run -no-x-check -no-nouveau-check -no-opengl-files --silent
Verifying archive integrity... OK
Uncompressing NVIDIA Accelerated Graphics Driver for Linux-x86_64 418.88..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
root@ubuntu:~# nvidia-smi
Mon Jan 13 16:53:04 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.88 Driver Version: 418.88 CUDA Version: 10.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce GTX 108... Off | 00000000:05:00.0 Off | N/A |
| 33% 29C P5 22W / 250W | 0MiB / 11177MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
| 1 GeForce GTX 108... Off | 00000000:09:00.0 Off | N/A |
| 31% 18C P0 51W / 250W | 0MiB / 11178MiB | 6% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
root@ubuntu:/var/supply/supply_device# apt-get install dkms
正在讀取軟件包列表... 完成
正在分析軟件包的依賴關係樹
正在讀取狀態信息... 完成
dkms 已經是最新版 (2.2.0.3-2ubuntu11.8)。
升級了 0 個軟件包,新安裝了 0 個軟件包,要卸載 0 個軟件包,有 146 個軟件包未被升級。
root@ubuntu:/var/supply/supply_device# cd
root@ubuntu:~# ll /usr/src/
總用量 468
drwxr-xr-x 11 root root 4096 Jan 13 16:52 ./
drwxr-xr-x 12 root root 4096 Jan 13 14:44 ../
-rw-r--r-- 1 root root 64667 Oct 31 23:56 fortran.c
-rw-r--r-- 1 root root 17859 Oct 31 23:56 fortran_common.h
-rw-r--r-- 1 root root 39040 Oct 31 23:56 fortran.h
-rw-r--r-- 1 root root 269462 Oct 31 23:56 fortran_thunking.c
-rw-r--r-- 1 root root 34362 Oct 31 23:56 fortran_thunking.h
drwxr-xr-x 25 root root 4096 Sep 23 10:19 linux-headers-4.15.0-64/
drwxr-xr-x 8 root root 4096 Sep 23 10:19 linux-headers-4.15.0-64-generic/
drwxr-xr-x 25 root root 4096 Oct 2 06:40 linux-headers-4.15.0-65/
drwxr-xr-x 8 root root 4096 Oct 2 06:40 linux-headers-4.15.0-65-generic/
drwxr-xr-x 25 root root 4096 Oct 24 06:16 linux-headers-4.15.0-66/
drwxr-xr-x 8 root root 4096 Oct 24 06:16 linux-headers-4.15.0-66-generic/
drwxr-xr-x 4 root root 4096 Jan 13 11:59 linux-source-4.4.0/
lrwxrwxrwx 1 root root 45 Dec 3 20:17 linux-source-4.4.0.tar.bz2 -> linux-source-4.4.0/linux-source-4.4.0.tar.bz2
drwxr-xr-x 6 root root 4096 Jan 13 10:34 nvidia-418.87.00/
drwxr-xr-x 7 root root 4096 Jan 13 16:52 nvidia-418.88/
root@ubuntu:~# dkms install -m nvidia -v 418.88
Module nvidia/418.88 already installed on kernel 4.15.0-66-generic/x86_64
root@tesra:~# reboot