Ubuntu安裝顯卡驅動後nvidia-smi報錯,重裝NVIDIA驅動

1. nvidia-smi: command not found。先結束佔用nvidia內核模塊的進程並卸載相關模塊,將lsmod中nvidia模塊的使用計數從102降到0。

root@ubuntu:~# lsmod | grep nvidia
nvidia_modeset       1114112  2
nvidia_uvm            819200  0
nvidia              19046400  102 nvidia_uvm,nvidia_modeset
ipmi_msghandler        53248  2 ipmi_devintf,nvidia
root@ubuntu:~# rmmod nvidia_uvm
root@ubuntu:~# rmmod nvidia_modeset
rmmod: ERROR: Module nvidia_modeset is in use
root@ubuntu:~# lsof -n -w /dev/nvidia*
COMMAND   PID USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
Xorg    15045 root  mem    CHR 195,255           478 /dev/nvidiactl
Xorg    15045 root  mem    CHR   195,1           533 /dev/nvidia1
Xorg    15045 root  mem    CHR   195,0           547 /dev/nvidia0
Xorg    15045 root   13u   CHR 195,255      0t0  478 /dev/nvidiactl
Xorg    15045 root   15u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   16u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   17u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   18u   CHR 195,254      0t0  546 /dev/nvidia-modeset
Xorg    15045 root   20u   CHR   195,0      0t0  547 /dev/nvidia0
Xorg    15045 root   21u   CHR   195,0      0t0  547 /dev/nvidia0
Xorg    15045 root   22u   CHR   195,0      0t0  547 /dev/nvidia0
Xorg    15045 root   23u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   24u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   26u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   27u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   28u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   29u   CHR 195,255      0t0  478 /dev/nvidiactl
Xorg    15045 root   30u   CHR 195,254      0t0  546 /dev/nvidia-modeset
Xorg    15045 root   31u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   32u   CHR   195,1      0t0  533 /dev/nvidia1
Xorg    15045 root   33u   CHR   195,0      0t0  547 /dev/nvidia0
Xorg    15045 root   34u   CHR   195,0      0t0  547 /dev/nvidia0
root@ubuntu:~# kill -9 15045
root@ubuntu:~# rmmod nvidia_modeset 
root@ubuntu:~# lsmod | grep nvidia
nvidia              19046400  0
ipmi_msghandler        53248  2 ipmi_devintf,nvidia


2. /usr/bin/nvidia-smi: 是一個目錄。必須先刪除該目錄,再將其重建爲指向/etc/alternatives/x86_64-linux-gnu_nvidia_smi的軟鏈接(下面先touch成普通文件再刪除的中間步驟可以省略,直接rm -rf後ln -s即可)

root@ubuntu:~# nvidia-smi
-bash: /usr/bin/nvidia-smi: 是一個目錄
root@ubuntu:~# cd /usr/bin/
root@ubuntu:/usr/bin# rm -rf nvidia-smi && touch nvidia-smi
root@ubuntu:/usr/bin# rm -rf nvidia-smi && ln -s /etc/alternatives/x86_64-linux-gnu_nvidia_smi nvidia-smi
root@ubuntu:/usr/bin# ll nvidia-smi
lrwxrwxrwx 1 root root 45 Jan 13 16:23 nvidia-smi -> /etc/alternatives/x86_64-linux-gnu_nvidia_smi*
root@ubuntu:/usr/bin# cd
root@ubuntu:~# nvidia-smi
Failed to initialize NVML: Driver/library version mismatch

3. Failed to initialize NVML: Driver/library version mismatch,即已加載的內核驅動模塊與用戶態庫的版本不匹配(我選擇重裝驅動)

4. 禁用nouveau,若lsmod | grep nouveau無輸出,說明禁用成功

root@ubuntu:~# cat /etc/modprobe.d/blacklist-nouveau.conf
blacklist nouveau
options nouveau modeset=0
root@ubuntu:~# update-initramfs -u
root@ubuntu:~# lsmod | grep nouveau

5. 停止X Window圖形界面服務(lightdm)

root@ubuntu:~# systemctl stop lightdm

6. 卸載nvidia,卸載後nvidia-smi又變成了一個目錄

root@ubuntu:~# apt-get autoremove --purge nvidia-*

root@ubuntu:~# nvidia-smi
-bash: /usr/bin/nvidia-smi: 是一個目錄
root@ubuntu:~# cd /usr/bin/
root@ubuntu:/usr/bin# rm -rf nvidia-smi && touch nvidia-smi
root@ubuntu:/usr/bin# nvidia-smi
-bash: /usr/bin/nvidia-smi: 權限不夠

7. 卸載cuda

root@ubuntu:~# apt-get autoremove --purge cuda-*

8. reboot

9. 下載NVIDIA安裝包,如:http://us.download.nvidia.com/XFree86/Linux-x86_64/418.88/NVIDIA-Linux-x86_64-418.88.run

10. 安裝,加--silent靜默安裝。

root@ubuntu:~# chmod +x NVIDIA-Linux-x86_64-418.88.run
root@ubuntu:~# ./NVIDIA-Linux-x86_64-418.88.run -no-x-check -no-nouveau-check -no-opengl-files --silent
Verifying archive integrity... OK
Uncompressing NVIDIA Accelerated Graphics Driver for Linux-x86_64 418.88..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
root@ubuntu:~# nvidia-smi 
Mon Jan 13 16:53:04 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.88       Driver Version: 418.88       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 108...  Off  | 00000000:05:00.0 Off |                  N/A |
| 33%   29C    P5    22W / 250W |      0MiB / 11177MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:09:00.0 Off |                  N/A |
| 31%   18C    P0    51W / 250W |      0MiB / 11178MiB |      6%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

root@ubuntu:/var/supply/supply_device# apt-get install dkms
正在讀取軟件包列表... 完成
正在分析軟件包的依賴關係樹       
正在讀取狀態信息... 完成       
dkms 已經是最新版 (2.2.0.3-2ubuntu11.8)。
升級了 0 個軟件包,新安裝了 0 個軟件包,要卸載 0 個軟件包,有 146 個軟件包未被升級。
root@ubuntu:/var/supply/supply_device# cd
root@ubuntu:~# ll /usr/src/
總用量 468
drwxr-xr-x 11 root root   4096 Jan 13 16:52 ./
drwxr-xr-x 12 root root   4096 Jan 13 14:44 ../
-rw-r--r--  1 root root  64667 Oct 31 23:56 fortran.c
-rw-r--r--  1 root root  17859 Oct 31 23:56 fortran_common.h
-rw-r--r--  1 root root  39040 Oct 31 23:56 fortran.h
-rw-r--r--  1 root root 269462 Oct 31 23:56 fortran_thunking.c
-rw-r--r--  1 root root  34362 Oct 31 23:56 fortran_thunking.h
drwxr-xr-x 25 root root   4096 Sep 23 10:19 linux-headers-4.15.0-64/
drwxr-xr-x  8 root root   4096 Sep 23 10:19 linux-headers-4.15.0-64-generic/
drwxr-xr-x 25 root root   4096 Oct  2 06:40 linux-headers-4.15.0-65/
drwxr-xr-x  8 root root   4096 Oct  2 06:40 linux-headers-4.15.0-65-generic/
drwxr-xr-x 25 root root   4096 Oct 24 06:16 linux-headers-4.15.0-66/
drwxr-xr-x  8 root root   4096 Oct 24 06:16 linux-headers-4.15.0-66-generic/
drwxr-xr-x  4 root root   4096 Jan 13 11:59 linux-source-4.4.0/
lrwxrwxrwx  1 root root     45 Dec  3 20:17 linux-source-4.4.0.tar.bz2 -> linux-source-4.4.0/linux-source-4.4.0.tar.bz2
drwxr-xr-x  6 root root   4096 Jan 13 10:34 nvidia-418.87.00/
drwxr-xr-x  7 root root   4096 Jan 13 16:52 nvidia-418.88/
root@ubuntu:~# dkms install -m nvidia -v 418.88
Module nvidia/418.88 already installed on kernel 4.15.0-66-generic/x86_64
root@ubuntu:~# reboot

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章