Some cool commands: nvidia-smi, neofetch, watch -n1 nvidia-smi, anaconda-navigator, conda info --envs, conda remove -n yourenvname --all
Note: Install TF and PyTorch using the "Update 2" section only; the rest records what I tried that failed.
On Windows:
Install Anaconda.
Open Anaconda Prompt
conda create --name tf_gpu tensorflow-gpu
Source: https://towardsdatascience.com/tensorflow-gpu-installation-made-easy-use-conda-instead-of-pip-52e5249374bc
python -c "import tensorflow as tf; print(tf.__version__)"
Install Keras after TF:
pip install keras [Source https://www.quantinsti.com/blog/install-tensorflow-gpu]
python -c "import keras; print(keras.__version__)"
conda create --name torch
activate torch
conda install pytorch -c pytorch
Source: https://medium.com/@bryant.kou/how-to-install-pytorch-on-windows-step-by-step-cc4d004adb2a
Linux:
python
Python 3.7.4 (default, Aug 13 2019, 20:35:49)
[GCC 7.3.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.cuda.get_device_name(0)
'GeForce RTX 2070'
>>> torch.cuda.is_available()
True
>>> torch.cuda.device_count()
1
>>>
********** Update 2, CUDA 10.0, cuDNN 7.4*****
**Do not update pip and conda
https://medium.com/@cjanze/how-to-install-tensorflow-with-gpu-support-on-ubuntu-18-04-lts-with-cuda-10-nvidia-gpu-312a693744b5
lsb_release -a
dpkg -s gcc
sudo apt update && sudo apt install gcc
dpkg -s build-essential
sudo apt update && sudo apt install build-essential
sudo apt update && sudo apt install freeglut3 freeglut3-dev libxi-dev libxmu-dev
cd ~/Downloads
sudo sh ./NVIDIA-Linux-x86_64-410.104.run
update your X configuration file: yes
nvidia-smi
cd ~/Downloads
sudo sh cuda_10.0.130_410.48_linux.run
Don't install the driver
Install the toolkit and samples only
sudo nano ~/.bashrc
Scroll down and add:
export PATH=/usr/local/cuda-10.0/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
Ctrl + o to save
Enter to accept
Ctrl + x to close nano
cat ~/.bashrc
source ~/.bashrc
nvcc -V
cd ~/NVIDIA_CUDA-10.0_Samples
sudo sh cuda_10.0.130_410.48_linux.run
cd ~/NVIDIA_CUDA-10.0_Samples
sudo make
cd ~/NVIDIA_CUDA-10.0_Samples/bin/x86_64/linux/release
./deviceQuery
cd ~/Downloads
sudo dpkg -i libcudnn7_7.4.2.24-1+cuda10.0_amd64.deb
sudo dpkg -i libcudnn7-dev_7.4.2.24-1+cuda10.0_amd64.deb
sudo dpkg -i libcudnn7-doc_7.4.2.24-1+cuda10.0_amd64.deb
cp -r /usr/src/cudnn_samples_v7/ $HOME
cd $HOME/cudnn_samples_v7/mnistCUDNN
make clean && make
./mnistCUDNN
cd ~/Downloads
bash Anaconda3-2018.12-Linux-x86_64.sh
source ~/.bashrc
**Do not update pip and conda
conda create --name tensorflow
pip install tf-nightly-gpu
python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"
physical_device_desc: "device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5"
Installing PyTorch / Torch
Cuda 10.1 Did not work
Sources:
https://ai.atamai.biz/post/install-ubuntu/
https://medium.com/@cjanze/how-to-install-tensorflow-with-gpu-support-on-ubuntu-18-04-lts-with-cuda-10-nvidia-gpu-312a693744b5
Download files:
https://developer.nvidia.com/cuda-toolkit-archive
cuda_10.0.130_410.48_linux.run
https://developer.nvidia.com/rdp/cudnn-download
Download cuDNN v7.5.0 (Feb 21, 2019), for CUDA 10.0
cuDNN Library for Linux
cudnn-10.0-linux-x64-v7.5.0.56.tgz
Anaconda3-2018.12-Linux-x86_64.sh
sudo /usr/local/cuda*/bin/uninstall_cuda*
sudo nvidia-uninstall
sudo rm -rf /usr/local/cuda-*
sudo apt-get update
sudo apt-get upgrade
sudo apt autoremove
sudo apt-get install -y gcc g++ gfortran git libopenblas-dev
sudo apt-get install -y linux-image-generic linux-headers-generic linux-source linux-image-extra-virtual
sudo apt-get install -y libgl1-mesa-dev libgl1-mesa-glx libosmesa6-dev python3-pip python3-numpy python3-scipy
sudo gedit /etc/modprobe.d/blacklist-nouveau.conf
blacklist nouveau
options nouveau modeset=0
or
sudo bash -c "echo blacklist nouveau > /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
sudo bash -c "echo options nouveau modeset=0 >> /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
To see contents:
cat /etc/modprobe.d/blacklist-nvidia-nouveau.conf
uname -r
sudo gedit /etc/default/grub
GRUB_DEFAULT="1>2"
[indexing starts from 0]
sudo update-grub
https://askubuntu.com/questions/82140/how-can-i-boot-with-an-older-kernel-version
4.15.0-45-generic [Select from Advanced options during boot]
sudo bash cuda_10.0.130_410.48_linux.run --extract=/tmp/cudaInstall
sudo bash cuda_10.0.130_410.48_linux.run --no-opengl-libs
[n]-> for driver
[y]-> for cuda toolkit
[n]-> for all others
sudo gedit ~/.bashrc
export CUDA_HOME="/usr/local/cuda"
export LD_LIBRARY_PATH="$CUDA_HOME/lib64:$LD_LIBRARY_PATH"
export PATH="$CUDA_HOME/bin:$PATH"
source ~/.bashrc
tar xvf cudnn-10.0-linux-x64-v7.5.0.56.tgz
sudo cp -rapv cuda/include/cudnn.h /usr/local/cuda/include/
sudo cp -rapv cuda/lib64/* /usr/local/cuda/lib64/
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
cd /tmp/cudaInstall/
sudo ./NVIDIA-Linux-x86_64-410.48.run --no-opengl-files
bash Anaconda3-2018.12-Linux-x86_64.sh
source ~/.bashrc
conda create -n tensorflow python=3.5 anaconda
conda activate tensorflow
conda update --all
pip install tf-nightly-gpu
pip install twisted==18.7.0
pip install --upgrade pip
python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"
physical_device_desc: "device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5"
python _basic_mnist.py
conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
*****UPDATE ENDS*******
Step 1. NVIDIA® GPU drivers:
Source: https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-18-04-bionic-beaver-linux
Manual Install using the Official Nvidia.com driver:
Identify your NVIDIA VGA card Model:
$ lshw -numeric -C display
or
$ lspci -vnn | grep VGA
https://www.nvidia.com/Download/index.aspx
GeForce, GeForce RTX 20 Series, GeForce RTX 2070, Linux 64-bit.
My filename: NVIDIA-Linux-x86_64-418.43.run
Install Prerequisites: **Reduces some 32 bit error**
sudo dpkg --add-architecture i386
sudo apt update
sudo apt install build-essential libc6:i386
Some additional Steps:
Source: https://askubuntu.com/questions/1105570/black-screen-after-installing-nvidia-drivers-390-410-415-for-geforce-1050-ti
sudo gedit /etc/initramfs-tools/modules
# List of modules that you want to include in your initramfs.
# They will be loaded at boot time in the order below.
#
# Syntax: module_name [args ...]
#
# You must run update-initramfs(8) to effect this change.
#
# Examples:
#
# raid1
# sd_mod
nvidia
nvidia-drm
nvidia-modeset
nvidia-uvm
sudo gedit /etc/modprobe.d/nvidia-graphics-drivers.conf
options nvidia-drm modeset=1
blacklist nouveau
blacklist lbm-nouveau
alias nouveau off
alias lbm-nouveau off
Disable the default nouveau Nvidia driver:
sudo bash -c "echo blacklist nouveau > /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
sudo bash -c "echo options nouveau modeset=0 >> /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
**This will create a file named blacklist-nvidia-nouveau.conf in /etc/modprobe.d with contents as below**
blacklist nouveau
options nouveau modeset=0
To see contents:
cat /etc/modprobe.d/blacklist-nvidia-nouveau.conf
Update / regenerate initramfs:
sudo update-initramfs -u -k all
The update-initramfs script manages your initramfs images on your local box.
It keeps track of the existing initramfs archives in /boot.
There are three modes of operation create, update or delete.
At boot time, the kernel unpacks that archive into RAM disk, mounts and uses it as initial root file system.
Before reboot, sometimes after the driver installation, the black screen occurs after login, to avoid it, do:
sudo gedit /etc/default/grub
Before:
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash"
GRUB_CMDLINE_LINUX=""
After:
GRUB_CMDLINE_LINUX_DEFAULT=""
GRUB_CMDLINE_LINUX=""
sudo update-grub
sudo reboot
[Sometimes you get the black screen and cannot access a terminal to edit the grub file. In that case, press CTRL + ALT + F1, F2 or F3 (with the 'Fn' key, depending on your keyboard) at the login screen to open a TTY session where you can enter these commands. Sometimes this doesn't work either; in that case, boot Ubuntu in recovery mode, change nothing, just resume boot, and then press CTRL + ALT + F1, F2 or F3. This worked for me.]
Now the actual installation of Driver:
In order to install new Nvidia driver we need to stop the current display server.
The easiest way to do this is to change into runlevel 3 using the telinit command.
After executing the following linux command the display server will stop,
therefore save your work before you proceed:
sudo telinit 3
CTRL + ALT + F1 (With 'Fn' Key depending on your keyboard)
Login with your username and password to open TTY1 session
sudo bash /home/rb/Downloads/NVIDIA-Linux-x86_64-418.43.run
The distribution-provided pre-install script failed!
Are you sure you want to continue? -> CONTINUE INSTALLATION
Would you like to run the nvidia-xconfig utility? -> YES
(Mostly do "YES" on everything you get)
sudo reboot
Optional:
After reboot you should be able to start NVIDIA X Server Settings app from the Activities menu.
Step 2. CUDA Toolkit:
https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&target_distro=Ubuntu&target_version=1804&target_type=deblocal
sudo dpkg -i cuda-repo-ubuntu1804-10-1-local-10.1.105-418.39_1.0-1_amd64.deb
sudo apt-key add /var/cuda-repo-10-1-local-10.1.105-418.39/7fa2af80.pub
sudo apt-get update
sudo apt-get install cuda
[You might get unmet dependencies error like missing cuda-10-1]
So, either you can install dependencies one-by-one as:
sudo apt-get install cuda-10-1
But you will have to install a lot of dependencies one-by-one.
So better way:
sudo apt-get install aptitude
sudo aptitude install cuda
Maybe aptitude crawls back, gets all the missing dependencies and installs it for you
Then finally install the main package.
Update your PATH variable to add cuda libraries/files
sudo gedit ~/.bashrc
export PATH=/usr/local/cuda-10.1/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-10.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
source ~/.bashrc
or
. ~/.bashrc
[Yes 2 }} at the end]
Will basically add /usr/local/cuda-10.1/bin to path
Step 3. cuDNN install:
https://developer.nvidia.com/cudnn
Download "cuDNN Library for Linux"
tar -xzvf cudnn-10.1-linux-x64-v7.5.0.56.tgz
sudo cp cuda/include/cudnn.h /usr/local/cuda/include
sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
Done cuDNN
Step 4. Anaconda and Tensorflow GPU:
https://www.anaconda.com/distribution/#download-section
Download Anaconda 3 [Anaconda 2 might not work with TensorFlow]
You can work with python 2 as well in virtual environment
Read More: https://algidus.blogspot.com/p/virtual-environment-python.html
Download the SH file [Mine: Linux - 64-Bit (x86) Installer (652.5 MB)]:
My file: Anaconda3-2018.12-Linux-x86_64.sh
Run:
bash Anaconda3-2018.12-Linux-x86_64.sh
"Yes" to add to bashrc
source ~/.bashrc
To confirm installation:
conda list
Installing TensorFlow GPU in a virtual env:
conda create --name tfgpu
conda env list
source activate tfgpu
conda install -c anaconda tensorflow-gpu
conda install jupyter notebook
jupyter notebook
import tensorflow as tf
print(tf.__version__)
hello = tf.constant('hello tensorflow')
with tf.Session() as sess:
print(sess.run(hello))
Output:
1.12.0
b'hello tensorflow'
To Remove Virtual Environment:
conda remove -n tfgpu --all
Check Tensorflow GPU:
Find if tensorflow is actually using gpu: https://stackoverflow.com/questions/38009682
import tensorflow as tf
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
Output:
Found device 0 with properties:
name: GeForce RTX 2070 major: 7 minor: 5 memoryClockRate(GHz): 1.62
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5
import tensorflow as tf
with tf.device('/gpu:0'):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
with tf.Session() as sess:
print (sess.run(c))
Output:
name: GeForce RTX 2070 major: 7 minor: 5 memoryClockRate(GHz): 1.62
pciBusID: 0000:01:00.0
totalMemory: 7.76GiB freeMemory: 7.34GiB
physical GPU (device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5)
[[22. 28.]
[49. 64.]]
Some important packages:
conda install jupyter notebook
pip install matplotlib
pip install scikit-image
pip install scikit-learn
More here: https://algidus.blogspot.com/2019/03/tensorflow-cpu-gpu.html
Code Default (GPU):
Code CPU:
C:\Program Files\NVIDIA Corporation\NVSMI
watch -d -n 0.5 nvidia-smi
Note: Install TF and PyTorch using the "Update 2" section only; the rest records what I tried that failed.
On Windows:
Install Anaconda.
Open Anaconda Prompt
conda create --name tf_gpu tensorflow-gpu
Source: https://towardsdatascience.com/tensorflow-gpu-installation-made-easy-use-conda-instead-of-pip-52e5249374bc
python -c "import tensorflow as tf; print(tf.__version__)"
Install Keras after TF:
pip install keras [Source https://www.quantinsti.com/blog/install-tensorflow-gpu]
python -c "import keras; print(keras.__version__)"
conda create --name torch
activate torch
conda install pytorch -c pytorch
Source: https://medium.com/@bryant.kou/how-to-install-pytorch-on-windows-step-by-step-cc4d004adb2a
Linux:
python
Python 3.7.4 (default, Aug 13 2019, 20:35:49)
[GCC 7.3.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.cuda.get_device_name(0)
'GeForce RTX 2070'
>>> torch.cuda.is_available()
True
>>> torch.cuda.device_count()
1
>>>
********** Update 2, CUDA 10.0, cuDNN 7.4*****
**Do not update pip and conda
https://medium.com/@cjanze/how-to-install-tensorflow-with-gpu-support-on-ubuntu-18-04-lts-with-cuda-10-nvidia-gpu-312a693744b5
lsb_release -a
dpkg -s gcc
sudo apt update && sudo apt install gcc
dpkg -s build-essential
sudo apt update && sudo apt install build-essential
sudo apt update && sudo apt install freeglut3 freeglut3-dev libxi-dev libxmu-dev
cd ~/Downloads
sudo sh ./NVIDIA-Linux-x86_64-410.104.run
update your X configuration file: yes
nvidia-smi
cd ~/Downloads
sudo sh cuda_10.0.130_410.48_linux.run
Don't install the driver
Install the toolkit and samples only
sudo nano ~/.bashrc
Scroll down and add:
export PATH=/usr/local/cuda-10.0/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
Ctrl + o to save
Enter to accept
Ctrl + x to close nano
cat ~/.bashrc
source ~/.bashrc
nvcc -V
cd ~/NVIDIA_CUDA-10.0_Samples
sudo sh cuda_10.0.130_410.48_linux.run
cd ~/NVIDIA_CUDA-10.0_Samples
sudo make
cd ~/NVIDIA_CUDA-10.0_Samples/bin/x86_64/linux/release
./deviceQuery
cd ~/Downloads
sudo dpkg -i libcudnn7_7.4.2.24-1+cuda10.0_amd64.deb
sudo dpkg -i libcudnn7-dev_7.4.2.24-1+cuda10.0_amd64.deb
sudo dpkg -i libcudnn7-doc_7.4.2.24-1+cuda10.0_amd64.deb
cp -r /usr/src/cudnn_samples_v7/ $HOME
cd $HOME/cudnn_samples_v7/mnistCUDNN
make clean && make
./mnistCUDNN
cd ~/Downloads
bash Anaconda3-2018.12-Linux-x86_64.sh
source ~/.bashrc
**Do not update pip and conda
conda create --name tensorflow
pip install tf-nightly-gpu
python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"
physical_device_desc: "device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5"
Installing PyTorch / Torch
(tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ python --version Python 3.7.2 (tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ nvcc --version nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2018 NVIDIA Corporation Built on Sat_Aug_25_21:08:01_CDT_2018 Cuda compilation tools, release 10.0, V10.0.130 (tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ watch -n1 nvidia-smi Thu Mar 14 12:32:50 2019 +-----------------------------------------------------------------------------+ | NVIDIA-SMI 410.104 Driver Version: 410.104 CUDA Version: 10.0 | |-------------------------------+----------------------+----------------------+ | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | |===============================+======================+======================| | 0 GeForce RTX 2070 Off | 00000000:01:00.0 On | N/A | | 0% 39C P8 3W / 175W | 566MiB / 7944MiB | 3% Default | +-------------------------------+----------------------+----------------------+ +-----------------------------------------------------------------------------+ | Processes: GPU Memory | | GPU PID Type Process name Usage | |=============================================================================| | 0 1173 G /usr/lib/xorg/Xorg 18MiB | | 0 1202 G /usr/bin/gnome-shell 50MiB | | 0 1988 G /usr/lib/xorg/Xorg 169MiB | | 0 2116 G /usr/bin/gnome-shell 212MiB | | 0 8426 G ...quest-channel-token=2460008960031053301 77MiB | | 0 9720 G /opt/viber/Viber 35MiB | +-----------------------------------------------------------------------------+ Check CUDNN Version: (tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ whereis cudnn.h cudnn: /usr/include/cudnn.h (tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ cat /usr/include/cudnn.h | grep CUDNN_MAJOR -A 2 #define CUDNN_MAJOR 7 #define CUDNN_MINOR 4 #define CUDNN_PATCHLEVEL 2 -- #define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL) #include "driver_types.h" 
which means the version is 7.4.2. So Mine: NVIDIA Driver Version: 410.104 CUDA Version: release 10.0, V10.0.130 cuDNN Version: 7.4.2 (tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ conda install pytorch torchvision cudatoolkit=10.0 -c pytorch (tensorflow) rb@rbhost:/media/rb/Omega/tensorflow$ python -c 'import torch; print(torch.__version__)' 1.0.1.post2
********** Update 1 ***********
Cuda 10.1 Did not work
Sources:
https://ai.atamai.biz/post/install-ubuntu/
https://medium.com/@cjanze/how-to-install-tensorflow-with-gpu-support-on-ubuntu-18-04-lts-with-cuda-10-nvidia-gpu-312a693744b5
Download files:
https://developer.nvidia.com/cuda-toolkit-archive
cuda_10.0.130_410.48_linux.run
https://developer.nvidia.com/rdp/cudnn-download
Download cuDNN v7.5.0 (Feb 21, 2019), for CUDA 10.0
cuDNN Library for Linux
cudnn-10.0-linux-x64-v7.5.0.56.tgz
Anaconda3-2018.12-Linux-x86_64.sh
sudo /usr/local/cuda*/bin/uninstall_cuda*
sudo nvidia-uninstall
sudo rm -rf /usr/local/cuda-*
sudo apt-get update
sudo apt-get upgrade
sudo apt autoremove
sudo apt-get install -y gcc g++ gfortran git libopenblas-dev
sudo apt-get install -y linux-image-generic linux-headers-generic linux-source linux-image-extra-virtual
sudo apt-get install -y libgl1-mesa-dev libgl1-mesa-glx libosmesa6-dev python3-pip python3-numpy python3-scipy
sudo gedit /etc/modprobe.d/blacklist-nouveau.conf
blacklist nouveau
options nouveau modeset=0
or
sudo bash -c "echo blacklist nouveau > /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
sudo bash -c "echo options nouveau modeset=0 >> /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
To see contents:
cat /etc/modprobe.d/blacklist-nvidia-nouveau.conf
uname -r
sudo gedit /etc/default/grub
GRUB_DEFAULT="1>2"
[indexing starts from 0]
sudo update-grub
https://askubuntu.com/questions/82140/how-can-i-boot-with-an-older-kernel-version
4.15.0-45-generic [Select from Advanced options during boot]
sudo bash cuda_10.0.130_410.48_linux.run --extract=/tmp/cudaInstall
sudo bash cuda_10.0.130_410.48_linux.run --no-opengl-libs
[n]-> for driver
[y]-> for cuda toolkit
[n]-> for all others
sudo gedit ~/.bashrc
export CUDA_HOME="/usr/local/cuda"
export LD_LIBRARY_PATH="$CUDA_HOME/lib64:$LD_LIBRARY_PATH"
export PATH="$CUDA_HOME/bin:$PATH"
source ~/.bashrc
tar xvf cudnn-10.0-linux-x64-v7.5.0.56.tgz
sudo cp -rapv cuda/include/cudnn.h /usr/local/cuda/include/
sudo cp -rapv cuda/lib64/* /usr/local/cuda/lib64/
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
cd /tmp/cudaInstall/
sudo ./NVIDIA-Linux-x86_64-410.48.run --no-opengl-files
bash Anaconda3-2018.12-Linux-x86_64.sh
source ~/.bashrc
conda create -n tensorflow python=3.5 anaconda
conda activate tensorflow
conda update --all
pip install tf-nightly-gpu
pip install twisted==18.7.0
pip install --upgrade pip
python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"
physical_device_desc: "device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5"
python _basic_mnist.py
conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
*****UPDATE ENDS*******
Step 1. NVIDIA® GPU drivers:
Source: https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-18-04-bionic-beaver-linux
Manual Install using the Official Nvidia.com driver:
Identify your NVIDIA VGA card Model:
$ lshw -numeric -C display
or
$ lspci -vnn | grep VGA
https://www.nvidia.com/Download/index.aspx
GeForce, GeForce RTX 20 Series, GeForce RTX 2070, Linux 64-bit.
My filename: NVIDIA-Linux-x86_64-418.43.run
Install Prerequisites: **Reduces some 32 bit error**
sudo dpkg --add-architecture i386
sudo apt update
sudo apt install build-essential libc6:i386
Some additional Steps:
Source: https://askubuntu.com/questions/1105570/black-screen-after-installing-nvidia-drivers-390-410-415-for-geforce-1050-ti
sudo gedit /etc/initramfs-tools/modules
# List of modules that you want to include in your initramfs.
# They will be loaded at boot time in the order below.
#
# Syntax: module_name [args ...]
#
# You must run update-initramfs(8) to effect this change.
#
# Examples:
#
# raid1
# sd_mod
nvidia
nvidia-drm
nvidia-modeset
nvidia-uvm
sudo gedit /etc/modprobe.d/nvidia-graphics-drivers.conf
options nvidia-drm modeset=1
blacklist nouveau
blacklist lbm-nouveau
alias nouveau off
alias lbm-nouveau off
Disable the default nouveau Nvidia driver:
sudo bash -c "echo blacklist nouveau > /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
sudo bash -c "echo options nouveau modeset=0 >> /etc/modprobe.d/blacklist-nvidia-nouveau.conf"
**This will create a file named blacklist-nvidia-nouveau.conf in /etc/modprobe.d with contents as below**
blacklist nouveau
options nouveau modeset=0
To see contents:
cat /etc/modprobe.d/blacklist-nvidia-nouveau.conf
Update / regenerate initramfs:
sudo update-initramfs -u -k all
The update-initramfs script manages your initramfs images on your local box.
It keeps track of the existing initramfs archives in /boot.
There are three modes of operation create, update or delete.
At boot time, the kernel unpacks that archive into RAM disk, mounts and uses it as initial root file system.
Before reboot, sometimes after the driver installation, the black screen occurs after login, to avoid it, do:
sudo gedit /etc/default/grub
Before:
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash"
GRUB_CMDLINE_LINUX=""
After:
GRUB_CMDLINE_LINUX_DEFAULT=""
GRUB_CMDLINE_LINUX=""
sudo update-grub
sudo reboot
[Sometimes you get the black screen and cannot access a terminal to edit the grub file. In that case, press CTRL + ALT + F1, F2 or F3 (with the 'Fn' key, depending on your keyboard) at the login screen to open a TTY session where you can enter these commands. Sometimes this doesn't work either; in that case, boot Ubuntu in recovery mode, change nothing, just resume boot, and then press CTRL + ALT + F1, F2 or F3. This worked for me.]
Now the actual installation of Driver:
In order to install new Nvidia driver we need to stop the current display server.
The easiest way to do this is to change into runlevel 3 using the telinit command.
After executing the following linux command the display server will stop,
therefore save your work before you proceed:
sudo telinit 3
CTRL + ALT + F1 (With 'Fn' Key depending on your keyboard)
Login with your username and password to open TTY1 session
sudo bash /home/rb/Downloads/NVIDIA-Linux-x86_64-418.43.run
The distribution-provided pre-install script failed!
Are you sure you want to continue? -> CONTINUE INSTALLATION
Would you like to run the nvidia-xconfig utility? -> YES
(Mostly do "YES" on everything you get)
sudo reboot
Optional:
After reboot you should be able to start NVIDIA X Server Settings app from the Activities menu.
Step 2. CUDA Toolkit:
https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&target_distro=Ubuntu&target_version=1804&target_type=deblocal
sudo dpkg -i cuda-repo-ubuntu1804-10-1-local-10.1.105-418.39_1.0-1_amd64.deb
sudo apt-key add /var/cuda-repo-10-1-local-10.1.105-418.39/7fa2af80.pub
sudo apt-get update
sudo apt-get install cuda
[You might get unmet dependencies error like missing cuda-10-1]
So, either you can install dependencies one-by-one as:
sudo apt-get install cuda-10-1
But you will have to install a lot of dependencies one-by-one.
So better way:
sudo apt-get install aptitude
sudo aptitude install cuda
Maybe aptitude crawls back, gets all the missing dependencies and installs it for you
Then finally install the main package.
Update your PATH variable to add cuda libraries/files
sudo gedit ~/.bashrc
export PATH=/usr/local/cuda-10.1/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=/usr/local/cuda-10.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
source ~/.bashrc
or
. ~/.bashrc
[Yes 2 }} at the end]
Will basically add /usr/local/cuda-10.1/bin to path
Step 3. cuDNN install:
https://developer.nvidia.com/cudnn
Download "cuDNN Library for Linux"
tar -xzvf cudnn-10.1-linux-x64-v7.5.0.56.tgz
sudo cp cuda/include/cudnn.h /usr/local/cuda/include
sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
Done cuDNN
Step 4. Anaconda and Tensorflow GPU:
https://www.anaconda.com/distribution/#download-section
Download Anaconda 3 [Anaconda 2 might not work with TensorFlow]
You can work with python 2 as well in virtual environment
Read More: https://algidus.blogspot.com/p/virtual-environment-python.html
Download the SH file [Mine: Linux - 64-Bit (x86) Installer (652.5 MB)]:
My file: Anaconda3-2018.12-Linux-x86_64.sh
Run:
bash Anaconda3-2018.12-Linux-x86_64.sh
"Yes" to add to bashrc
source ~/.bashrc
To confirm installation:
conda list
Installing TensorFlow GPU in a virtual env:
conda create --name tfgpu
conda env list
source activate tfgpu
conda install -c anaconda tensorflow-gpu
conda install jupyter notebook
jupyter notebook
import tensorflow as tf
print(tf.__version__)
hello = tf.constant('hello tensorflow')
with tf.Session() as sess:
print(sess.run(hello))
Output:
1.12.0
b'hello tensorflow'
To Remove Virtual Environment:
conda remove -n tfgpu --all
Check Tensorflow GPU:
Find if tensorflow is actually using gpu: https://stackoverflow.com/questions/38009682
import tensorflow as tf
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
Output:
Found device 0 with properties:
name: GeForce RTX 2070 major: 7 minor: 5 memoryClockRate(GHz): 1.62
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5
import tensorflow as tf
with tf.device('/gpu:0'):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
with tf.Session() as sess:
print (sess.run(c))
Output:
name: GeForce RTX 2070 major: 7 minor: 5 memoryClockRate(GHz): 1.62
pciBusID: 0000:01:00.0
totalMemory: 7.76GiB freeMemory: 7.34GiB
physical GPU (device: 0, name: GeForce RTX 2070, pci bus id: 0000:01:00.0, compute capability: 7.5)
[[22. 28.]
[49. 64.]]
Some important packages:
conda install jupyter notebook
pip install matplotlib
pip install scikit-image
pip install scikit-learn
More here: https://algidus.blogspot.com/2019/03/tensorflow-cpu-gpu.html
Code Default (GPU):
import tensorflow as tf # Creates a graph. a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') c = tf.matmul(a, b) # Creates a session with log_device_placement set to True. to see logs and which variable uses what cpu or gpu sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) # Runs the op. print(sess.run(c))
Code CPU:
import tensorflow as tf with tf.device('/cpu:0'): a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') c = tf.matmul(a, b) sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) print(sess.run(c))
Code CPU and GPU:
import tensorflow as tf with tf.device('/cpu:0'): a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') c = tf.matmul(a, b) sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) print(sess.run(c))
Code MNIST:
from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('MNIST_data', one_hot=True) import tensorflow as tf tf.reset_default_graph() with tf.device('/gpu:0'): # Define placeholders x_batch = tf.placeholder(tf.float32, shape=[None, 784]) y_batch = tf.placeholder(tf.float32, shape=[None, 10]) images = tf.reshape(x_batch, shape=[-1, 28, 28, 1]) w1 = tf.get_variable('w1', [5, 5, 1, 32], initializer=tf.truncated_normal_initializer()) b1 = tf.get_variable('b1', [32], initializer=tf.random_normal_initializer()) conv1_ = tf.nn.conv2d(images, w1, strides=[1, 1, 1, 1], padding='SAME') conv1 = tf.nn.relu(conv1_ + b1) pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') w2 = tf.get_variable('w2', [14 * 14 * 32, 512], initializer=tf.truncated_normal_initializer()) b2 = tf.get_variable('b2', [512], initializer=tf.constant_initializer(0.0)) pool1_flat = tf.reshape(pool1, [-1, 14 * 14 * 32]) fc1 = tf.nn.relu(tf.matmul(pool1_flat, w2) + b2) fc = tf.nn.dropout(fc1, keep_prob = 0.5) w3 = tf.get_variable('w3', [512, 10], initializer=tf.truncated_normal_initializer()) b3 = tf.get_variable('biases', [10], initializer=tf.random_normal_initializer()) logits = tf.matmul(fc, w3) + b3 entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_batch, logits=logits) loss = tf.reduce_mean(entropy, name='loss') opt = tf.train.AdamOptimizer(0.05).minimize(loss) config = tf.ConfigProto() config.log_device_placement = True # to log device placement (on which device the operation ran) config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU #To remove error: Failed to get convolution algorithm. 
This is probably because cuDNN failed to initialize # if you dont use allow growth, the memory of graphics card will be allocated for use by that one process only and other processes cant use it # that one process might not need much gpu memory at all # doing allow_growth allows other processes to use it as well with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) for i in range(10): batch = mnist.train.next_batch(100) #batch_size = 100 feed_dict = {x_batch: batch[0], y_batch: batch[1]} l,_ = sess.run([loss, opt], feed_dict=feed_dict) print(l)