# Install the NVIDIA container runtime package (nvidia-docker2) and restart Docker.
# Each command must be on its own line: when fused onto one line, the
# `distribution=...` assignment is only a temporary environment prefix, so
# `$distribution` in the URL expands to an empty string and the remaining
# words are passed to yum-config-manager as arguments.

# Build the distribution tag from /etc/os-release (ID followed by VERSION_ID).
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}")

# Register the nvidia-docker repository that matches this distribution.
yum-config-manager --add-repo "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.repo"

# Expire cached metadata so the newly added repository is fetched.
dnf clean expire-cache --refresh

dnf install -y nvidia-docker2

# Restart the Docker daemon after installing nvidia-docker2.
systemctl restart docker
# Pull the nvidia/cuda-arm64 image (tag 11.1-devel-centos8) and start an
# interactive bash session in a new container created from it.
cuda_image=nvidia/cuda-arm64:11.1-devel-centos8
docker pull "${cuda_image}"
docker run -it "${cuda_image}" /bin/bash
# Create the yum repository definition for NVIDIA's machine-learning packages
# (rhel8/sbsa). Written with a here-document instead of an interactive `vi`
# session; every key=value pair must be on its own line for the .repo file to
# be parsed. This overwrites any existing /etc/yum.repos.d/nvidia-ml.repo.
# Note: sslverify=false turns off TLS certificate verification for this repo.
cat > /etc/yum.repos.d/nvidia-ml.repo <<'EOF'
[nvidia-ml]
name=nvidia-ml
baseurl=https://developer.download.nvidia.com/compute/machine-learning/repos/rhel8/sbsa
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
sslverify=false
EOF
# Create the yum repository definition for NVIDIA's CUDA packages (rhel8/sbsa).
# Written with a here-document instead of an interactive `vi` session; every
# key=value pair must be on its own line for the .repo file to be parsed.
# This overwrites any existing /etc/yum.repos.d/cuda.repo.
# Note: sslverify=false turns off TLS certificate verification for this repo.
cat > /etc/yum.repos.d/cuda.repo <<'EOF'
[cuda]
name=cuda
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
sslverify=false
EOF
# Refresh the yum metadata cache, then install the development headers,
# Python 3, compiler, and command-line tools listed below in one transaction.
yum makecache

yum_packages=(
  zlib-devel
  bzip2-devel
  openssl-devel
  ncurses-devel
  sqlite-devel
  readline-devel
  tk-devel
  python3
  python3-devel
  gcc
  wget
  cmake3
  libarchive
  git
  which
)
yum install -y "${yum_packages[@]}"
# Download and install the cuDNN 8.0.4.30 runtime and development RPMs
# (built for CUDA 11.1, aarch64).
# --no-check-certificate makes wget skip TLS certificate validation.
wget https://developer.download.nvidia.cn/compute/machine-learning/repos/rhel8/sbsa/libcudnn8-8.0.4.30-1.cuda11.1.aarch64.rpm --no-check-certificate
rpm -ivh libcudnn8-8.0.4.30-1.cuda11.1.aarch64.rpm

wget https://developer.download.nvidia.cn/compute/machine-learning/repos/rhel8/sbsa/libcudnn8-devel-8.0.4.30-1.cuda11.1.aarch64.rpm --no-check-certificate
# The option must start with an ASCII hyphen ("-ivh"); an en dash here would
# make rpm treat the word as a package file name instead of as options.
rpm -ivh libcudnn8-devel-8.0.4.30-1.cuda11.1.aarch64.rpm
执行 ./Anaconda3-2021.05-Linux-aarch64.sh 时，需根据提示手动按“Enter”键并输入“yes”，默认安装在“/root/anaconda3”目录。
# Download the Anaconda3 2021.05 installer for aarch64, make it executable,
# and run it. Each command must be on its own line; fused together, the
# later words would all be passed to wget as arguments.
wget https://repo.anaconda.com/archive/Anaconda3-2021.05-Linux-aarch64.sh
chmod +x Anaconda3-2021.05-Linux-aarch64.sh
./Anaconda3-2021.05-Linux-aarch64.sh

# Put Anaconda's bin directory first on PATH so `conda` resolves from
# /root/anaconda3 (the install location this guide assumes).
export PATH="/root/anaconda3/bin/:${PATH}"

# Install the Python packages listed below with conda.
conda install astunparse numpy ninja pyyaml setuptools cmake cffi typing_extensions future six requests dataclasses
（可选）conda 代理配置方法如下：
# Optional: configure conda channels and an HTTP(S) proxy via /root/.condarc.
# NOTE(review): the original line had this `export` fused directly onto the
# `cat > /root/.condarc` command; it is kept as its own statement here --
# confirm whether it belongs to this step at all.
export PATH="/root/anaconda3/bin:${PATH}"

# Write the configuration with a here-document (quoted delimiter, so nothing
# inside is expanded by the shell). Replace the "xxxxxx" placeholders with the
# real proxy URLs. This overwrites any existing /root/.condarc.
# Note: ssl_verify: false turns off TLS certificate verification in conda.
cat > /root/.condarc <<'EOF'
channels:
  - conda-forge
  - defaults
proxy_servers:
  http: xxxxxx
  https: xxxxxx
ssl_verify: false
EOF
# Environment for the build: Anaconda's tools first on PATH, and the CUDA
# library directory as the (sole) LD_LIBRARY_PATH entry.
# The two exports are separate statements; fused on one line, the second word
# `export` would itself be registered as an exported variable name.
export PATH="/root/anaconda3/bin:${PATH}"
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
# Clone PyTorch (shallow, with submodules) and build/install it from source.
# Each command must be on its own line; fused together, everything after
# `export` is parsed as arguments to export and the step fails.

# GIT_SSL_NO_VERIFY=1 makes git skip TLS certificate verification.
export GIT_SSL_NO_VERIFY=1
git clone --recursive https://github.com/pytorch/pytorch --depth=1
cd pytorch
git submodule sync
git submodule update --init --recursive --jobs 0

# Point CMake at the active conda environment. When CONDA_PREFIX is unset or
# empty, fall back to the parent of the directory that contains `conda`.
export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}"
python setup.py install
# List the running containers.
# NOTE(review): presumably used to look up the container ID that the
# subsequent `docker cp` / `docker export` commands refer to -- confirm.
docker ps
将 PyTorch 源码等大文件复制到容器外后，在容器内将其删除，再导出镜像。
# Copy the PyTorch source tree out of the container into the current directory.
# NOTE(review): 4ad81495b7ef is presumably an example container ID taken from
# `docker ps` output; substitute the ID of your own container.
docker cp 4ad81495b7ef:/pytorch .

# Delete the large files that are no longer needed.
# NOTE(review): presumably these are meant to run inside the container (so the
# exported image is smaller), not in the host directory that just received the
# copy -- confirm before running.
rm -rf pytorch/
rm -rf Anaconda3-2021.05-Linux-aarch64.sh
rm -f libcudnn8-8.0.4.30-1.cuda11.1.aarch64.rpm
rm -f libcudnn8-devel-8.0.4.30-1.cuda11.1.aarch64.rpm

# Export the container's filesystem as a tar archive.
docker export -o pytorch_cuda.tar 4ad81495b7ef