-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
145 lines (105 loc) · 4.92 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Fedora release used as the base of the final driver image. Declared before the
# first FROM so it is a global build argument that FROM lines can reference.
ARG FEDORA_VERSION=38

# ---- Stage "build": Go toolchain used to compile the vgpu-util helper ----
FROM nvcr.io/nvidia/cuda:12.6.1-base-ubi8 AS build

# Architecture of the image being built; drives which Go tarball is downloaded.
ARG TARGETARCH

# pipefail: a failed download must abort the build instead of being hidden
# behind the exit status of the last command of a pipeline.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Clean the dnf cache in the same layer that populated it.
RUN dnf install -y git wget && \
    dnf clean all

ENV GOLANG_VERSION=1.22.7

# Download the appropriate Go binary based on the target architecture for
# multi-arch builds: x86_64/aarch64 are rewritten to amd64/arm64 for the URL.
# -f turns HTTP errors into a non-zero exit status; -L follows redirects.
RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
    curl -fsSL "https://dl.google.com/go/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz" \
    | tar -C /usr/local -xz

ENV PATH=/usr/local/go/bin:$PATH
# Clone the upstream gpu-driver-container sources and compile the program in
# driver/vgpu/src, writing the binary straight to /work/vgpu-util.
# NOTE(review): the clone follows the repository's default branch, so the
# built sources are not pinned to a specific revision.
WORKDIR /work
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver \
    && go build -C driver/vgpu/src -o /work/vgpu-util
# NOTE(review): FROM resolves FEDORA_VERSION from the ARG declared before the
# first FROM of this file; this in-stage redeclaration is kept as-is but looks
# redundant — confirm whether it is still wanted.
ARG FEDORA_VERSION=38

# ---- Final stage: the driver container image ----
FROM fedora:${FEDORA_VERSION}

# key="value" form; the space-separated LABEL form is legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/maastrichtu-library/dsri-nvidia-driver"

# Persist the build-time target architecture and kernel type into the image
# environment so they remain available when the container runs.
ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH
ARG KERNEL_TYPE
ENV KERNEL_TYPE=$KERNEL_TYPE

SHELL ["/bin/bash", "-c"]
# ---- Driver selection (overridable with --build-arg) ----

# Root URL the driver installer is downloaded from.
#ARG BASE_URL=http://us.download.nvidia.com/XFree86/Linux-x86_64
ARG BASE_URL=https://us.download.nvidia.com/tesla

# Driver release to package.
ARG DRIVER_VERSION=550.127.08

# Indicates whether the driver type is passthrough/baremetal or vgpu.
ARG DRIVER_TYPE=passthrough

ARG VGPU_LICENSE_SERVER_TYPE=NLS

# Toggle for the vGPU version compatibility check.
# NOTE(review): the previous comment said the check is enabled by default, yet
# the default here is DISABLE_VGPU_VERSION_CHECK=true — confirm the intended
# default against the script that reads this variable.
ARG DISABLE_VGPU_VERSION_CHECK=true

# Export every argument above into the image environment under the same name.
# NVIDIA_VISIBLE_DEVICES=void avoids a dependency on the container-toolkit for
# the driver container.
ENV BASE_URL=${BASE_URL} \
    DRIVER_VERSION=${DRIVER_VERSION} \
    DRIVER_TYPE=${DRIVER_TYPE} \
    VGPU_LICENSE_SERVER_TYPE=${VGPU_LICENSE_SERVER_TYPE} \
    DISABLE_VGPU_VERSION_CHECK=${DISABLE_VGPU_VERSION_CHECK} \
    NVIDIA_VISIBLE_DEVICES=void
# Base tooling: util-linux (getopt etc.), the dnf "download" command and patch.
# Installed in a single layer, with the dnf cache cleaned in that same layer so
# it does not stay in the image.
RUN dnf install -y \
        'dnf-command(download)' \
        patch \
        util-linux \
    && dnf clean all

# Plain local file: COPY is sufficient, none of ADD's extra behaviours are needed.
COPY install.sh /tmp/
# Fetch the NVIDIA repository public key for the target architecture
# (amd64 -> x86_64, arm64 -> sbsa) from the rhel8 CUDA repo, drop every line
# starting with "Version", store it under /etc/pki/rpm-gpg and verify the
# stored file against a pinned SHA-256. A mismatch fails the build.
RUN expected_sum=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
    key_file=/etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
    repo_arch=${TARGETARCH/amd64/x86_64} && \
    repo_arch=${repo_arch/arm64/sbsa} && \
    curl -fsSL "https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${repo_arch}/D42D0685.pub" \
        | sed '/^Version/d' > "${key_file}" && \
    echo "${expected_sum} ${key_file}" | sha256sum -c --strict -
# Run install.sh with the "depinstall" argument (presumably installs the build
# dependencies — see install.sh), then download the donkey binary and the
# kernel's extract-vmlinux script into /usr/local/bin, mark both executable,
# and create /sbin/ldconfig.real as a symlink to /sbin/ldconfig.
# NOTE(review): extract-vmlinux is taken from the moving "master" branch and
# neither download is checksum-verified — consider pinning both.
RUN sh /tmp/install.sh depinstall \
    && curl --fail --silent --show-error --location \
        --output /usr/local/bin/donkey \
        https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey \
    && curl --fail --silent --show-error --location \
        --output /usr/local/bin/extract-vmlinux \
        https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux \
    && chmod +x /usr/local/bin/donkey /usr/local/bin/extract-vmlinux \
    && ln -s /sbin/ldconfig /sbin/ldconfig.real
# Driver payloads shipped in the build context. COPY is used because no
# archive extraction or remote fetch is involved.
COPY drivers drivers/

# Fetch the installer automatically for passthrough/baremetal types; nothing is
# downloaded when DRIVER_TYPE is "vgpu".
# The installer name uses x86_64/aarch64, so TARGETARCH (amd64/arm64) is mapped.
# curl flags: -f fail on HTTP errors, -sS quiet but still report errors,
# -L follow redirects (the previous lowercase -l is --list-only, which has no
# use for an HTTPS download), -O keep the remote file name.
# RUN echo $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
        cd drivers && \
        DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && \
        curl -fsSL -O "$BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run" && \
        chmod +x "NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run"; fi
# ******************************************************************************
# REMOVED FABRIC MANAGER RHEL WORK FOR NOW - HAVEN'T ATTEMPTED INCLUSION
# ******************************************************************************
# Install fabric-manager packages
# RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
# versionArray=(${DRIVER_VERSION//./ }); \
# DRIVER_BRANCH=${versionArray[0]}; \
# if [ ${versionArray[0]} -ge 470 ] || ([ ${versionArray[0]} == 460 ] && [ ${versionArray[1]} -ge 91 ]); then \
# fmPackage=nvidia-fabric-manager-${DRIVER_VERSION}-1; \
# else \
# fmPackage=nvidia-fabricmanager-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; \
# fi; \
# nscqPackage=libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; \
# dnf module enable -y nvidia-driver:${DRIVER_BRANCH} && \
# dnf install -y ${fmPackage} ${nscqPackage}; fi
# Entrypoint script of the image (see ENTRYPOINT); make sure it is executable.
# The former "RUN cat" of this script was a debugging leftover that only
# printed the file to the build log, so it has been dropped.
COPY nvidia-driver /usr/local/bin/
RUN chmod 755 /usr/local/bin/nvidia-driver

# WORKDIR creates this directory if it does not exist. Nothing in this section
# writes into it; presumably the nvidia-driver script expects it — confirm.
WORKDIR /usr/src/nvidia-$DRIVER_VERSION

# COPY ocp_dtk_entrypoint /usr/local/bin
# Trailing slash makes it explicit that the destination is a directory.
COPY common.sh /usr/local/bin/
# vgpu-util binary compiled in the "build" stage.
COPY --from=build /work/vgpu-util /usr/local/bin/

# Directory the container starts in.
WORKDIR /drivers

# Build-context path copied to kernel/pubkey.x509 (relative to /drivers).
# With the default value, a file named "empty" must exist in the build context.
ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

#ARG PRIVATE_KEY
ARG KERNEL_VERSION=latest
# Image metadata, declared in a single LABEL instruction.
# "version" follows the DRIVER_VERSION the image was built with.
LABEL io.k8s.display-name="NVIDIA Driver Container" \
      name="NVIDIA Driver Container" \
      vendor="NVIDIA" \
      version="${DRIVER_VERSION}" \
      release="N/A" \
      summary="Provision the NVIDIA driver through containers" \
      description="See summary"

# Ship the NGC DL license, taken from the CUDA image used by the "build" stage.
COPY --from=build /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
# Install / upgrade packages here that are required to resolve CVEs.
# The step does nothing when CVE_UPDATES is empty or unset. The variable is
# deliberately left unquoted so that a value holding several package names is
# split into separate arguments.
# dnf is used for consistency with the other package steps of this file, and
# "dnf clean all" clears the package cache in the same layer; the previous
# cleanup removed /var/cache/yum/*, which is not where dnf keeps its cache.
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
        dnf update -y ${CVE_UPDATES} && \
        dnf clean all; \
    fi

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda.repo

# Exec form: nvidia-driver is started directly with the "init" argument.
ENTRYPOINT ["nvidia-driver", "init"]