Commit cc33602f authored by Anshul

mps device plugin from nebuly

parent 49299981
version: v1
flags:
  migStrategy: "none"
  failOnInitError: true
  nvidiaDriverRoot: "/lib/x86_64-linux-gnu"
  plugin:
    passDeviceSpecs: false
    deviceListStrategy: envvar
    deviceIDStrategy: uuid
\ No newline at end of file
{
  "version": "v1",
  "flags": {
    "migStrategy": "none",
    "failOnInitError": true,
    "nvidiaDriverRoot": "/",
    "gdsEnabled": false,
    "mofedEnabled": false,
    "plugin": {
      "passDeviceSpecs": false,
      "deviceListStrategy": [
        "envar"
      ],
      "deviceIDStrategy": "uuid",
      "cdiAnnotationPrefix": "cdi.k8s.io/",
      "nvidiaCTKPath": "/usr/bin/nvidia-ctk",
      "containerDriverRoot": "/driver-root"
    }
  },
  "resources": {
    "gpus": [
      {
        "pattern": "*",
        "name": "nvidia.com/gpu"
      }
    ]
  },
  "sharing": {
    "timeSlicing": {}
  }
}
\ No newline at end of file
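The JSON above is the effective configuration the plugin reports at startup. One way to confirm the running plugin actually picked these flags up is to read its logs; a minimal check, assuming the nebuly-nvidia namespace used by the setup script below (the label selector is an assumption about the chart's standard labels):

kubectl get pods -n nebuly-nvidia
kubectl logs -n nebuly-nvidia -l app.kubernetes.io/name=nvidia-device-plugin --all-containers --tail=100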
#!/bin/bash
# Setup kubernetes cluster
sudo kubeadm reset -f # Reset any existing control plane non-interactively
rm -f "$HOME/.kube/config"
sudo rm -rf /etc/cni/net.d
sudo swapoff -a # Disable swap (required by kubelet)
sudo kubeadm init --pod-network-cidr=10.244.0.0/16 # Initialize cluster
mkdir -p "$HOME/.kube"
sudo cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config"
sudo chown "$(id -u):$(id -g)" "$HOME/.kube/config"
kubectl apply -f https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml # Use flannel for networking
kubectl taint nodes ub-10 node-role.kubernetes.io/master- # Allow device plugins and pods to run on master
# Deploy nvidia daemonset
# kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml
# helm install oci://ghcr.io/nebuly-ai/helm-charts/nvidia-device-plugin --version 0.13.0 --generate-name -n nebuly-nvidia --create-namespace
kubectl label node ub-10 nos.nebuly.com/gpu-partitioning=mps
helm install oci://ghcr.io/nebuly-ai/helm-charts/nvidia-device-plugin --version 0.13.0 --generate-name -n nebuly-nvidia --create-namespace --values mps-config.yaml
# helm upgrade -i nvdp nvdp/nvidia-device-plugin --version=0.14.0 --namespace nvidia-device-plugin --create-namespace --set-file config.map.config=./config.yaml
# helm upgrade -i nvdp nvdp/nvidia-device-plugin --version=0.14.0 --namespace nvidia-device-plugin --create-namespace --set-file config.map.config=./config_map.yaml
# helm upgrade -i nvdp nvdp/nvidia-device-plugin --version=0.14.0 --namespace nvidia-device-plugin --create-namespace
\ No newline at end of file
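Once the helm install above finishes, the node should advertise the renamed MPS resource defined in this commit's configuration. A quick check, assuming the node name ub-10 and the nvidia.com/gpu-2gb rename:

kubectl describe node ub-10 | grep nvidia.com
kubectl get node ub-10 -o jsonpath='{.status.allocatable}'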
@@ -200,21 +200,64 @@ func (ps *pluginService) Allocate(ctx context.Context, rqt *pluginapi.AllocateRe
    for _, req := range rqt.ContainerRequests {
        containerAllocateResponse := &pluginapi.ContainerAllocateResponse{}
        totalCompute := len(req.DevicesIDs)

        // Environment variables that attach the container to the host MPS control
        // daemon. The number of requested device IDs is used directly as the MPS
        // active-thread percentage for this client.
        envs := make(map[string]string)
        envs["CUDA_MPS_PIPE_DIRECTORY"] = "/tmp/nvidia-mps"
        envs["CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"] = strconv.Itoa(totalCompute)
        // NOTE: the memory limit and visible device are hardcoded for GPU 0.
        envs["CUDA_MPS_PINNED_DEVICE_MEM_LIMIT"] = "0=2G"
        envs["NVIDIA_DRIVER_CAPABILITIES"] = "compute,utility"
        envs["NVIDIA_RUNTIME"] = "nvidia"
        envs["NVIDIA_VERSION"] = "11.7"
        envs["NVIDIA_VISIBLE_DEVICES"] = "0"
        // Expose the NVIDIA device nodes needed by the CUDA driver and MPS inside
        // the container. The paths are hardcoded for a single-GPU node (GPU 0).
        deviceNodes := []string{
            "/dev/nvidiactl",
            "/dev/nvidia-modeset",
            "/dev/nvidia-uvm",
            "/dev/nvidia-uvm-tools",
            "/dev/nvidia0",
            "/dev/nvidia-caps/nvidia-cap1",
            "/dev/nvidia-caps/nvidia-cap2",
        }
        for _, devicePath := range deviceNodes {
            containerAllocateResponse.Devices = append(containerAllocateResponse.Devices, &pluginapi.DeviceSpec{
                HostPath:      devicePath,
                ContainerPath: devicePath,
                Permissions:   "mrw",
            })
        }
        mounts := []*pluginapi.Mount{
            {
                // NVIDIA user-space libraries from the host.
                ContainerPath: "/usr/local/nvidia",
                HostPath:      "/usr/local/nvidia",
            },
            {
                // MPS pipe directory shared with the host MPS control daemon.
                ContainerPath: "/tmp/nvidia-mps",
                HostPath:      "/tmp/nvidia-mps",
                ReadOnly:      false,
            },
            {
                // NOTE: this entry targets the same ContainerPath as the first mount
                // and is mounted over it; the host path is the driver-version-specific
                // source directory.
                ContainerPath: "/usr/local/nvidia",
                HostPath:      "/usr/src/nvidia-515.76",
                ReadOnly:      false,
            },
        }
        log.Println("Prepared device specs, mounts, and MPS environment for the allocate response")
        containerAllocateResponse.Envs = envs
        containerAllocateResponse.Mounts = mounts
        allocateResponse.ContainerResponses = append(allocateResponse.ContainerResponses, containerAllocateResponse)
......
@@ -20,7 +20,7 @@ spec:
        mps-gpu-enabled: "true"
      containers:
        - name: mps-device-plugin
-         image: xzaviourr/mps-device-plugin:v6.2
+         image: xzaviourr/mps-device-plugin:v7.3
          securityContext:
            privileged: true
          volumeMounts:
......
FROM nvidia/cuda:12.0.0-devel-ubuntu22.04
WORKDIR /app
COPY gpu_detection.cu .
RUN nvcc -o gpu_detection gpu_detection.cu
CMD ["./gpu_detection"]
\ No newline at end of file
#!/bin/bash
# Build the gpu_detection test image and export/re-import it so it is available
# locally for pods that use imagePullPolicy: Never. Pass the image version as $1.
sudo docker build -t "xzaviourr/gpu_detection:v$1" .
sudo docker save -o "gpu_detection:v$1.tar" "xzaviourr/gpu_detection:v$1"
sudo docker load -i "gpu_detection:v$1.tar"
\ No newline at end of file
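The save/load round-trip above only makes the image visible to Docker itself. If the cluster's container runtime is containerd rather than Docker (an assumption about the runtime, not part of this commit), the tarball would instead need to be imported into containerd's k8s.io namespace, for example for the v1.0 tag used by the pod manifests below:

sudo ctr -n k8s.io images import gpu_detection:v1.0.tar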
#include <cuda.h>
#include <iostream>
#include <cuda_runtime.h>
#include <unistd.h>

int main() {
    // Report the CUDA runtime version visible inside the container.
    int cudaVersion;
    cudaError_t cudaResult = cudaRuntimeGetVersion(&cudaVersion);
    if (cudaResult != cudaSuccess) {
        printf("Failed to retrieve CUDA version: %s\n", cudaGetErrorString(cudaResult));
        return 1;
    }
    printf("CUDA Version: %d.%d\n", cudaVersion / 1000, (cudaVersion % 100) / 10);

    // Report the NVIDIA driver version.
    int driverVersion;
    cudaResult = cudaDriverGetVersion(&driverVersion);
    if (cudaResult != cudaSuccess) {
        printf("Failed to retrieve NVIDIA driver version: %s\n", cudaGetErrorString(cudaResult));
        return 1;
    }
    int major = driverVersion / 1000;
    int minor = (driverVersion % 100) / 10;
    printf("NVIDIA Driver Version: %d.%d\n", major, minor);

    // Check whether any GPU devices are visible to the container.
    int numDevices = 0;
    cudaResult = cudaGetDeviceCount(&numDevices);
    if (cudaResult != cudaSuccess || numDevices == 0) {
        printf("No GPU device found. Make sure the container has GPU access.\n");
    } else {
        printf("%d GPU device(s) found. The container has GPU access.\n", numDevices);
    }

    // Keep the container alive for inspection without busy-spinning a CPU core.
    while (1) {
        sleep(60);
    }
    return 0;
}
\ No newline at end of file
apiVersion: v1
kind: Pod
metadata:
  name: gpu-detection
spec:
  restartPolicy: OnFailure
  hostIPC: true
  securityContext:
    runAsUser: 1000
  containers:
    - name: gpu-detection
      image: "xzaviourr/gpu_detection:v1.0"
      imagePullPolicy: Never
      resources:
        requests:
          nvidia.com/gpu-2gb: 1
        limits:
          nvidia.com/gpu-2gb: 1
      # volumeMounts:
      #   - name: nvidia-smi
      #     mountPath: /app/smi
      #   - name: lib-x
      #     mountPath: /lib/x86_64-linux-gnu
  # volumes:
  #   - name: nvidia-smi
  #     hostPath:
  #       path: /usr/bin/nvidia-smi
  #   - name: lib-x
  #     hostPath:
  #       path: /lib/x86_64-linux-gnu
\ No newline at end of file
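Once the gpu-detection pod above is Running, the MPS wiring performed by the Allocate code earlier in this commit can be spot-checked from inside the container; a sketch, assuming the pod name gpu-detection from the manifest above:

kubectl exec gpu-detection -- env | grep -E 'CUDA_MPS|NVIDIA_VISIBLE_DEVICES'
kubectl logs gpu-detection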
apiVersion: v1
kind: Pod
metadata:
  name: gpu-detection1
spec:
  restartPolicy: OnFailure
  containers:
    - name: gpu-detection1
      image: "xzaviourr/gpu_detection:v1.0"
      # NOTE: these are Docker CLI flags; as container args they are passed
      # verbatim to the entrypoint and have no effect in Kubernetes.
      args:
        - "--runtime=nvidia"
        - "--ipc=host"
        - "--gpus=all"
      securityContext:
        privileged: true
      imagePullPolicy: Never
      resources:
        requests:
          nvidia.com/gpu: 25
        limits:
          nvidia.com/gpu: 25
\ No newline at end of file
@@ -3,18 +3,19 @@ kind: Pod
metadata:
  name: vect-add
spec:
  hostIPC: true
  restartPolicy: OnFailure
  securityContext:
    runAsUser: 1000
  containers:
    - name: vect-add
      image: "xzaviourr/vect_add:v1.2"
      securityContext:
        privileged: true
      imagePullPolicy: Never
      resources:
        requests:
-         nvidia.com/gpu: 25
+         nvidia.com/gpu-2gb: 4
        limits:
-         nvidia.com/gpu: 25
+         nvidia.com/gpu-2gb: 4
      # volumeMounts:
      #   - name: nvidia-driver
      #     mountPath: /usr/local/nvidia
......
# Plugin configuration
# Only one of "name" or "map" should ever be set for a given deployment.
# Use "name" to point to an external ConfigMap with a list of configurations, or to have the Chart
# create a ConfigMap for you when "create" is true.
# Use "map" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "map" might be:
# config:
#   map:
#     default: |-
#       version: v1
#       flags:
#         migStrategy: none
#     mig-single: |-
#       version: v1
#       flags:
#         migStrategy: single
#     mig-mixed: |-
#       version: v1
#       flags:
#         migStrategy: mixed
config:
  # ConfigMap name if pulling from an external ConfigMap
  name: ""
  # If true, the ConfigMap containing the plugin configuration files will be created by the Chart,
  # initialized with an empty default configuration.
  # Otherwise, the Chart expects an existing ConfigMap named .Values.config.name.
  create: true
  # Set of named configs to build an integrated ConfigMap from
  map:
    default: |-
      version: v1
      flags:
        migStrategy: none
      sharing:
        mps:
          failRequestsGreaterThanOne: true
          resources:
            - name: nvidia.com/gpu
              rename: nvidia.com/gpu-2gb
              memoryGB: 2
              replicas: 8
              devices: ["0"]
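  # Note: with the MPS sharing block above, GPU "0" is advertised to the cluster as
  # 8 replicas of the renamed resource nvidia.com/gpu-2gb, each intended to be capped
  # at 2 GB of device memory by the MPS server.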
  # Default config name within the ConfigMap
  default: ""
  # List of fallback strategies to attempt if no config is selected and no default is provided
  fallbackStrategies: ["named", "single"]
legacyDaemonsetAPI: null
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null
fullnameOverride: ""
namespaceOverride: ""
selectorLabelsOverride: {}
allowDefaultNamespace: false
imagePullSecrets: []
image:
  repository: ghcr.io/nebuly-ai/k8s-device-plugin
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""
mps:
  enabled: true
  # The ID of the user used to run the MPS server.
  # All the containers requesting GPU resources must run as this user.
  userID: 1000
  image:
    repository: ghcr.io/nebuly-ai/nvidia-mps-server
    pullPolicy: IfNotPresent
    tag: "0.0.1"
updateStrategy:
  type: RollingUpdate
podAnnotations: {}
podSecurityContext: {}
securityContext: {}
resources: {}
nodeSelector:
  nos.nebuly.com/gpu-partitioning: "mps"
affinity: {}
tolerations:
  # This toleration is deprecated. Kept here for backward compatibility
  # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
  - key: "kubernetes.azure.com/scalesetpriority"
    operator: "Equal"
    value: "spot"
    effect: "NoSchedule"
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
runtimeClassName: null
# Subcharts
nfd:
  nameOverride: node-feature-discovery
  master:
    extraLabelNs:
      - nvidia.com
    serviceAccount:
      name: node-feature-discovery
  worker:
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Equal"
        value: ""
        effect: "NoSchedule"
      - key: "nvidia.com/gpu"
        operator: "Equal"
        value: "present"
        effect: "NoSchedule"
    config:
      sources:
        pci:
          deviceClassWhitelist:
            - "02"
            - "0200"
            - "0207"
            - "0300"
            - "0302"
          deviceLabelFields:
            - vendor
gfd:
  enabled: false
  nameOverride: gpu-feature-discovery
  namespaceOverride: ""
\ No newline at end of file
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0
          name: nvidia-device-plugin-ctr
          env:
            - name: FAIL_ON_INIT_ERROR
              value: "false"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins