Commit 49299981 authored by Anshul

Added test pods

parent efcc7cd9
@@ -201,6 +201,7 @@ func (ps *pluginService) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest
	containerAllocateResponse := &pluginapi.ContainerAllocateResponse{}
	// Each granted virtual device stands for one percentage point of GPU
	// compute, so the MPS thread cap equals the number of devices requested.
	totalCompute := len(req.DevicesIDs)
	envs := make(map[string]string)
	envs["CUDA_MPS_PIPE_DIRECTORY"] = "/tmp/nvidia-mps"                    // where CUDA clients find the MPS control daemon
	envs["CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"] = strconv.Itoa(totalCompute) // cap on SM utilization for this container
	envs["CUDA_MPS_PINNED_DEVICE_MEM_LIMIT"] = "0=2G"                      // cap device 0 memory at 2G
	mounts := []*pluginapi.Mount{
@@ -208,6 +209,11 @@ func (ps *pluginService) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest
ContainerPath: "/usr/local/nvidia",
HostPath: "/usr/local/nvidia",
},
{
ContainerPath: "/tmp/nvidia-mps",
HostPath: "/tmp/nvidia-mps",
ReadOnly: false,
},
}
containerAllocateResponse.Envs = envs
containerAllocateResponse.Mounts = mounts
......
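For orientation, here is a minimal sketch of the Allocate method these hunks live in. The per-container loop and response assembly fall outside the lines shown above, so the package path (k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1), the empty pluginService type, and the loop shape are assumptions based on the standard kubelet device-plugin API rather than the repository's verbatim code:

package plugin

import (
	"context"
	"strconv"

	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)

type pluginService struct{}

// Allocate answers the kubelet with one ContainerAllocateResponse per
// container request. Each granted virtual device stands for one percentage
// point of GPU compute, so a pod requesting nvidia.com/gpu: 25 is pinned
// to a 25% MPS active-thread share.
func (ps *pluginService) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	response := &pluginapi.AllocateResponse{}
	for _, req := range rqt.ContainerRequests {
		response.ContainerResponses = append(response.ContainerResponses,
			&pluginapi.ContainerAllocateResponse{
				Envs: map[string]string{
					"CUDA_MPS_PIPE_DIRECTORY":           "/tmp/nvidia-mps",
					"CUDA_MPS_ACTIVE_THREAD_PERCENTAGE": strconv.Itoa(len(req.DevicesIDs)),
					"CUDA_MPS_PINNED_DEVICE_MEM_LIMIT":  "0=2G",
				},
				Mounts: []*pluginapi.Mount{
					{ContainerPath: "/usr/local/nvidia", HostPath: "/usr/local/nvidia"},
					{ContainerPath: "/tmp/nvidia-mps", HostPath: "/tmp/nvidia-mps"},
				},
			})
	}
	return response, nil
}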
@@ -20,7 +20,7 @@ spec:
        mps-gpu-enabled: "true"
      containers:
        - name: mps-device-plugin
          image: xzaviourr/mps-device-plugin:v6.1
          image: xzaviourr/mps-device-plugin:v6.2
          securityContext:
            privileged: true
          volumeMounts:
......
# FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04
FROM tensorflow/tensorflow:latest-gpu
WORKDIR /app
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    libsm6 \
    libxext6 \
    libxrender-dev
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY tensorflow_test.py .
CMD ["python3", "tensorflow_test.py"]
\ No newline at end of file
# Build the test image locally, then save/load the tar so the node's container
# runtime already has it (the test pods use imagePullPolicy: Never).
sudo docker build -t xzaviourr/tensorflow_gpu_test:v$1 .
sudo docker save -o tensorflow_gpu_test.tar xzaviourr/tensorflow_gpu_test:v$1
sudo docker load -i tensorflow_gpu_test.tar
\ No newline at end of file
apiVersion: v1
kind: Pod
metadata:
  name: tensorflow-gpu-test
spec:
  restartPolicy: OnFailure
  containers:
    - name: tensorflow-gpu-test
      image: "xzaviourr/tensorflow_gpu_test:v1.5"
      securityContext:
        privileged: true
      imagePullPolicy: Never
      resources:
        requests:
          nvidia.com/gpu: 25
        limits:
          nvidia.com/gpu: 25
      volumeMounts:
        - name: nvidia-driver
          mountPath: /usr/local/cuda/lib64
          readOnly: true
        - name: nvidia-mps
          mountPath: /tmp/nvidia-mps
          readOnly: true
      env:
        - name: CUDA_MPS_PIPE_DIRECTORY
          value: /tmp/nvidia-mps
        - name: LD_LIBRARY_PATH
          value: /usr/local/cuda/lib64:$LD_LIBRARY_PATH
  volumes:
    - name: nvidia-mps
      hostPath:
        path: /tmp/nvidia-mps
    - name: nvidia-driver
      hostPath:
        path: /usr/local/cuda/lib64
\ No newline at end of file
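Since the pod depends on the host's MPS pipe directory being mounted at /tmp/nvidia-mps, a quick in-container sanity check can confirm the wiring before running real GPU work. This helper is hypothetical (it is not part of the commit); it only inspects the env var and mount that the spec above declares:

package main

import (
	"fmt"
	"os"
)

func main() {
	// CUDA_MPS_PIPE_DIRECTORY is set in the pod spec (and injected by the
	// device plugin); CUDA clients use it to find the MPS control daemon.
	dir := os.Getenv("CUDA_MPS_PIPE_DIRECTORY")
	if dir == "" {
		fmt.Println("CUDA_MPS_PIPE_DIRECTORY not set; CUDA will bypass MPS")
		return
	}
	if _, err := os.Stat(dir); err != nil {
		fmt.Printf("MPS pipe directory %q not mounted: %v\n", dir, err)
		return
	}
	fmt.Printf("MPS pipe directory %q is mounted; CUDA clients can attach to MPS\n", dir)
}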
tensorflow
\ No newline at end of file
import logging

import tensorflow as tf

# Log which device (CPU or GPU) each op is placed on.
tf.debugging.set_log_device_placement(True)

# Create some tensors and run a matmul so device placement is exercised.
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Result of GPU matmul: ", c)
logging.critical("Reached end")
\ No newline at end of file
FROM nvidia/cuda:12.0.0-devel-ubuntu22.04
WORKDIR /app
COPY vect_add.cu .
RUN nvcc -o vect_add vect_add.cu
CMD ["./vect_add"]
\ No newline at end of file
# Same local build/save/load flow for the CUDA vector-add test image.
sudo docker build -t xzaviourr/vect_add:v$1 .
sudo docker save -o vect_add.tar xzaviourr/vect_add:v$1
sudo docker load -i vect_add.tar
\ No newline at end of file
apiVersion: v1
kind: Pod
metadata:
  name: vect-add
spec:
  restartPolicy: OnFailure
  containers:
    - name: vect-add
      image: "xzaviourr/vect_add:v1.2"
      securityContext:
        privileged: true
      imagePullPolicy: Never
      resources:
        requests:
          nvidia.com/gpu: 25
        limits:
          nvidia.com/gpu: 25
      # The explicit mounts and MPS env are left commented out here; the
      # device plugin injects them through its Allocate response.
      # volumeMounts:
      #   - name: nvidia-driver
      #     mountPath: /usr/local/nvidia
      #     readOnly: true
      #   - name: nvidia-mps
      #     mountPath: /tmp/nvidia-mps
      #     readOnly: true
      # env:
      #   - name: CUDA_MPS_PIPE_DIRECTORY
      #     value: /tmp/nvidia-mps
      #   - name: LD_LIBRARY_PATH
      #     value: /usr/local/cuda/lib64:$LD_LIBRARY_PATH
  # volumes:
  #   - name: nvidia-mps
  #     hostPath:
  #       path: /tmp/nvidia-mps
  #   - name: nvidia-driver
  #     hostPath:
  #       path: /usr/local/nvidia
\ No newline at end of file
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>

const int N = 100000000; // 100M floats per array (~400 MB each)

// Read-write-heavy kernel: every thread touches all three arrays.
__global__ void memoryIntensiveKernel(float *a, float *b, float *c)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < N)
    {
        c[i] = a[i] + b[i];
        b[i] = a[i] + c[i];
        a[i] = b[i] + c[i];
    }
}

int main()
{
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    // Device buffers.
    float *a, *b, *c;
    cudaMalloc((void **)&a, N * sizeof(float));
    cudaMalloc((void **)&b, N * sizeof(float));
    cudaMalloc((void **)&c, N * sizeof(float));

    // Host buffers, initialized with ramp data.
    float *a_h = new float[N];
    float *b_h = new float[N];
    for (int i = 0; i < N; i++)
    {
        a_h[i] = i;
        b_h[i] = i;
    }
    cudaMemcpy(a, a_h, N * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(b, b_h, N * sizeof(float), cudaMemcpyHostToDevice);

    int blockSize = 512;
    int numBlocks = (N + blockSize - 1) / blockSize;

    cudaEventRecord(start);
    for (int iter = 1; iter < 5001; iter++)
    {
        memoryIntensiveKernel<<<numBlocks, blockSize>>>(a, b, c);
        // Kernel launches are asynchronous: this marks the launch only,
        // not completion of the iteration's work.
        std::cout << iter << " Iteration launched" << std::endl;
    }
    cudaEventRecord(stop);

    // The synchronous copy below also drains the queued kernels.
    float *c_h = new float[N];
    cudaMemcpy(c_h, c, N * sizeof(float), cudaMemcpyDeviceToHost);

    cudaFree(a);
    cudaFree(b);
    cudaFree(c);

    cudaEventSynchronize(stop);
    float mil = 0;
    cudaEventElapsedTime(&mil, start, stop);
    printf("ET: %f ms\n", mil);

    delete[] a_h;
    delete[] b_h;
    delete[] c_h;
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return 0;
}