Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
K
k8_with_mps
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Anshul
k8_with_mps
Commits
49299981
Commit
49299981
authored
Jun 12, 2023
by
Anshul
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added test pods
parent
efcc7cd9
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
189 additions
and
1 deletion
+189
-1
device_plugin/mps-device-plugin.go
device_plugin/mps-device-plugin.go
+6
-0
device_plugin/mps-manager.yaml
device_plugin/mps-manager.yaml
+1
-1
device_plugin/test_pods/tensorflow/Dockerfile
device_plugin/test_pods/tensorflow/Dockerfile
+18
-0
device_plugin/test_pods/tensorflow/build.sh
device_plugin/test_pods/tensorflow/build.sh
+3
-0
device_plugin/test_pods/tensorflow/pod_request.yaml
device_plugin/test_pods/tensorflow/pod_request.yaml
+36
-0
device_plugin/test_pods/tensorflow/requirements.txt
device_plugin/test_pods/tensorflow/requirements.txt
+1
-0
device_plugin/test_pods/tensorflow/tensorflow_test.py
device_plugin/test_pods/tensorflow/tensorflow_test.py
+12
-0
device_plugin/test_pods/vect_add/Dockerfile
device_plugin/test_pods/vect_add/Dockerfile
+7
-0
device_plugin/test_pods/vect_add/build.sh
device_plugin/test_pods/vect_add/build.sh
+3
-0
device_plugin/test_pods/vect_add/pod_request.yaml
device_plugin/test_pods/vect_add/pod_request.yaml
+36
-0
device_plugin/test_pods/vect_add/vect_add
device_plugin/test_pods/vect_add/vect_add
+0
-0
device_plugin/test_pods/vect_add/vect_add.cu
device_plugin/test_pods/vect_add/vect_add.cu
+66
-0
No files found.
device_plugin/mps-device-plugin.go
View file @
49299981
...
...
@@ -201,6 +201,7 @@ func (ps *pluginService) Allocate(ctx context.Context, rqt *pluginapi.AllocateRe
containerAllocateResponse
:=
&
pluginapi
.
ContainerAllocateResponse
{}
totalCompute
:=
len
(
req
.
DevicesIDs
)
envs
:=
make
(
map
[
string
]
string
)
envs
[
"CUDA_MPS_PIPE_DIRECTORY"
]
=
"/tmp/nvidia-mps"
envs
[
"CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"
]
=
strconv
.
Itoa
(
totalCompute
)
envs
[
"CUDA_MPS_PINNED_DEVICE_MEM_LIMIT"
]
=
"0=2G"
mounts
:=
[]
*
pluginapi
.
Mount
{
...
...
@@ -208,6 +209,11 @@ func (ps *pluginService) Allocate(ctx context.Context, rqt *pluginapi.AllocateRe
ContainerPath
:
"/usr/local/nvidia"
,
HostPath
:
"/usr/local/nvidia"
,
},
{
ContainerPath
:
"/tmp/nvidia-mps"
,
HostPath
:
"/tmp/nvidia-mps"
,
ReadOnly
:
false
,
},
}
containerAllocateResponse
.
Envs
=
envs
containerAllocateResponse
.
Mounts
=
mounts
...
...
device_plugin/mps-manager.yaml
View file @
49299981
...
...
@@ -20,7 +20,7 @@ spec:
mps-gpu-enabled
:
"
true"
containers
:
-
name
:
mps-device-plugin
image
:
xzaviourr/mps-device-plugin:v6.
1
image
:
xzaviourr/mps-device-plugin:v6.
2
securityContext
:
privileged
:
true
volumeMounts
:
...
...
device_plugin/test_pods/tensorflow/Dockerfile
0 → 100644
View file @
49299981
# FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04
# GPU smoke-test image: runs a small TensorFlow matmul (tensorflow_test.py)
# to verify the container can see and use the GPU through the device plugin.
FROM tensorflow/tensorflow:latest-gpu

WORKDIR /app

# System libraries needed by common Python imaging/display dependencies.
# Install without recommends and purge the apt cache in the same layer so
# the image stays small; noninteractive avoids tz/debconf prompts in CI.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        libsm6 \
        libxext6 \
        libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY tensorflow_test.py .

CMD ["python3", "tensorflow_test.py"]
device_plugin/test_pods/tensorflow/build.sh
0 → 100644
View file @
49299981
#!/usr/bin/env bash
# Build the TensorFlow GPU test image, export it to a tarball, and re-load
# it so it is present in the local Docker image store without a registry.
# Usage: ./build.sh <version>  -> tags xzaviourr/tensorflow_gpu_test:v<version>
set -euo pipefail

# Fail fast with a usage hint instead of building an image tagged ":v".
if [ $# -lt 1 ]; then
    echo "Usage: $0 <version>" >&2
    exit 1
fi

sudo docker build -t "xzaviourr/tensorflow_gpu_test:v$1" .
sudo docker save -o tensorflow_gpu_test.tar "xzaviourr/tensorflow_gpu_test:v$1"
sudo docker load -i tensorflow_gpu_test.tar
device_plugin/test_pods/tensorflow/pod_request.yaml
0 → 100644
View file @
49299981
# Test pod for the MPS device plugin: requests 25 nvidia.com/gpu units
# (presumably MPS compute slices, not whole GPUs — confirm against the
# device plugin's advertised resources) and runs the TensorFlow matmul test.
apiVersion: v1
kind: Pod
metadata:
  name: tensorflow-gpu-test
spec:
  restartPolicy: OnFailure
  containers:
    - name: tensorflow-gpu-test
      image: "xzaviourr/tensorflow_gpu_test:v1.5"
      securityContext:
        privileged: true
      # Never pull: the image is side-loaded onto the node by build.sh.
      imagePullPolicy: Never
      resources:
        requests:
          nvidia.com/gpu: 25
        limits:
          nvidia.com/gpu: 25
      volumeMounts:
        # Host CUDA runtime libraries, exposed read-only to the container.
        - name: nvidia-driver
          mountPath: /usr/local/cuda/lib64
          readOnly: true
        # MPS control/pipe directory shared with the host MPS daemon.
        # NOTE(review): MPS clients typically need to create pipes under this
        # directory; readOnly: true may prevent that — verify against the
        # device plugin, which mounts the same path read-write.
        - name: nvidia-mps
          mountPath: /tmp/nvidia-mps
          readOnly: true
      env:
        # Must match the pipe directory used by the host's MPS control daemon.
        - name: CUDA_MPS_PIPE_DIRECTORY
          value: /tmp/nvidia-mps
        # Prepend the mounted host CUDA libs to the loader path.
        - name: LD_LIBRARY_PATH
          value: /usr/local/cuda/lib64:$LD_LIBRARY_PATH
  volumes:
    - name: nvidia-mps
      hostPath:
        path: /tmp/nvidia-mps
    - name: nvidia-driver
      hostPath:
        path: /usr/local/cuda/lib64
device_plugin/test_pods/tensorflow/requirements.txt
0 → 100644
View file @
49299981
tensorflow
\ No newline at end of file
device_plugin/test_pods/tensorflow/tensorflow_test.py
0 → 100644
View file @
49299981
"""Sanity-check TensorFlow GPU access inside the test pod.

Enables device-placement logging, multiplies two small matrices (which
exercises the GPU matmul path when a GPU is visible), and reports the
number of physical GPUs TensorFlow can see.
"""
import logging

import tensorflow as tf

# Log which device each op is placed on (CPU vs GPU) to stdout.
tf.debugging.set_log_device_placement(True)

# Two small conformable operands: (2x3) @ (3x2) -> (2x2).
lhs = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
rhs = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
product = tf.matmul(lhs, rhs)

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("GPU used for operation : ", product)

# High-severity marker so the pod log clearly shows the script completed.
logging.critical("Reached end")
device_plugin/test_pods/vect_add/Dockerfile
0 → 100644
View file @
49299981
# Minimal CUDA test image: compiles vect_add.cu at image build time and runs
# the resulting binary as the container command.
FROM nvidia/cuda:12.0.0-devel-ubuntu22.04

WORKDIR /app

COPY vect_add.cu .

# Compile inside the image so the binary matches the container's CUDA 12
# toolkit and libraries (the -devel base provides nvcc).
RUN nvcc -o vect_add vect_add.cu

CMD ["./vect_add"]
device_plugin/test_pods/vect_add/build.sh
0 → 100644
View file @
49299981
#!/usr/bin/env bash
# Build the vect_add CUDA test image, export it to a tarball, and re-load it
# so it is present in the local Docker image store without a registry.
# Usage: ./build.sh <version>  -> tags xzaviourr/vect_add:v<version>
set -euo pipefail

# Fail fast with a usage hint instead of building an image tagged ":v".
if [ $# -lt 1 ]; then
    echo "Usage: $0 <version>" >&2
    exit 1
fi

sudo docker build -t "xzaviourr/vect_add:v$1" .
sudo docker save -o vect_add.tar "xzaviourr/vect_add:v$1"
sudo docker load -i vect_add.tar
device_plugin/test_pods/vect_add/pod_request.yaml
0 → 100644
View file @
49299981
# Test pod for the MPS device plugin: requests 25 nvidia.com/gpu units
# (presumably MPS compute slices — confirm against the device plugin) and
# runs the vect_add CUDA stress binary.
apiVersion: v1
kind: Pod
metadata:
  name: vect-add
spec:
  restartPolicy: OnFailure
  containers:
    - name: vect-add
      image: "xzaviourr/vect_add:v1.2"
      securityContext:
        privileged: true
      # Never pull: the image is side-loaded onto the node by build.sh.
      imagePullPolicy: Never
      resources:
        requests:
          nvidia.com/gpu: 25
        limits:
          nvidia.com/gpu: 25
  # The explicit driver/MPS mounts below are disabled — presumably because
  # the device plugin's Allocate response injects the equivalent mounts and
  # env vars itself; confirm before deleting for good.
  # volumeMounts:
  #   - name: nvidia-driver
  #     mountPath: /usr/local/nvidia
  #     readOnly: true
  #   - name: nvidia-mps
  #     mountPath: /tmp/nvidia-mps
  #     readOnly: true
  # env:
  #   - name: CUDA_MPS_PIPE_DIRECTORY
  #     value: /tmp/nvidia-mps
  #   - name: LD_LIBRARY_PATH
  #     value: /usr/local/cuda/lib64:$LD_LIBRARY_PATH
  # volumes:
  #   - name: nvidia-mps
  #     hostPath:
  #       path: /tmp/nvidia-mps
  #   - name: nvidia-driver
  #     hostPath:
  #       path: /usr/local/nvidia
device_plugin/test_pods/vect_add/vect_add
0 → 100755
View file @
49299981
File added
device_plugin/test_pods/vect_add/vect_add.cu
0 → 100644
View file @
49299981
#include <cstdio>
#include <cstdlib>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
// Elements per device buffer: 1e8 floats = ~400 MB per array (three arrays
// on the device), sized so global-memory traffic dominates the kernel.
const int N = 100000000;
// Performs three chained element-wise additions per element, rewriting all
// three buffers each pass to generate sustained global-memory read/write
// traffic. Expects a 1-D launch with at least N total threads; threads past
// the end of the buffers exit early.
__global__ void memoryIntensiveKernel(float *a, float *b, float *c)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= N)
        return;
    // The three statements are intentionally dependent: each reads the
    // result of the previous one, forcing alternating loads and stores.
    c[idx] = a[idx] + b[idx];
    b[idx] = a[idx] + c[idx];
    a[idx] = b[idx] + c[idx];
}
// Host driver: allocates three N-element float device buffers, initialises
// a and b from the host (a[i] = b[i] = i), launches memoryIntensiveKernel
// 5000 times to stress global-memory bandwidth, copies c back, and reports
// the elapsed kernel-loop time measured with CUDA events.
//
// Fixes over the original: every CUDA call is error-checked (launch errors
// via cudaGetLastError), and the host arrays (new[]) and CUDA events are
// released — the original leaked all of them.
int main()
{
    // Minimal error handling: abort with a diagnostic on any CUDA failure.
    auto check = [](cudaError_t err, const char *what) {
        if (err != cudaSuccess)
        {
            std::fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
            std::exit(EXIT_FAILURE);
        }
    };

    const size_t bytes = static_cast<size_t>(N) * sizeof(float);

    cudaEvent_t start, stop;
    check(cudaEventCreate(&start), "cudaEventCreate(start)");
    check(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    float *a, *b, *c;
    check(cudaMalloc((void **)&a, bytes), "cudaMalloc(a)");
    check(cudaMalloc((void **)&b, bytes), "cudaMalloc(b)");
    check(cudaMalloc((void **)&c, bytes), "cudaMalloc(c)");

    // Host-side source data for the two input buffers.
    float *a_h = new float[N];
    float *b_h = new float[N];
    for (int i = 0; i < N; i++)
    {
        a_h[i] = i;
        b_h[i] = i;
    }
    check(cudaMemcpy(a, a_h, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(a_h->a)");
    check(cudaMemcpy(b, b_h, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b_h->b)");

    int blockSize = 512;
    // Ceiling division so the grid covers all N elements.
    int numBlocks = (N + blockSize - 1) / blockSize;

    check(cudaEventRecord(start), "cudaEventRecord(start)");
    for (int iter = 1; iter < 5001; iter++)
    {
        memoryIntensiveKernel<<<numBlocks, blockSize>>>(a, b, c);
        // Launch-configuration errors surface only via cudaGetLastError().
        check(cudaGetLastError(), "kernel launch");
        // NOTE: launches are asynchronous — this reports that the launch
        // was issued, not that the iteration finished on the GPU.
        std::cout << iter << " Iteration completed" << std::endl;
    }
    check(cudaEventRecord(stop), "cudaEventRecord(stop)");

    float *c_h = new float[N];
    // Blocking copy: also synchronises with all preceding kernel launches.
    check(cudaMemcpy(c_h, c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c->c_h)");

    check(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    float mil = 0;
    check(cudaEventElapsedTime(&mil, start, stop), "cudaEventElapsedTime");
    printf("ET: %f ms\n", mil);

    // Release all device and host resources (leaked in the original).
    check(cudaFree(a), "cudaFree(a)");
    check(cudaFree(b), "cudaFree(b)");
    check(cudaFree(c), "cudaFree(c)");
    check(cudaEventDestroy(start), "cudaEventDestroy(start)");
    check(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    delete[] a_h;
    delete[] b_h;
    delete[] c_h;

    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment