mirror of
https://github.com/openshift/openshift-docs.git
synced 2026-02-05 12:46:18 +01:00
102 lines
2.2 KiB
Plaintext
102 lines
2.2 KiB
Plaintext
// Module included in the following assemblies:
|
|
//
|
|
// * hardware_accelerators/amd-gpu-operator.adoc
|
|
|
|
:_mod-docs-content-type: PROCEDURE
|
|
[id="amd-testing-the-amd-gpu-operator_{context}"]
|
|
= Testing the AMD GPU Operator
|
|
|
|
Use the following procedure to test the ROCmInfo installation and view the logs for the AMD MI210 GPU.
|
|
|
|
.Procedure
|
|
|
|
. Create a YAML file that tests ROCmInfo:
|
|
+
|
|
[source,terminal]
|
|
----
|
|
$ cat << EOF > rocminfo.yaml

apiVersion: v1
kind: Pod
metadata:
  name: rocminfo
spec:
  containers:
  - image: docker.io/rocm/pytorch:latest
    name: rocminfo
    command: ["/bin/sh","-c"]
    args: ["rocminfo"]
    resources:
      limits:
        amd.com/gpu: 1
      requests:
        amd.com/gpu: 1
  restartPolicy: Never
EOF
|
|
----
|
|
|
|
. Create the `rocminfo` pod:
|
|
+
|
|
[source,terminal]
|
|
----
|
|
$ oc create -f rocminfo.yaml
|
|
----
|
|
+
|
|
.Example output
|
|
[source,terminal]
|
|
----
|
|
pod/rocminfo created
|
|
----
|
|
|
|
. Check the `rocminfo` log with one MI210 GPU:
|
|
+
|
|
[source,terminal]
|
|
----
|
|
$ oc logs rocminfo | grep -A5 "Agent"
|
|
----
|
|
+
|
|
.Example output
|
|
[source,terminal]
|
|
----
|
|
HSA Agents
|
|
==========
|
|
*******
|
|
Agent 1
|
|
*******
|
|
Name: Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz
|
|
Uuid: CPU-XX
|
|
Marketing Name: Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz
|
|
Vendor Name: CPU
|
|
--
|
|
Agent 2
|
|
*******
|
|
Name: Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz
|
|
Uuid: CPU-XX
|
|
Marketing Name: Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz
|
|
Vendor Name: CPU
|
|
--
|
|
Agent 3
|
|
*******
|
|
Name: gfx90a
|
|
Uuid: GPU-024b776f768a638b
|
|
Marketing Name: AMD Instinct MI210
|
|
Vendor Name: AMD
|
|
----
|
|
|
|
. Delete the pod:
|
|
+
|
|
[source,terminal]
|
|
----
|
|
$ oc delete -f rocminfo.yaml
|
|
----
|
|
+
|
|
.Example output
|
|
[source,terminal]
|
|
----
|
|
pod "rocminfo" deleted
|
|
----
|
|
|
|
|
|
|