From 229c7e6569810f392db87ce51ea73fcd9193b107 Mon Sep 17 00:00:00 2001 From: Shikha Jhala Date: Tue, 21 Feb 2023 12:39:07 -0500 Subject: [PATCH] CNV-18331: Cluster DPDK readiness checkup --- .../virt-checking-cluster-dpdk-readiness.adoc | 212 ++++++++++++++++++ modules/virt-dpdk-config-map-parameters.adoc | 77 +++++++ .../virt-running-cluster-checkups.adoc | 10 + 3 files changed, 299 insertions(+) create mode 100644 modules/virt-checking-cluster-dpdk-readiness.adoc create mode 100644 modules/virt-dpdk-config-map-parameters.adoc diff --git a/modules/virt-checking-cluster-dpdk-readiness.adoc b/modules/virt-checking-cluster-dpdk-readiness.adoc new file mode 100644 index 0000000000..065ccf8e08 --- /dev/null +++ b/modules/virt-checking-cluster-dpdk-readiness.adoc @@ -0,0 +1,212 @@ +// Module included in the following assemblies: +// +// * virt/support/monitoring/virt-running-cluster-checkups.adoc + +:_content-type: PROCEDURE +[id="virt-checking-cluster-dpdk-readiness_{context}"] += Checking cluster readiness to run DPDK applications with zero packet loss + +Use a predefined checkup to verify if your {product-title} cluster node can run a virtual machine (VM) with a Data Plane Development Kit (DPDK) workload. The checkup runs traffic between a traffic generator pod and a VM running a test DPDK application, and checks for packet loss. + +.Prerequisites +* You have access to the cluster as a user with `cluster-admin` permissions. +* You have installed the OpenShift CLI (`oc`). +* You have configured the compute nodes to run DPDK applications on VMs with zero packet loss. + +.Procedure +. 
Create a manifest file that contains the `ServiceAccount`, `Role`, and `RoleBinding` objects with permissions that the checkup requires for cluster access: ++ +.Example roles manifest +[%collapsible] +==== +[source,yaml] +---- +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: dpdk-checkup-sa +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kiagnose-configmap-access +rules: + - apiGroups: [ "" ] + resources: [ "configmaps" ] + verbs: [ "get", "update" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kiagnose-configmap-access +subjects: + - kind: ServiceAccount + name: dpdk-checkup-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kiagnose-configmap-access +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kubevirt-dpdk-checker +rules: + - apiGroups: [ "kubevirt.io" ] + resources: [ "virtualmachineinstances" ] + verbs: [ "create", "get", "delete" ] + - apiGroups: [ "subresources.kubevirt.io" ] + resources: [ "virtualmachineinstances/console" ] + verbs: [ "get" ] + - apiGroups: [ "" ] + resources: [ "pods" ] + verbs: [ "create", "get", "delete" ] + - apiGroups: [ "" ] + resources: [ "pods/exec" ] + verbs: [ "create" ] + - apiGroups: [ "k8s.cni.cncf.io" ] + resources: [ "network-attachment-definitions" ] + verbs: [ "get" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kubevirt-dpdk-checker +subjects: + - kind: ServiceAccount + name: dpdk-checkup-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kubevirt-dpdk-checker +---- +==== + +. Apply the checkup roles manifest: ++ +[source,terminal] +---- +$ oc apply -n <target_namespace> -f <dpdk_sa_roles_rolebinding>.yaml +---- + +. Create a `ConfigMap` manifest that contains the input parameters for the checkup. The config map also stores the results of the checkup. 
++ +.Example input config map +[source,yaml] +---- +apiVersion: v1 +kind: ConfigMap +metadata: + name: dpdk-checkup-config +data: + spec.timeout: 10m + spec.param.networkAttachmentDefinitionName: <network_name> <1> + spec.param.trafficGeneratorRuntimeClassName: <runtimeclass_name> <2> +---- +<1> The name of the `NetworkAttachmentDefinition` object. +<2> The `RuntimeClass` resource that the traffic generator pod uses. + +. Apply the config map manifest in the target namespace: ++ +[source,terminal] +---- +$ oc apply -n <target_namespace> -f <dpdk_config_map>.yaml +---- + +. Create a `Job` object to run the checkup: ++ +.Example job manifest +[source,yaml] +---- +apiVersion: batch/v1 +kind: Job +metadata: + name: dpdk-checkup +spec: + backoffLimit: 0 + template: + spec: + serviceAccountName: dpdk-checkup-sa + restartPolicy: Never + containers: + - name: dpdk-checkup + image: brew.registry.redhat.io/rh-osbs/container-native-virtualization-kubevirt-dpdk-checkup-rhel9:v4.13.0 + imagePullPolicy: Always + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: "RuntimeDefault" + env: + - name: CONFIGMAP_NAMESPACE + value: <target_namespace> + - name: CONFIGMAP_NAME + value: dpdk-checkup-config + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid +---- + +. Apply the `Job` manifest: ++ +[source,terminal] +---- +$ oc apply -n <target_namespace> -f <dpdk_job>.yaml +---- + +. Wait for the job to complete: ++ +[source,terminal] +---- +$ oc wait job dpdk-checkup -n <target_namespace> --for condition=complete --timeout 10m +---- + +. 
Review the results of the checkup by running the following command: ++ +[source,terminal] +---- +$ oc get configmap dpdk-checkup-config -n <target_namespace> -o yaml +---- ++ +.Example output config map (success) +[source,yaml] +---- +apiVersion: v1 +kind: ConfigMap +metadata: + name: dpdk-checkup-config +data: + spec.timeout: 1h2m + spec.param.networkAttachmentDefinitionName: "mlx-dpdk-network-1" + spec.param.trafficGeneratorRuntimeClassName: performance-performance-zeus10 + status.succeeded: true + status.failureReason: " " + status.startTimestamp: 2022-12-21T09:33:06+00:00 + status.completionTimestamp: 2022-12-21T11:33:06+00:00 + status.result.actualTrafficGeneratorTargetNode: worker-dpdk1 + status.result.actualDPDKVMTargetNode: worker-dpdk2 + status.result.dropRate: 0 +---- + +. Delete the job and config map resources that you previously created by running the following commands: ++ +[source,terminal] +---- +$ oc delete job -n <target_namespace> dpdk-checkup +---- ++ +[source,terminal] +---- +$ oc delete configmap -n <target_namespace> dpdk-checkup-config +---- + +. 
Optional: If you do not plan to run another checkup, delete the checkup roles manifest: ++ +[source,terminal] +---- +$ oc delete -f <dpdk_sa_roles_rolebinding>.yaml +---- diff --git a/modules/virt-dpdk-config-map-parameters.adoc b/modules/virt-dpdk-config-map-parameters.adoc new file mode 100644 index 0000000000..96671a2737 --- /dev/null +++ b/modules/virt-dpdk-config-map-parameters.adoc @@ -0,0 +1,77 @@ +// Module included in the following assemblies: +// +// * virt/support/monitoring/virt-running-cluster-checkups.adoc + +:_content-type: REFERENCE +[id="virt-dpdk-config-map-parameters_{context}"] += DPDK checkup config map parameters + +The following table shows the mandatory and optional parameters that you can set in the `data` stanza of the input `ConfigMap` manifest when you run a cluster DPDK readiness checkup: + +.DPDK checkup config map parameters +[cols="1,1,1", options="header"] +|==== +|Parameter +|Description +|Is Mandatory + +|`spec.timeout` +|The time, in minutes, before the checkup fails. +|True + +|`spec.param.networkAttachmentDefinitionName` +|The name of the `NetworkAttachmentDefinition` object of the SR-IOV NICs connected. +|True + +|`spec.param.trafficGeneratorRuntimeClassName` +|The `RuntimeClass` resource that the traffic generator pod uses. +|True + +|`spec.param.trafficGeneratorImage` +|The container image for the traffic generator. The default value is `quay.io/kiagnose/kubevirt-dpdk-checkup-traffic-gen:main`. +|False + +|`spec.param.trafficGeneratorNodeSelector` +|The node on which the traffic generator pod is to be scheduled. The node should be configured to allow DPDK traffic. +|False + +|`spec.param.trafficGeneratorPacketsPerSecond` +|The number of packets per second, in kilo (k) or million (m). The default value is 14m. +|False + +|`spec.param.trafficGeneratorEastMacAddress` +|The MAC address of the NIC connected to the traffic generator pod or VM. The default value is a random MAC address in the format `50:xx:xx:xx:xx:01`. 
+|False + +|`spec.param.trafficGeneratorWestMacAddress` +|The MAC address of the NIC connected to the traffic generator pod or VM. The default value is a random MAC address in the format `50:xx:xx:xx:xx:02`. +|False + +|`spec.param.vmContainerDiskImage` +|The container disk image for the VM. The default value is `quay.io/kiagnose/kubevirt-dpdk-checkup-vm:main`. +|False + +|`spec.param.DPDKLabelSelector` +|The label of the node on which the VM runs. The node should be configured to allow DPDK traffic. +|False + +|`spec.param.DPDKEastMacAddress` +|The MAC address of the NIC that is connected to the VM. The default value is a random MAC address in the format `60:xx:xx:xx:xx:01`. +|False + +|`spec.param.DPDKWestMacAddress` +|The MAC address of the NIC that is connected to the VM. The default value is a random MAC address in the format `60:xx:xx:xx:xx:02`. +|False + +|`spec.param.testDuration` +|The duration, in minutes, for which the traffic generator runs. The default value is 5 minutes. +|False + +|`spec.param.portBandwidthGB` +|The maximum bandwidth of the SR-IOV NIC. The default value is 10GB. +|False + +|`spec.param.verbose` +|When set to `true`, it increases the verbosity of the checkup log. The default value is `false`. 
+|False +|==== diff --git a/virt/support/monitoring/virt-running-cluster-checkups.adoc b/virt/support/monitoring/virt-running-cluster-checkups.adoc index a9c0c66ae7..e82245ac04 100644 --- a/virt/support/monitoring/virt-running-cluster-checkups.adoc +++ b/virt/support/monitoring/virt-running-cluster-checkups.adoc @@ -15,3 +15,13 @@ include::modules/virt-about-cluster-checkup-framework.adoc[leveloffset=+1] include::modules/virt-measuring-latency-vm-secondary-network.adoc[leveloffset=+1] +include::modules/virt-checking-cluster-dpdk-readiness.adoc[leveloffset=+1] + +include::modules/virt-dpdk-config-map-parameters.adoc[leveloffset=+2] + +[role="_additional-resources"] +[id="additional-resources_running-cluster-checkups"] +== Additional resources +* xref:../../../virt/virtual_machines/vm_networking/virt-attaching-vm-multiple-networks.adoc#virt-attaching-vm-multiple-networks[Attaching a virtual machine to multiple networks] +* xref:../../../networking/hardware_networks/using-dpdk-and-rdma.adoc#example-vf-use-in-dpdk-mode-intel_using-dpdk-and-rdma[Using a virtual function in DPDK mode with an Intel NIC] +* xref:../../../networking/hardware_networks/using-dpdk-and-rdma.adoc#nw-example-dpdk-line-rate_using-dpdk-and-rdma[Using SR-IOV and the Node Tuning Operator to achieve a DPDK line rate]