From ae118c8d6e9785a90d58e90588324437ba4fb4b5 Mon Sep 17 00:00:00 2001 From: Ashley Hardin Date: Thu, 3 Feb 2022 15:12:46 -0500 Subject: [PATCH] PSAP-496, documented NFD Toplogy Updater --- .../psap-node-feature-discovery-operator.adoc | 4 + ...ap-configuring-node-feature-discovery.adoc | 1 + .../psap-node-feature-discovery-operator.adoc | 1 - ...ry-topology-updater-command-reference.adoc | 196 ++++++++++++++++++ ...ture-discovery-using-topology-updater.adoc | 58 ++++++ ...using-node-feature-discovery-operator.adoc | 144 +++++-------- 6 files changed, 309 insertions(+), 95 deletions(-) create mode 100644 modules/psap-node-feature-discovery-topology-updater-command-reference.adoc create mode 100644 modules/psap-node-feature-discovery-using-topology-updater.adoc diff --git a/hardware_enablement/psap-node-feature-discovery-operator.adoc b/hardware_enablement/psap-node-feature-discovery-operator.adoc index 74076f9121..4d8ed22dcf 100644 --- a/hardware_enablement/psap-node-feature-discovery-operator.adoc +++ b/hardware_enablement/psap-node-feature-discovery-operator.adoc @@ -15,3 +15,7 @@ include::modules/psap-installing-node-feature-discovery-operator.adoc[leveloffse include::modules/psap-using-node-feature-discovery-operator.adoc[leveloffset=+1] include::modules/psap-configuring-node-feature-discovery.adoc[leveloffset=+1] + +include::modules/psap-node-feature-discovery-using-topology-updater.adoc[leveloffset=+1] + +include::modules/psap-node-feature-discovery-topology-updater-command-reference.adoc[leveloffset=+2] diff --git a/modules/psap-configuring-node-feature-discovery.adoc b/modules/psap-configuring-node-feature-discovery.adoc index c6e2ba5455..91c558f6f3 100644 --- a/modules/psap-configuring-node-feature-discovery.adoc +++ b/modules/psap-configuring-node-feature-discovery.adoc @@ -2,6 +2,7 @@ // // * scalability_and_performance/psap-node-feature-discovery-operator.adoc +:_content-type: REFERENCE [id="configuring-the-node-feature-discovery_{context}"] = 
Configuring the Node Feature Discovery Operator diff --git a/modules/psap-node-feature-discovery-operator.adoc b/modules/psap-node-feature-discovery-operator.adoc index a0dd5a46d2..622af27893 100644 --- a/modules/psap-node-feature-discovery-operator.adoc +++ b/modules/psap-node-feature-discovery-operator.adoc @@ -8,7 +8,6 @@ endif::[] ifeval::["{context}" == "node-feature-discovery-operator"] :perf: endif::[] - :_content-type: CONCEPT [id="about-node-feature-discovery-operator_{context}"] ifdef::operators[] diff --git a/modules/psap-node-feature-discovery-topology-updater-command-reference.adoc b/modules/psap-node-feature-discovery-topology-updater-command-reference.adoc new file mode 100644 index 0000000000..26c0d05bf4 --- /dev/null +++ b/modules/psap-node-feature-discovery-topology-updater-command-reference.adoc @@ -0,0 +1,196 @@ +// Module included in the following assemblies: +// +// * hardware_enablement/psap-node-feature-discovery-operator.adoc + +:_content-type: REFERENCE +[id="nfd-topology-updater-command-line-flags_{context}"] += NFD Topology Updater command line flags + +To view available command line flags, run the `nfd-topology-updater -help` command. For example, in a podman container, run the following command: + +[source,terminal] +---- +$ podman run gcr.io/k8s-staging-nfd/node-feature-discovery:master nfd-topology-updater -help +---- + +[discrete] +[id="nfd-topology-updater-ca-file_{context}"] +== -ca-file + +The `-ca-file` flag is one of the three flags, together with the `-cert-file` and `-key-file` flags, that controls the mutual TLS authentication on the NFD Topology Updater. This flag specifies the TLS root certificate that is used for verifying the authenticity of nfd-master. + +Default: empty + +[IMPORTANT] +==== +The `-ca-file` flag must be specified together with the `-cert-file` and `-key-file` flags. 
+==== + +.Example +[source,terminal] +---- +$ nfd-topology-updater -ca-file=/opt/nfd/ca.crt -cert-file=/opt/nfd/updater.crt -key-file=/opt/nfd/updater.key +---- + +[discrete] +[id="nfd-topology-updater-cert-file_{context}"] +== -cert-file + +The `-cert-file` flag is one of the three flags, together with the `-ca-file` and `-key-file` flags, that controls mutual TLS authentication on the NFD Topology Updater. This flag specifies the TLS certificate presented for authenticating outgoing requests. + +Default: empty + +[IMPORTANT] +==== +The `-cert-file` flag must be specified together with the `-ca-file` and `-key-file` flags. +==== + +.Example +[source,terminal] +---- +$ nfd-topology-updater -cert-file=/opt/nfd/updater.crt -key-file=/opt/nfd/updater.key -ca-file=/opt/nfd/ca.crt +---- + +[discrete] +[id="nfd-topology-updater-help_{context}"] +== -h, -help + +Print usage and exit. + +[discrete] +[id="nfd-topology-updater-key-file_{context}"] +== -key-file + +The `-key-file` flag is one of the three flags, together with the `-ca-file` and `-cert-file` flags, that controls the mutual TLS authentication on the NFD Topology Updater. This flag specifies the private key corresponding to the given certificate file, or `-cert-file`, that is used for authenticating outgoing requests. + +Default: empty + +[IMPORTANT] +==== +The `-key-file` flag must be specified together with the `-ca-file` and `-cert-file` flags. +==== + +.Example +[source,terminal] +---- +$ nfd-topology-updater -key-file=/opt/nfd/updater.key -cert-file=/opt/nfd/updater.crt -ca-file=/opt/nfd/ca.crt +---- + +[discrete] +[id="nfd-topology-updater-kubelet-config-file_{context}"] +== -kubelet-config-file + +The `-kubelet-config-file` flag specifies the path to the Kubelet's configuration +file. 
+ +Default: `/host-var/lib/kubelet/config.yaml` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -kubelet-config-file=/var/lib/kubelet/config.yaml +---- + +[discrete] +[id="nfd-topology-updater-no-publish_{context}"] +== -no-publish + +The `-no-publish` flag disables all communication with the nfd-master, making it a dry run flag for nfd-topology-updater. NFD Topology Updater runs resource hardware topology detection normally, but no CR requests are sent to nfd-master. + +Default: `false` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -no-publish +---- + +[id="nfd-topology-updater-oneshot_{context}"] +== -oneshot + +The `-oneshot` flag causes the NFD Topology Updater to exit after one pass of resource hardware topology detection. + +Default: `false` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -oneshot -no-publish +---- + +[discrete] +[id="nfd-topology-updater-podresources-socket_{context}"] +== -podresources-socket + +The `-podresources-socket` flag specifies the path to the Unix socket where kubelet exports a gRPC service to enable discovery of in-use CPUs and devices, and to provide metadata for them. + +Default: `/host-var/lib/kubelet/pod-resources/kubelet.sock` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -podresources-socket=/var/lib/kubelet/pod-resources/kubelet.sock +---- + +[discrete] +[id="nfd-topology-updater-server_{context}"] +== -server + +The `-server` flag specifies the address of the nfd-master endpoint to connect to. + +Default: `localhost:8080` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -server=nfd-master.nfd.svc.cluster.local:443 +---- + +[discrete] +[id="nfd-topology-updater-server-name-override_{context}"] +== -server-name-override + +The `-server-name-override` flag specifies the common name (CN) to expect from the nfd-master TLS certificate. This flag is mostly intended for development and debugging purposes. 
+ +Default: empty + +.Example +[source,terminal] +---- +$ nfd-topology-updater -server-name-override=localhost +---- + +[discrete] +[id="nfd-topology-updater-sleep-interval_{context}"] +== -sleep-interval + +The `-sleep-interval` flag specifies the interval between resource hardware topology re-examination and custom resource updates. A non-positive value implies infinite sleep interval and no re-detection is done. + +Default: `60s` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -sleep-interval=1h +---- + +[discrete] +[id="nfd-topology-updater-version_{context}"] +== -version + +Print version and exit. + +[discrete] +[id="nfd-topology-updater-watch-namespace_{context}"] +== -watch-namespace + +The `-watch-namespace` flag specifies the namespace to ensure that resource hardware topology examination only happens for the pods running in the +specified namespace. Pods that are not running in the specified namespace are not considered during resource accounting. This is particularly useful for testing and debugging purposes. A `*` value means that all of the pods across all namespaces are considered during the accounting process. + +Default: `*` + +.Example +[source,terminal] +---- +$ nfd-topology-updater -watch-namespace=rte +---- diff --git a/modules/psap-node-feature-discovery-using-topology-updater.adoc b/modules/psap-node-feature-discovery-using-topology-updater.adoc new file mode 100644 index 0000000000..425a6bc559 --- /dev/null +++ b/modules/psap-node-feature-discovery-using-topology-updater.adoc @@ -0,0 +1,58 @@ +// Module included in the following assemblies: +// +// * hardware_enablement/psap-node-feature-discovery-operator.adoc + +:_content-type: PROCEDURE +[id="using-the-nfd-topology-updater_{context}"] += Using the NFD Topology Updater + +The Node Feature Discovery (NFD) Topology Updater is a daemon responsible for examining allocated resources on a worker node. 
It accounts for resources that are available to be allocated to a new pod on a per-zone basis, where a zone can be a Non-Uniform Memory Access (NUMA) node. The NFD Topology Updater communicates the information to nfd-master, which creates a `NodeResourceTopology` custom resource (CR) corresponding to all of the worker nodes in the cluster. One instance of the NFD Topology Updater runs on each node of the cluster. + +To enable the Topology Updater workers in NFD, set the `topologyupdater` variable to `true` in the `NodeFeatureDiscovery` CR, as described in the section *Using the Node Feature Discovery Operator*. + +== NodeResourceTopology CR + +When run with NFD Topology Updater, NFD creates custom resource instances corresponding to the node resource hardware topology, such as: + +[source,yaml] +---- +apiVersion: topology.node.k8s.io/v1alpha1 +kind: NodeResourceTopology +metadata: + name: node1 +topologyPolicies: ["SingleNUMANodeContainerLevel"] +zones: + - name: node-0 + type: Node + resources: + - name: cpu + capacity: 20 + allocatable: 16 + available: 10 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 + - name: node-1 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 30 + available: 15 + - name: vendor/nic2 + capacity: 6 + allocatable: 6 + available: 6 + - name: node-2 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 30 + available: 15 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 +---- diff --git a/modules/psap-using-node-feature-discovery-operator.adoc b/modules/psap-using-node-feature-discovery-operator.adoc index 41e4bbdaeb..c2d4c97e0b 100644 --- a/modules/psap-using-node-feature-discovery-operator.adoc +++ b/modules/psap-using-node-feature-discovery-operator.adoc @@ -35,16 +35,17 @@ metadata: namespace: openshift-nfd spec: instance: "" # instance is empty by default + topologyupdater: false # False by default operand: namespace: openshift-nfd image: 
registry.redhat.io/openshift4/ose-node-feature-discovery:v4.10 imagePullPolicy: Always workerConfig: configData: | - #core: + core: # labelWhiteList: # noPublish: false - # sleepInterval: 60s + sleepInterval: 60s # sources: [all] # klog: # addDirHeader: false @@ -61,102 +62,57 @@ spec: # logFile: # logFileMaxSize: 1800 # skipLogHeaders: false - #sources: - # cpu: - # cpuid: - ## NOTE: whitelist has priority over blacklist - # attributeBlacklist: - # - "BMI1" - # - "BMI2" - # - "CLMUL" - # - "CMOV" - # - "CX16" - # - "ERMS" - # - "F16C" - # - "HTT" - # - "LZCNT" - # - "MMX" - # - "MMXEXT" - # - "NX" - # - "POPCNT" - # - "RDRAND" - # - "RDSEED" - # - "RDTSCP" - # - "SGX" - # - "SSE" - # - "SSE2" - # - "SSE3" - # - "SSE4.1" - # - "SSE4.2" - # - "SSSE3" - # attributeWhitelist: - # kernel: - # kconfigFile: "/path/to/kconfig" - # configOpts: - # - "NO_HZ" - # - "X86" - # - "DMI" - # pci: - # deviceClassWhitelist: - # - "0200" - # - "03" - # - "12" - # deviceLabelFields: - # - "class" - # - "vendor" - # - "device" - # - "subsystem_vendor" - # - "subsystem_device" - # usb: - # deviceClassWhitelist: - # - "0e" - # - "ef" - # - "fe" - # - "ff" - # deviceLabelFields: - # - "class" - # - "vendor" - # - "device" - # custom: - # - name: "my.kernel.feature" - # matchOn: - # - loadedKMod: ["example_kmod1", "example_kmod2"] - # - name: "my.pci.feature" - # matchOn: - # - pciId: - # class: ["0200"] - # vendor: ["15b3"] - # device: ["1014", "1017"] - # - pciId : - # vendor: ["8086"] - # device: ["1000", "1100"] - # - name: "my.usb.feature" - # matchOn: - # - usbId: - # class: ["ff"] - # vendor: ["03e7"] - # device: ["2485"] - # - usbId: - # class: ["fe"] - # vendor: ["1a6e"] - # device: ["089a"] - # - name: "my.combined.feature" - # matchOn: - # - pciId: - # vendor: ["15b3"] - # device: ["1014", "1017"] - # loadedKMod : ["vendor_kmod1", "vendor_kmod2"] + sources: + cpu: + cpuid: + # NOTE: whitelist has priority over blacklist + attributeBlacklist: + - "BMI1" + - "BMI2" + - "CLMUL" + 
- "CMOV" + - "CX16" + - "ERMS" + - "F16C" + - "HTT" + - "LZCNT" + - "MMX" + - "MMXEXT" + - "NX" + - "POPCNT" + - "RDRAND" + - "RDSEED" + - "RDTSCP" + - "SGX" + - "SSE" + - "SSE2" + - "SSE3" + - "SSE4.1" + - "SSE4.2" + - "SSSE3" + attributeWhitelist: + kernel: + kconfigFile: "/path/to/kconfig" + configOpts: + - "NO_HZ" + - "X86" + - "DMI" + pci: + deviceClassWhitelist: + - "0200" + - "03" + - "12" + deviceLabelFields: + - "class" customConfig: configData: | - # - name: "more.kernel.features" - # matchOn: - # - loadedKMod: ["example_kmod3"] - # - name: "more.features.by.nodename" - # value: customValue - # matchOn: - # - nodename: ["special-.*-node-.*"] + - name: "more.kernel.features" + matchOn: + - loadedKMod: ["example_kmod3"] ---- +For more details on how to customize NFD workers, refer to the link:https://kubernetes-sigs.github.io/node-feature-discovery/v0.10/advanced/worker-configuration-reference.html[Configuration file reference of nfd-worker]. + . Create the `NodeFeatureDiscovery` CR instance by running the following command: + [source,terminal]