From 07830efd034a0ad8508ad596fba3087513798938 Mon Sep 17 00:00:00 2001 From: Lakshmi Girish Date: Tue, 27 May 2025 08:38:18 +0530 Subject: [PATCH] IBM Z troubleshooting changes for HCP419 --- .../hcp-troubleshooting.adoc | 3 +- modules/agent-service-failure.adoc | 76 +++++++++++++++++++ modules/hcp-ibm-z-zvm-agents.adoc | 15 ++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 modules/agent-service-failure.adoc diff --git a/hosted_control_planes/hcp-troubleshooting.adoc b/hosted_control_planes/hcp-troubleshooting.adoc index 64f7e03217..b4387a1c86 100644 --- a/hosted_control_planes/hcp-troubleshooting.adoc +++ b/hosted_control_planes/hcp-troubleshooting.adoc @@ -65,4 +65,5 @@ include::modules/hcp-ts-bm-nodes-not-added.adoc[leveloffset=+2] include::modules/hosted-restart-hcp-components.adoc[leveloffset=+1] include::modules/hosted-control-planes-pause-reconciliation.adoc[leveloffset=+1] -include::modules/scale-down-data-plane.adoc[leveloffset=+1] \ No newline at end of file +include::modules/scale-down-data-plane.adoc[leveloffset=+1] +include::modules/agent-service-failure.adoc[leveloffset=+1] \ No newline at end of file diff --git a/modules/agent-service-failure.adoc b/modules/agent-service-failure.adoc new file mode 100644 index 0000000000..9921c715e0 --- /dev/null +++ b/modules/agent-service-failure.adoc @@ -0,0 +1,76 @@ +// Module included in the following assemblies: +// +// * hosted_control_planes/hcp-troubleshooting.adoc + +:_mod-docs-content-type: PROCEDURE +[id="agent-service-failure_{context}"] += Agent service failures and agents not joining the cluster + +In some cases, agents might fail to join the cluster after booting the machines with the boot artifacts. 
You can confirm this issue by checking the `agent.service` logs for the following error: + +---- +Error: copying system image from manifest list: Source image rejected: A signature was required, but no signature exists +---- + +[NOTE] +==== +This issue occurs because image signature verification fails when no signature is present. +As a workaround, you can disable signature verification by modifying the container policy. +==== + +.Procedure + +. Add the `ignitionConfigOverride` field to the `InfraEnv` manifest to override the `/etc/containers/policy.json` file. This disables signature verification for container images. + +. Replace the base64-encoded content in the `ignitionConfigOverride` field with the required `/etc/containers/policy.json` configuration according to your image registries. + ++ +.Example +[source,json] +---- +{ + "default": [ + { + "type": "insecureAcceptAnything" + } + ], + "transports": { + "docker": { + "": [ + { + "type": "insecureAcceptAnything" + } + ], + "REGISTRY2": [ + { + "type": "insecureAcceptAnything" + } + ] + }, + "docker-daemon": { + "": [ + { + "type": "insecureAcceptAnything" + } + ] + } + } +} +---- + ++ +.Example InfraEnv manifest with `ignitionConfigOverride` +[source,yaml] +---- +apiVersion: agent-install.openshift.io/v1beta1 +kind: InfraEnv +metadata: + name: <hosted_cluster_name> + namespace: <hosted_control_plane_namespace> +spec: + cpuArchitecture: s390x + pullSecretRef: + name: pull-secret + sshAuthorizedKey: <ssh_public_key> + ignitionConfigOverride: '{"ignition":{"version":"3.2.0"},"storage":{"files":[{"path":"/etc/containers/policy.json","mode":420,"overwrite":true,"contents":{"source":"data:text/plain;charset=utf-8;base64,ewogICAgImRlZmF1bHQiOiBbCiAgICAgICAgewogICAgICAgICAgICAidHlwZSI6ICJpbnNlY3VyZUFjY2VwdEFueXRoaW5nIgogICAgICAgIH0KICAgIF0sCiAgICAidHJhbnNwb3J0cyI6CiAgICAgICAgewogICAgICAgICAgICAiZG9ja2VyLWRhZW1vbiI6CiAgICAgICAgICAgICAgICB7CiAgICAgICAgICAgICAgICAgICAgIiI6IFt7InR5cGUiOiJpbnNlY3VyZUFjY2VwdEFueXRoaW5nIn1dCiAgICAgICAgICAgICAgICB9CiAgICAgICAgfQp9"}}]}}' +---- \ No newline at end of 
file diff --git a/modules/hcp-ibm-z-zvm-agents.adoc b/modules/hcp-ibm-z-zvm-agents.adoc index 6d44424087..450aa71911 100644 --- a/modules/hcp-ibm-z-zvm-agents.adoc +++ b/modules/hcp-ibm-z-zvm-agents.adoc @@ -14,6 +14,7 @@ Complete the following steps to start your {ibm-z-title} environment with the do . Update the parameter file to add the `rootfs_url`, `network_adaptor` and `disk_type` values. + +-- .Example parameter file [source,yaml] ---- @@ -31,7 +32,21 @@ ai.ip_cfg_override=1 \// <4> <1> For the `coreos.live.rootfs_url` artifact, specify the matching `rootfs` artifact for the `kernel` and `initramfs` that you are starting. Only HTTP and HTTPS protocols are supported. <2> For the `ip` parameter, manually assign the IP address, as described in _Installing a cluster with z/VM on {ibm-z-title} and {ibm-linuxone-title}_. <3> For installations on DASD-type disks, use `rd.dasd` to specify the DASD where Red Hat Enterprise Linux CoreOS (RHCOS) is to be installed. For installations on FCP-type disks, use `rd.zfcp=<adapter>,<wwpn>,<lun>` to specify the FCP disk where RHCOS is to be installed. ++ +[NOTE] +==== +For FCP multipath configurations, provide two disks instead of one. +==== ++ +.Example +[source,yaml] +---- +rd.zfcp=<adapter1>,<wwpn1>,<lun1> \ +rd.zfcp=<adapter2>,<wwpn2>,<lun2> +---- ++ <4> Specify this parameter when you use an Open Systems Adapter (OSA) or HiperSockets. +-- . Move `initrd`, kernel images, and the parameter file to the guest VM by running the following commands: +