From 0f029a8443998e14d8818d4c680603e2d76c7c23 Mon Sep 17 00:00:00 2001 From: Cliff Schomburg <7424213+cssjr@users.noreply.github.com> Date: Tue, 23 Jun 2026 17:51:27 -0700 Subject: [PATCH 1/4] feat(aro-hcp): add periodic Grafana datasource cleanup job (AROSLSRE-1138) Add a monthly Prow periodic that runs grafanactl clean datasources and clean fixup-datasources against the DEV Grafana instance to remove orphaned Prometheus datasources left by personal dev environments. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Azure-ARO-HCP-main__periodic-cleanup.yaml | 12 +++ .../ARO-HCP/Azure-ARO-HCP-main-periodics.yaml | 81 +++++++++++++++++++ .../deprovision/grafana-datasources/OWNERS | 12 +++ ...eprovision-grafana-datasources-commands.sh | 29 +++++++ ...sion-grafana-datasources-ref.metadata.json | 15 ++++ ...p-deprovision-grafana-datasources-ref.yaml | 30 +++++++ 6 files changed, 179 insertions(+) create mode 100644 ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS create mode 100755 ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh create mode 100644 ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json create mode 100644 ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml diff --git a/ci-operator/config/Azure/ARO-HCP/Azure-ARO-HCP-main__periodic-cleanup.yaml b/ci-operator/config/Azure/ARO-HCP/Azure-ARO-HCP-main__periodic-cleanup.yaml index 08448a28608d2..12a4790bf2b60 100644 --- a/ci-operator/config/Azure/ARO-HCP/Azure-ARO-HCP-main__periodic-cleanup.yaml +++ b/ci-operator/config/Azure/ARO-HCP/Azure-ARO-HCP-main__periodic-cleanup.yaml @@ -203,6 +203,18 @@ tests: CLEANUP_SWEEPER_WORKFLOW: shared-leftovers test: - ref: aro-hcp-deprovision-cleanup-sweeper +- as: clean-grafana-datasources + cron: 0 6 1 * * + reporter_config: + channel: 
'#aro-hcp-failures-dev' + job_states_to_report: + - failure + - error + report_template: ':failed: Job *{{.Spec.Job}}* ended with *{{.Status.State}}*. + <{{.Status.URL}}|View logs>' + steps: + test: + - ref: aro-hcp-deprovision-grafana-datasources zz_generated_metadata: branch: main org: Azure diff --git a/ci-operator/jobs/Azure/ARO-HCP/Azure-ARO-HCP-main-periodics.yaml b/ci-operator/jobs/Azure/ARO-HCP/Azure-ARO-HCP-main-periodics.yaml index 6bf581b1397ca..55526377126f7 100644 --- a/ci-operator/jobs/Azure/ARO-HCP/Azure-ARO-HCP-main-periodics.yaml +++ b/ci-operator/jobs/Azure/ARO-HCP/Azure-ARO-HCP-main-periodics.yaml @@ -178,6 +178,87 @@ periodics: - name: result-aggregator secret: secretName: result-aggregator +- agent: kubernetes + cluster: build04 + cron: 0 6 1 * * + decorate: true + decoration_config: + skip_cloning: true + extra_refs: + - base_ref: main + org: Azure + repo: ARO-HCP + labels: + ci-operator.openshift.io/variant: periodic-cleanup + ci.openshift.io/generator: prowgen + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: periodic-ci-Azure-ARO-HCP-main-periodic-cleanup-clean-grafana-datasources + reporter_config: + slack: + channel: '#aro-hcp-failures-dev' + job_states_to_report: + - failure + - error + report_template: ':failed: Job *{{.Spec.Job}}* ended with *{{.Status.State}}*. 
+ <{{.Status.URL}}|View logs>' + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --target=clean-grafana-datasources + - --variant=periodic-cleanup + command: + - ci-operator + env: + - name: HTTP_SERVER_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator - agent: kubernetes cluster: build04 cron: 35 * * * * diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS new file mode 100644 index 0000000000000..3ff2089d3c778 --- /dev/null +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS @@ -0,0 +1,12 @@ +approvers: +- geoberle +- mmazur +- roivaz +- venkateshsredhat +- deads2k +reviewers: +- geoberle +- mmazur +- roivaz +- venkateshsredhat +- deads2k diff --git 
a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh new file mode 100755 index 0000000000000..b013d3c41d092 --- /dev/null +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -o errexit +set -o nounset +set -o pipefail + +export CLUSTER_PROFILE_DIR="/var/run/aro-hcp-${VAULT_SECRET_PROFILE}" + +export AZURE_CLIENT_ID; AZURE_CLIENT_ID=$(cat "${CLUSTER_PROFILE_DIR}/client-id") +export AZURE_TENANT_ID; AZURE_TENANT_ID=$(cat "${CLUSTER_PROFILE_DIR}/tenant") +export AZURE_CLIENT_SECRET; AZURE_CLIENT_SECRET=$(cat "${CLUSTER_PROFILE_DIR}/client-secret") + +az login --service-principal -u "${AZURE_CLIENT_ID}" -p "${AZURE_CLIENT_SECRET}" --tenant "${AZURE_TENANT_ID}" --output none + +export GLOBAL_INFRA_SUBSCRIPTION_ID; GLOBAL_INFRA_SUBSCRIPTION_ID=$(cat "${CLUSTER_PROFILE_DIR}/infra-global-subscription-id") + +echo "Building grafanactl..." 
+go build -o /tmp/grafanactl ./tooling/grafanactl + +echo "Running: grafanactl clean datasources" +/tmp/grafanactl clean datasources \ + --subscription "${GLOBAL_INFRA_SUBSCRIPTION_ID}" \ + --resource-group "${GRAFANA_RESOURCE_GROUP}" \ + --grafana-name "${GRAFANA_NAME}" + +echo "Running: grafanactl clean fixup-datasources" +/tmp/grafanactl clean fixup-datasources \ + --subscription "${GLOBAL_INFRA_SUBSCRIPTION_ID}" \ + --resource-group "${GRAFANA_RESOURCE_GROUP}" \ + --grafana-name "${GRAFANA_NAME}" diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json new file mode 100644 index 0000000000000..266bef50e2b00 --- /dev/null +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json @@ -0,0 +1,15 @@ +{ + "path": "aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml", + "owners": { + "approvers": [ + "aro-hcp-sl-approvers", + "geoberle", + "deads2k" + ], + "reviewers": [ + "aro-hcp-sl-reviewers", + "geoberle", + "deads2k" + ] + } +} diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml new file mode 100644 index 0000000000000..d46c4fc82629e --- /dev/null +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml @@ -0,0 +1,30 @@ +ref: + as: aro-hcp-deprovision-grafana-datasources + from: aro-hcp-e2e-tests + commands: aro-hcp-deprovision-grafana-datasources-commands.sh + resources: + requests: + cpu: 100m + memory: 300Mi + credentials: + - namespace: test-credentials + name: cluster-secrets-aro-hcp-dev + 
mount_path: /var/run/aro-hcp-dev + env: + - name: VAULT_SECRET_PROFILE + default: "dev" + documentation: |- + Environment whose cluster secrets are used; only dev is mounted by this step, so only dev works. + - name: GRAFANA_RESOURCE_GROUP + default: "global" + documentation: |- + Azure resource group containing the Managed Grafana instance. + - name: GRAFANA_NAME + default: "arohcp-dev" + documentation: |- + Name of the Azure Managed Grafana instance to clean. + documentation: |- + Remove orphaned Prometheus datasources from an Azure Managed Grafana instance. + Runs grafanactl clean datasources (removes stale AMW integrations from the + Grafana resource) followed by grafanactl clean fixup-datasources (deletes + Managed_Prometheus_* datasources not backed by a live Azure Monitor Workspace). From 4fa2718e696c6f2d8f49d1d6cfc00bf087bcf241 Mon Sep 17 00:00:00 2001 From: Cliff Schomburg <7424213+cssjr@users.noreply.github.com> Date: Tue, 23 Jun 2026 19:00:49 -0700 Subject: [PATCH 2/4] fix: set AZURE_TOKEN_CREDENTIALS=prod for DefaultAzureCredential The Azure SDK's DefaultAzureCredential with RequireAzureTokenCredentials requires AZURE_TOKEN_CREDENTIALS to select credential sources. Setting it to "prod" enables EnvironmentCredential (AZURE_CLIENT_ID/SECRET/TENANT), which is how all ARO-HCP Prow steps authenticate.
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../aro-hcp-deprovision-grafana-datasources-commands.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh index b013d3c41d092..b361b051689ef 100755 --- a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-commands.sh @@ -8,6 +8,7 @@ export CLUSTER_PROFILE_DIR="/var/run/aro-hcp-${VAULT_SECRET_PROFILE}" export AZURE_CLIENT_ID; AZURE_CLIENT_ID=$(cat "${CLUSTER_PROFILE_DIR}/client-id") export AZURE_TENANT_ID; AZURE_TENANT_ID=$(cat "${CLUSTER_PROFILE_DIR}/tenant") export AZURE_CLIENT_SECRET; AZURE_CLIENT_SECRET=$(cat "${CLUSTER_PROFILE_DIR}/client-secret") +export AZURE_TOKEN_CREDENTIALS=prod az login --service-principal -u "${AZURE_CLIENT_ID}" -p "${AZURE_CLIENT_SECRET}" --tenant "${AZURE_TENANT_ID}" --output none From 58d10aaa8b3c6d1973ca5320358c0ae7df8f8e88 Mon Sep 17 00:00:00 2001 From: Cliff Schomburg <7424213+cssjr@users.noreply.github.com> Date: Wed, 24 Jun 2026 06:02:49 -0700 Subject: [PATCH 3/4] Update registry metadata. 
--- ...p-deprovision-grafana-datasources-ref.metadata.json | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json index 266bef50e2b00..942762311d727 100644 --- a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json @@ -2,14 +2,18 @@ "path": "aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml", "owners": { "approvers": [ - "aro-hcp-sl-approvers", "geoberle", + "mmazur", + "roivaz", + "venkateshsredhat", "deads2k" ], "reviewers": [ - "aro-hcp-sl-reviewers", "geoberle", + "mmazur", + "roivaz", + "venkateshsredhat", "deads2k" ] } -} +} \ No newline at end of file From 2260d32089e1ca49b6fb8518ca4c52fc21618a85 Mon Sep 17 00:00:00 2001 From: Cliff Schomburg <7424213+cssjr@users.noreply.github.com> Date: Thu, 25 Jun 2026 07:05:10 -0700 Subject: [PATCH 4/4] Update OWNERS file to use aliases for reviewers and approvers. 
--- .../aro-hcp/deprovision/grafana-datasources/OWNERS | 10 +++------- ...p-deprovision-grafana-datasources-ref.metadata.json | 8 ++------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS index 3ff2089d3c778..d70a372f3d3f8 100644 --- a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/OWNERS @@ -1,12 +1,8 @@ approvers: +- aro-hcp-sl-approvers - geoberle -- mmazur -- roivaz -- venkateshsredhat - deads2k reviewers: +- aro-hcp-sl-reviewers - geoberle -- mmazur -- roivaz -- venkateshsredhat -- deads2k +- deads2k \ No newline at end of file diff --git a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json index 942762311d727..6bbd675ee2c94 100644 --- a/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json +++ b/ci-operator/step-registry/aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.metadata.json @@ -2,17 +2,13 @@ "path": "aro-hcp/deprovision/grafana-datasources/aro-hcp-deprovision-grafana-datasources-ref.yaml", "owners": { "approvers": [ + "aro-hcp-sl-approvers", "geoberle", - "mmazur", - "roivaz", - "venkateshsredhat", "deads2k" ], "reviewers": [ + "aro-hcp-sl-reviewers", "geoberle", - "mmazur", - "roivaz", - "venkateshsredhat", "deads2k" ] }