From 7850b58166186a5f7b15afe6bbf52d6574b99891 Mon Sep 17 00:00:00 2001 From: Matt Clark Date: Wed, 3 Jun 2026 15:41:43 -0700 Subject: [PATCH] ROSAENG-2066: Add osdctl account aws-creds command Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/account/aws-creds-rotate.go | 241 +++ cmd/account/aws-creds-snapshot.go | 97 + cmd/account/aws-creds.go | 779 ++++++++ cmd/account/cmd.go | 1 + cmd/account/rotate-secret.go | 5 +- docs/README.md | 154 +- docs/osdctl_account.md | 2 +- docs/osdctl_account_aws-creds.md | 35 + docs/osdctl_account_aws-creds_rotate.md | 92 + docs/osdctl_account_aws-creds_snapshot.md | 76 + docs/osdctl_account_rotate-secret.md | 15 +- pkg/controller/awscreds.go | 1217 +++++++++++++ pkg/controller/awscreds_test.go | 1993 +++++++++++++++++++++ pkg/controller/rotatesecret.go | 1063 +++++++++-- pkg/controller/rotatesecret_test.go | 435 ++++- pkg/provider/aws/client.go | 5 + pkg/provider/aws/mock/client.go | 16 + 17 files changed, 5919 insertions(+), 307 deletions(-) create mode 100644 cmd/account/aws-creds-rotate.go create mode 100644 cmd/account/aws-creds-snapshot.go create mode 100644 cmd/account/aws-creds.go create mode 100644 docs/osdctl_account_aws-creds.md create mode 100644 docs/osdctl_account_aws-creds_rotate.md create mode 100644 docs/osdctl_account_aws-creds_snapshot.md create mode 100644 pkg/controller/awscreds.go create mode 100644 pkg/controller/awscreds_test.go diff --git a/cmd/account/aws-creds-rotate.go b/cmd/account/aws-creds-rotate.go new file mode 100644 index 000000000..d733e89da --- /dev/null +++ b/cmd/account/aws-creds-rotate.go @@ -0,0 +1,241 @@ +package account + +import ( + "context" + "fmt" + "io" + + "github.com/fatih/color" + "github.com/openshift/osdctl/pkg/controller" + "github.com/openshift/osdctl/pkg/utils" + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" + cmdutil "k8s.io/kubectl/pkg/cmd/util" +) + +type awsCredsRotateOptions struct { + awsCredsOptions + rotateManagedAdmin bool + rotateCcsAdmin bool + refreshSecrets bool + dryRun bool + force bool +} + +// newCmdAWSCredsRotate creates the "aws-creds rotate" subcommand for IAM credential rotation. +func newCmdAWSCredsRotate(streams genericclioptions.IOStreams) *cobra.Command { + ops := &awsCredsRotateOptions{ + awsCredsOptions: awsCredsOptions{IOStreams: streams, log: newAWSCredsLogger()}, + } + + cmd := &cobra.Command{ + Use: "rotate -C --reason [flags]", + Short: "Rotate AWS IAM credentials for a cluster", + Long: `Rotates AWS IAM credentials for osdManagedAdmin and/or osdCcsAdmin users. +Runs a diagnostic snapshot first, then performs the rotation with +interactive confirmation. + +Use --refresh-secrets to only delete and recreate CredentialRequest secrets +without rotating AWS keys or modifying Hive secrets. This is useful when +CCO needs to re-provision secrets with existing credentials. + +AWS credentials are obtained via backplane by default, falling back to the +default AWS credential chain (env vars, ~/.aws/config). Use --aws-profile +to specify a named profile, or --aws-use-env to skip backplane and use +environment credentials directly (e.g. after rh-aws-saml-login). + +Pre-flight checks (IAM permissions, secret existence) block rotation by +default. Use --force to allow proceeding past errors with explicit YES +confirmation — only when you are certain the errors are benign.`, + Example: ` # Rotate osdManagedAdmin credentials + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin + + # Rotate osdCcsAdmin credentials (CCS clusters only) + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --ccs-admin + + # Rotate both + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --ccs-admin + + # Only refresh CredentialRequest secrets (no key rotation) + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --refresh-secrets + + # Dry-run: preview what would happen + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --dry-run + + # Using rh-aws-saml-login credentials (no backplane) + kinit $USER@IPA.REDHAT.COM + eval $(rh-aws-saml-login --output env rhcontrol) + export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --aws-use-env + + # With staging cluster and production hive + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --hive-ocm-url production`, + DisableAutoGenTag: true, + Run: func(cmd *cobra.Command, args []string) { + cmdutil.CheckErr(ops.validateRotate(cmd, args)) + cmdutil.CheckErr(runRotate(cmd.Context(), ops)) + }, + } + + ops.addFlags(cmd) + cmd.Flags().BoolVar(&ops.rotateManagedAdmin, "managed-admin", false, "Rotate osdManagedAdmin credentials") + cmd.Flags().BoolVar(&ops.rotateCcsAdmin, "ccs-admin", false, "Rotate osdCcsAdmin credentials (CCS clusters only)") + cmd.Flags().BoolVar(&ops.refreshSecrets, "refresh-secrets", false, "Only delete and recreate CredentialRequest secrets (no key rotation)") + cmd.Flags().BoolVar(&ops.dryRun, "dry-run", false, "Preview rotation actions without making changes") + cmd.Flags().BoolVar(&ops.force, "force", false, "Allow proceeding past pre-flight errors with YES confirmation. Use only when certain the errors are benign (e.g., known SCP restrictions that won't affect rotation)") + + return cmd +} + +// validateRotate validates that exactly one rotation mode is selected and flags are not conflicting. +func (o *awsCredsRotateOptions) validateRotate(cmd *cobra.Command, args []string) error { + if err := o.validate(cmd, args); err != nil { + return err + } + if o.refreshSecrets && (o.rotateManagedAdmin || o.rotateCcsAdmin) { + return cmdutil.UsageErrorf(cmd, "--refresh-secrets cannot be combined with --managed-admin or --ccs-admin") + } + if !o.rotateManagedAdmin && !o.rotateCcsAdmin && !o.refreshSecrets { + return cmdutil.UsageErrorf(cmd, "at least one of --managed-admin, --ccs-admin, or --refresh-secrets is required") + } + return nil +} + +// confirmStrictYES requires the user to type "YES" exactly to proceed. +func confirmStrictYES(in io.Reader, out io.Writer) bool { + fmt.Fprintf(out, "Type YES to continue: ") + var response string + if _, err := fmt.Fscanln(in, &response); err != nil { + return false + } + return response == "YES" +} + +// runRotate orchestrates the full rotation workflow: cluster identification, +// diagnostic snapshot, permission verification, and credential rotation. +func runRotate(ctx context.Context, o *awsCredsRotateOptions) error { + + rc, err := o.identifyCluster() + if err != nil { + return err + } + defer rc.ocmConn.Close() + + if o.rotateCcsAdmin && !rc.isCCS { + return fmt.Errorf("--ccs-admin specified but cluster is not CCS/BYOC") + } + + if o.refreshSecrets { + if err := o.resolveForCRSecrets(ctx, rc); err != nil { + return err + } + report, err := controller.DiagnoseCRSecrets(ctx, rc.hiveClient, rc.managedClient, rc.claimName, rc.account, o.Out) + if err != nil { + return err + } + controller.RenderCredRequestTable(report, o.Out) + return runRefreshSecrets(ctx, o, rc, report) + } + + if err := o.resolveCluster(ctx, rc); err != nil { + return err + } + + o.log.Info("Running pre-rotation diagnostic snapshot") + input := rc.toCredsInput(o.log, o.Out) + report, err := controller.DiagnoseCredentials(ctx, input) + if err != nil { + return err + } + controller.RenderReport(report, o.Out) + + if !report.AllPermissionsOK { + o.log.Warn("IAM permission check detected issues") + red := color.New(color.FgRed).SprintFunc() + fmt.Fprintf(o.Out, "\n%s Pre-flight permission checks detected issues.\n", red("[FAIL]")) + fmt.Fprintln(o.Out, "Proceeding may result in failures during rotation or CR secret recreation.") + if !o.force { + fmt.Fprintln(o.Out, "Use --force to allow proceeding with YES confirmation.") + return fmt.Errorf("pre-flight permission checks failed — use --force to override") + } + fmt.Fprintln(o.Out, "--force specified. Type YES to confirm you want to proceed despite errors.") + if !confirmStrictYES(o.In, o.Out) { + o.log.Info("Operation cancelled by user") + return nil + } + } + + if o.dryRun { + o.log.Info("Dry-run mode — no changes will be made") + } + + rotateInput := &controller.RotateSecretInput{ + AccountCRName: rc.claimName, + Account: rc.account, + OsdManagedAdminUsername: rc.adminUsername, + UpdateManagedAdminCreds: o.rotateManagedAdmin, + UpdateCcsCreds: o.rotateCcsAdmin, + DryRun: o.dryRun, + SkipPermissionCheck: !report.AllPermissionsOK, + AwsClient: rc.awsClient, + HiveKubeClient: rc.hiveClient, + ManagedClusterClient: rc.managedClient, + Report: report, + Log: o.log, + In: o.In, + Out: o.Out, + } + + if !o.dryRun { + o.log.Warn("Credential rotation will modify IAM keys and Hive secrets") + fmt.Fprintln(o.Out, "\nProceed with credential rotation?") + if !utils.ConfirmPrompt() { + o.log.Info("Rotation cancelled by user") + return nil + } + } + + o.log.Info("Starting credential rotation") + if err := controller.RotateSecret(ctx, rotateInput); err != nil { + o.log.WithError(err).Error("Credential rotation failed") + return err + } + o.log.Info("Credential rotation completed successfully") + return nil +} + +// runRefreshSecrets deletes and recreates CredentialRequest secrets without rotating AWS keys. +func runRefreshSecrets(ctx context.Context, o *awsCredsRotateOptions, rc *resolvedCluster, report *controller.DiagnosticReport) error { + + if rc.managedClient == nil { + return fmt.Errorf("managed cluster client not available — cannot refresh secrets") + } + + if report.ClusterRootKeyID == "" { + return fmt.Errorf("cannot refresh CredentialRequest secrets: kube-system/aws-creds is missing or unreadable on the managed cluster — CCO needs this secret to recreate operator credentials") + } + + if o.dryRun { + o.log.Info("Dry-run mode — no secrets will be deleted") + fmt.Fprintln(o.Out, "\n[Dry Run] Would delete and recreate all CredentialRequest secrets.") + fmt.Fprintln(o.Out, "[Dry Run] No AWS keys or Hive secrets would be modified.") + return nil + } + + fmt.Fprintf(o.Out, "\nThis will delete %d CredentialRequest secret(s) so CCO recreates them.\n", len(report.CredRequests)) + fmt.Fprintln(o.Out, "No AWS keys or Hive secrets will be modified.") + fmt.Fprintln(o.Out, "\nProceed with secret refresh?") + if !utils.ConfirmPrompt() { + o.log.Info("Refresh cancelled by user") + return nil + } + + o.log.Info("Deleting credential secrets for CCO to recreate") + if err := controller.DeleteCredentialSecrets(ctx, rc.managedClient, o.In, o.Out); err != nil { + o.log.WithError(err).Error("Failed to refresh credential secrets") + return err + } + + o.log.Info("Credential secret refresh completed successfully") + return nil +} diff --git a/cmd/account/aws-creds-snapshot.go b/cmd/account/aws-creds-snapshot.go new file mode 100644 index 000000000..8f0bb5c08 --- /dev/null +++ b/cmd/account/aws-creds-snapshot.go @@ -0,0 +1,97 @@ +package account + +import ( + "context" + + "github.com/openshift/osdctl/pkg/controller" + "github.com/spf13/cobra" + "k8s.io/cli-runtime/pkg/genericclioptions" + cmdutil "k8s.io/kubectl/pkg/cmd/util" +) + +type awsCredsSnapshotOptions struct { + awsCredsOptions + crSecretsOnly bool +} + +// newCmdAWSCredsSnapshot creates the "aws-creds snapshot" subcommand for read-only credential diagnostics. +func newCmdAWSCredsSnapshot(streams genericclioptions.IOStreams) *cobra.Command { + ops := &awsCredsSnapshotOptions{ + awsCredsOptions: awsCredsOptions{IOStreams: streams, log: newAWSCredsLogger()}, + } + + cmd := &cobra.Command{ + Use: "snapshot -C --reason [flags]", + Short: "Show a read-only credential status report for a cluster", + Long: `Produces a diagnostic report of AWS IAM credentials including: + - IAM access keys and which Hive secrets reference them + - CredentialRequest secrets and whether they need refresh + - IAM permission simulation (SCP/policy restriction detection) + +Use --cr-secrets to show only the CredentialRequest secrets table. + +This is a read-only operation — no credentials are modified. + +AWS credentials are obtained via backplane by default, falling back to the +default AWS credential chain (env vars, ~/.aws/config). Use --aws-profile +to specify a named profile, or --aws-use-env to skip backplane and use +environment credentials directly (e.g. after rh-aws-saml-login).`, + Example: ` # Full credential status report (uses backplane) + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" + + # Only show CredentialRequest secret status + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" --cr-secrets + + # Using rh-aws-saml-login credentials (no backplane) + kinit $USER@IPA.REDHAT.COM + eval $(rh-aws-saml-login --output env rhcontrol) + export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" --aws-use-env + + # With staging cluster and production hive + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" --hive-ocm-url production`, + DisableAutoGenTag: true, + Run: func(cmd *cobra.Command, args []string) { + cmdutil.CheckErr(ops.validate(cmd, args)) + cmdutil.CheckErr(runSnapshot(cmd.Context(), ops)) + }, + } + + ops.addFlags(cmd) + cmd.Flags().BoolVar(&ops.crSecretsOnly, "cr-secrets", false, "Only show CredentialRequest secrets status") + return cmd +} + +// runSnapshot produces the diagnostic report, either full or CR-secrets-only based on flags. +func runSnapshot(ctx context.Context, o *awsCredsSnapshotOptions) error { + + rc, err := o.identifyCluster() + if err != nil { + return err + } + defer rc.ocmConn.Close() + + if o.crSecretsOnly { + if err := o.resolveForCRSecrets(ctx, rc); err != nil { + return err + } + report, err := controller.DiagnoseCRSecrets(ctx, rc.hiveClient, rc.managedClient, rc.claimName, rc.account, o.Out) + if err != nil { + return err + } + controller.RenderCredRequestTable(report, o.Out) + return nil + } + + if err := o.resolveCluster(ctx, rc); err != nil { + return err + } + + input := rc.toCredsInput(o.log, o.Out) + report, err := controller.DiagnoseCredentials(ctx, input) + if err != nil { + return err + } + controller.RenderReport(report, o.Out) + return nil +} diff --git a/cmd/account/aws-creds.go b/cmd/account/aws-creds.go new file mode 100644 index 000000000..db77b866d --- /dev/null +++ b/cmd/account/aws-creds.go @@ -0,0 +1,779 @@ +package account + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "regexp" + "strings" + + awsSdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/sts" + sdk "github.com/openshift-online/ocm-sdk-go" + cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" + awsv1alpha1 "github.com/openshift/aws-account-operator/api/v1alpha1" + ccov1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" + "github.com/openshift/osdctl/cmd/common" + "github.com/openshift/osdctl/pkg/controller" + "github.com/openshift/osdctl/pkg/k8s" + "github.com/openshift/osdctl/pkg/osdCloud" + awsprovider "github.com/openshift/osdctl/pkg/provider/aws" + "github.com/openshift/osdctl/pkg/provider/pagerduty" + "github.com/openshift/osdctl/pkg/utils" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/cli-runtime/pkg/genericclioptions" + "k8s.io/client-go/kubernetes/scheme" + cmdutil "k8s.io/kubectl/pkg/cmd/util" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// AWS IAM Credential Architecture for OSD/ROSA Classic Clusters +// +// There are two IAM users managed per cluster, with distinct roles: +// +// osdCcsAdmin (CCS/BYOC clusters only): +// - Created by the customer with AdministratorAccess policy attached. +// - Credentials are provided to Red Hat and stored in the "byoc" secret on +// Hive (referenced by AccountClaim.Spec.BYOCSecretRef). +// - The aws-account-operator uses these credentials to: +// 1. Create the ManagedOpenShift-Support- IAM role in the customer's +// account, with a trust policy allowing RH-SRE-CCS-Access to assume it. +// 2. Assume that role to create/manage the osdManagedAdmin IAM user. +// - The operator re-reads the byoc secret on every reconciliation (no caching), +// so updating the secret after rotation is sufficient — no operator restart needed. +// - Rotating the access key does NOT invalidate the ManagedOpenShift-Support +// trust policy, because the policy references the IAM user ARN (which doesn't +// change), not the access key. +// +// osdManagedAdmin (all non-STS clusters): +// - Created by the aws-account-operator (using osdCcsAdmin creds on CCS, or +// OrganizationAccountAccessRole on non-CCS). +// - Credentials stored in the account secret on Hive and synced to the cluster +// as kube-system/aws-creds via SyncSet. +// - The Cloud Credential Operator (CCO) uses these credentials to fulfill +// CredentialRequests — creating per-operator IAM users/policies and secrets +// (e.g., openshift-image-registry, openshift-ingress, openshift-machine-api). +// - After rotation, CredentialRequest secrets must be deleted so CCO recreates +// them with the new credentials. +// +// SRE/Backplane access path (independent of both users): +// - Backplane authenticates via OCM JWT → SRE-Support-Role → role chain → +// ManagedOpenShift-Support- in the customer account. +// - This path does NOT read osdCcsAdmin or osdManagedAdmin credentials. +// - The dependency is indirect: osdCcsAdmin created the infrastructure (roles) +// that backplane later assumes. +func newCmdAWSCreds(streams genericclioptions.IOStreams) *cobra.Command { + cmd := &cobra.Command{ + Use: "aws-creds", + Short: "Diagnose and manage AWS IAM credentials for a cluster", + Long: "Subcommands for inspecting and rotating AWS IAM credentials, Hive secrets, and CredentialRequests.", + DisableAutoGenTag: true, + } + + cmd.AddCommand(newCmdAWSCredsSnapshot(streams)) + cmd.AddCommand(newCmdAWSCredsRotate(streams)) + + return cmd +} + +type awsCredsOptions struct { + clusterID string + profile string + awsUseEnv bool + reason string + adminUsername string + hiveOcmUrl string + logLevel string + + log *logrus.Logger + genericclioptions.IOStreams +} + +func newAWSCredsLogger() *logrus.Logger { + l := logrus.New() + l.SetOutput(os.Stderr) + l.SetFormatter(&logrus.TextFormatter{ + FullTimestamp: true, + TimestampFormat: "15:04:05", + ForceColors: true, + }) + l.SetLevel(logrus.InfoLevel) + return l +} + +func (o *awsCredsOptions) addFlags(cmd *cobra.Command) { + cmd.Flags().StringVarP(&o.clusterID, "cluster-id", "C", "", "(Required) OCM internal or external cluster ID") + cmd.Flags().StringVarP(&o.profile, "aws-profile", "p", "", "AWS profile for role chaining. If omitted, tries backplane then falls back to default AWS credential chain") + cmd.Flags().BoolVar(&o.awsUseEnv, "aws-use-env", false, "Use AWS credentials from environment variables (e.g. after rh-aws-saml-login), skipping backplane") + cmd.Flags().StringVarP(&o.reason, "reason", "r", "", "(Required) Elevation reason, usually a Jira ticket ID") + cmd.Flags().StringVar(&o.adminUsername, "admin-username", "", "Override the osdManagedAdmin IAM username. Only needed if auto-detection fails (e.g. custom or legacy username)") + cmd.Flags().StringVar(&o.hiveOcmUrl, "hive-ocm-url", "", "OCM environment for Hive operations (aliases: production, staging, integration)") + cmd.Flags().StringVarP(&o.logLevel, "log-level", "l", "info", "Log level: debug, info, warn, error") + + _ = cmd.MarkFlagRequired("cluster-id") + _ = cmd.MarkFlagRequired("reason") + + hideIrrelevantGlobalFlags(cmd) +} + +const subcommandUsageTemplate = `Usage:{{if .Runnable}} + {{.UseLine}}{{end}}{{if .HasAvailableSubCommands}} + {{.CommandPath}} [command]{{end}}{{if gt (len .Aliases) 0}} + +Aliases: + {{.NameAndAliases}}{{end}}{{if .HasExample}} + +Examples: +{{.Example}}{{end}}{{if .HasAvailableSubCommands}}{{$cmds := .Commands}}{{if eq (len .Groups) 0}} + +Available Commands:{{range $cmds}}{{if (or .IsAvailableCommand (eq .Name "help"))}} + {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}}{{else}}{{range $group := .Groups}} + +{{.Title}}{{range $cmds}}{{if (and (eq .GroupID $group.ID) (or .IsAvailableCommand (eq .Name "help")))}} + {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}}{{end}}{{if not .AllChildCommandsHaveGroup}} + +Additional Commands:{{range $cmds}}{{if (and (eq .GroupID "") (or .IsAvailableCommand (eq .Name "help")))}} + {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}}{{end}}{{end}}{{end}}{{if .HasAvailableLocalFlags}} + +Flags: +{{.LocalFlags.FlagUsages | trimTrailingWhitespaces}}{{end}}{{if .HasAvailableSubCommands}} + +Use "{{.CommandPath}} [command] --help" for more information about a command.{{end}} +` + +// hideIrrelevantGlobalFlags replaces the default usage template to suppress inherited global flags. +func hideIrrelevantGlobalFlags(cmd *cobra.Command) { + cmd.SetUsageTemplate(subcommandUsageTemplate) +} + +var ( + jiraTicketPattern = regexp.MustCompile(`(OHSS|SREP|ROSAENG|OSD|SDE|CSSRE|MNGT)-\d+`) + pdIncidentIDPattern = regexp.MustCompile(`(?:pagerduty\.com/incidents/|^|\s)([A-Z0-9]{10,})`) + pdIncidentNumPattern = regexp.MustCompile(`[Ii]ncident\s*#?(\d{4,})`) +) + +// validateReason checks that the --reason flag contains a Jira ticket or PD incident +// associated with the target cluster, returning true if any warnings were issued. +func (o *awsCredsOptions) validateReason(rc *resolvedCluster) bool { + jiraTickets := jiraTicketPattern.FindAllString(o.reason, -1) + + var pdIncidents []string + for _, match := range pdIncidentIDPattern.FindAllStringSubmatch(o.reason, -1) { + pdIncidents = append(pdIncidents, match[1]) + } + for _, match := range pdIncidentNumPattern.FindAllStringSubmatch(o.reason, -1) { + pdIncidents = append(pdIncidents, match[1]) + } + warned := false + + if len(jiraTickets) == 0 && len(pdIncidents) == 0 { + o.log.Warn("The --reason does not appear to contain a Jira ticket (e.g., OHSS-1234) or PagerDuty incident.") + o.log.Warn("Accepted PD formats: incident ID (Q3V1PFRGY1YT7T), URL (https://redhat.pagerduty.com/incidents/...), or Incident #1234567") + return true + } + + if len(jiraTickets) > 0 { + if o.validateJiraTickets(rc, jiraTickets) { + warned = true + } + } + if len(pdIncidents) > 0 { + if o.validatePDIncidents(rc, pdIncidents) { + warned = true + } + } + return warned +} + +// validateJiraTickets checks whether referenced Jira tickets are associated with the cluster. +func (o *awsCredsOptions) validateJiraTickets(rc *resolvedCluster, tickets []string) bool { + o.log.WithField("count", len(tickets)).Debug("Validating Jira ticket references from reason") + warned := false + + clusterIssues, err := utils.GetJiraIssuesForCluster(rc.internalID, rc.cluster.ExternalID(), "") + if err != nil { + o.log.WithError(err).Debug("Could not query Jira for cluster-associated tickets (token may not be configured)") + return false + } + + clusterTicketKeys := map[string]bool{} + for _, issue := range clusterIssues { + clusterTicketKeys[issue.Key] = true + } + + for _, ticket := range tickets { + if !clusterTicketKeys[ticket] { + o.log.WithField("ticket", ticket[:min(len(ticket), 4)]+"...").Warn("Ticket not found in Jira issues associated with this cluster.") + o.log.Warn("Verify you are operating on the correct cluster. Check with: osdctl cluster context -C " + rc.internalID) + warned = true + } + } + return warned +} + +// validatePDIncidents checks whether referenced PagerDuty incidents are active alerts for the cluster. +func (o *awsCredsOptions) validatePDIncidents(rc *resolvedCluster, incidents []string) bool { + o.log.WithField("count", len(incidents)).Debug("Validating PagerDuty incident references from reason") + warned := false + + baseDomain := rc.cluster.DNS().BaseDomain() + if baseDomain == "" { + o.log.Debug("Cluster has no baseDomain, skipping PD validation") + return false + } + + pdClient := pagerduty.NewClient().WithBaseDomain(baseDomain) + if _, err := pdClient.Init(); err != nil { + o.log.WithError(err).Debug("Could not initialize PagerDuty client (token may not be configured)") + return false + } + + serviceIDs, err := pdClient.GetPDServiceIDs() + if err != nil { + o.log.WithError(err).Debug("Could not query PagerDuty services for cluster") + return false + } + + if len(serviceIDs) == 0 { + o.log.Debug("No PagerDuty services found for cluster baseDomain") + return false + } + + clusterAlerts, err := pdClient.GetFiringAlertsForCluster(serviceIDs) + if err != nil { + o.log.WithError(err).Debug("Could not query PagerDuty alerts for cluster") + return false + } + + clusterIncidentIDs := map[string]bool{} + for _, alerts := range clusterAlerts { + for _, alert := range alerts { + clusterIncidentIDs[alert.ID] = true + } + } + + for _, incident := range incidents { + if !clusterIncidentIDs[incident] { + redacted := incident + if len(redacted) > 6 { + redacted = redacted[:3] + "..." + redacted[len(redacted)-3:] + } + o.log.WithField("incident", redacted).Warn("PagerDuty incident not found in active alerts for this cluster.") + o.log.Warn("Verify you are operating on the correct cluster. Check with: osdctl cluster context -C " + rc.internalID) + warned = true + } + } + return warned +} + +// confirmCluster displays cluster details and prompts the user to confirm they are operating on the correct cluster. +func (o *awsCredsOptions) confirmCluster(rc *resolvedCluster) bool { + ccsStr := "No" + if rc.isCCS { + ccsStr = "Yes" + } + fmt.Fprintf(o.Out, "\n==========================================================================\n") + fmt.Fprintf(o.Out, " Cluster: %s (%s)\n", rc.cluster.Name(), rc.internalID) + fmt.Fprintf(o.Out, " External ID: %s\n", rc.cluster.ExternalID()) + fmt.Fprintf(o.Out, " CCS/BYOC: %s\n", ccsStr) + fmt.Fprintf(o.Out, " Reason: %s\n", o.reason) + fmt.Fprintf(o.Out, "==========================================================================\n") + + warned := o.validateReason(rc) + + if warned { + fmt.Fprintf(o.Out, "\nThe ticket/incident in --reason was not found to be associated with this cluster.\nAre you sure you want to continue? ") + } else { + fmt.Fprintf(o.Out, "\nIs this the correct cluster? ") + } + return utils.ConfirmPrompt() +} + +// validate parses the log level and validates flag constraints common to all aws-creds subcommands. +func (o *awsCredsOptions) validate(cmd *cobra.Command, _ []string) error { + level, err := logrus.ParseLevel(o.logLevel) + if err != nil { + return fmt.Errorf("invalid --log-level '%s': valid values are debug, info, warn, error", o.logLevel) + } + o.log.SetLevel(level) + + if o.awsUseEnv && o.profile != "" { + return cmdutil.UsageErrorf(cmd, "--aws-use-env and --aws-profile are mutually exclusive") + } + + if o.adminUsername != "" && !strings.HasPrefix(o.adminUsername, common.OSDManagedAdminIAM) { + return cmdutil.UsageErrorf(cmd, "admin-username must start with %s", common.OSDManagedAdminIAM) + } + + if o.hiveOcmUrl != "" { + if _, err := utils.ValidateAndResolveOcmUrl(o.hiveOcmUrl); err != nil { + return fmt.Errorf("invalid --hive-ocm-url: %w", err) + } + } + + return nil +} + +type resolvedCluster struct { + ocmConn *sdk.Connection + cluster *cmv1.Cluster + internalID string + isCCS bool + accountID string + suffixLabel string + claimName string + adminUsername string + account *awsv1alpha1.Account + awsClient awsprovider.Client + hiveClient client.Client + managedClient client.Client +} + +// identifyCluster performs the lightweight OCM lookup and prompts the user +// to confirm they are operating on the correct cluster before any expensive +// operations (hive connection, AWS client setup, etc.). +func (o *awsCredsOptions) identifyCluster() (*resolvedCluster, error) { + o.log.Info("Creating OCM connection") + ocmConn, err := utils.CreateConnection() + if err != nil { + return nil, fmt.Errorf("failed to create OCM connection: %w", err) + } + + o.log.WithField("cluster", o.clusterID).Info("Fetching cluster info from OCM") + cluster, err := utils.GetClusterAnyStatus(ocmConn, o.clusterID) + if err != nil { + ocmConn.Close() + return nil, fmt.Errorf("failed to get cluster %s from OCM: %w", o.clusterID, err) + } + + if o.clusterID != cluster.ID() && o.clusterID != cluster.ExternalID() { + ocmConn.Close() + return nil, fmt.Errorf("cluster id '%s' does not match internal '%s' or external '%s' IDs — aliases are not allowed", o.clusterID, cluster.ID(), cluster.ExternalID()) + } + internalID := cluster.ID() + o.log.WithFields(logrus.Fields{"name": cluster.Name(), "id": internalID}).Info("Cluster resolved") + + isCCS, err := utils.IsClusterCCS(ocmConn, internalID) + if err != nil { + ocmConn.Close() + return nil, fmt.Errorf("failed to check CCS status: %w", err) + } + + rc := &resolvedCluster{ + ocmConn: ocmConn, + cluster: cluster, + internalID: internalID, + isCCS: isCCS, + } + + if !o.confirmCluster(rc) { + ocmConn.Close() + return nil, fmt.Errorf("operation cancelled by user") + } + + return rc, nil +} + +// resolveCluster completes the cluster setup after user confirmation: +// connects to Hive, resolves account claim, sets up AWS and managed clients. +func (o *awsCredsOptions) resolveCluster(ctx context.Context, rc *resolvedCluster) error { + o.log.Info("Connecting to Hive cluster") + hiveKubeClient, err := o.connectHive(rc.ocmConn, rc.internalID) + if err != nil { + return fmt.Errorf("failed to connect to hive: %w", err) + } + rc.hiveClient = hiveKubeClient + + o.log.Info("Resolving account claim from Hive") + claimName, err := o.resolveAccountClaimName(ctx, hiveKubeClient, rc.ocmConn, rc.internalID) + if err != nil { + return fmt.Errorf("failed to resolve account claim: %w", err) + } + rc.claimName = claimName + o.log.WithField("claim", claimName).Info("Account claim resolved") + + o.log.WithField("account", claimName).Info("Fetching Account CR") + account, err := k8s.GetAWSAccount(ctx, hiveKubeClient, common.AWSAccountNamespace, claimName) + if err != nil { + return fmt.Errorf("failed to get Account CR '%s': %w", claimName, err) + } + + if account.Spec.ManualSTSMode { + return fmt.Errorf("account %s is STS — no IAM user credentials to manage", claimName) + } + rc.account = account + + rc.accountID = account.Spec.AwsAccountID + suffixLabel, ok := account.Labels["iamUserId"] + if !ok { + return fmt.Errorf("no iamUserId label on Account CR %s", claimName) + } + rc.suffixLabel = suffixLabel + + adminUsername := o.adminUsername + if adminUsername == "" { + adminUsername = common.OSDManagedAdminIAM + "-" + suffixLabel + } + rc.adminUsername = adminUsername + o.log.WithFields(logrus.Fields{"aws_account": rc.accountID, "admin_user": adminUsername}).Info("Account info resolved") + + o.log.Info("Setting up AWS client") + awsClient, err := o.buildAWSClient(ctx, rc.ocmConn, rc.cluster, hiveKubeClient, account, rc.accountID, suffixLabel) + if err != nil { + return fmt.Errorf("failed to set up AWS client: %w", err) + } + rc.awsClient = awsClient + o.log.Info("AWS client ready") + + o.log.Info("Connecting to managed cluster via backplane") + managedClient, err := o.connectManagedCluster(rc.internalID) + if err != nil { + o.log.WithError(err).Warn("Could not connect to managed cluster — credential request diagnostics will be skipped") + } + rc.managedClient = managedClient + + return nil +} + +// toCredsInput converts a resolvedCluster into the controller.AWSCredsInput used by DiagnoseCredentials. +func (rc *resolvedCluster) toCredsInput(log *logrus.Logger, out io.Writer) *controller.AWSCredsInput { + return &controller.AWSCredsInput{ + ClusterID: rc.internalID, + ClusterName: rc.cluster.Name(), + ClusterExternalID: rc.cluster.ExternalID(), + IsCCS: rc.isCCS, + AWSAccountID: rc.accountID, + AccountCRName: rc.claimName, + Account: rc.account, + AdminUsername: rc.adminUsername, + AwsClient: rc.awsClient, + HiveKubeClient: rc.hiveClient, + ManagedClient: rc.managedClient, + Log: log, + Out: out, + } +} + +// connectHive establishes a k8s client to the Hive cluster, using the --hive-ocm-url override if specified. +func (o *awsCredsOptions) connectHive(ocmConn *sdk.Connection, clusterID string) (client.Client, error) { + elevationMsg := fmt.Sprintf("Elevation required for aws-creds on %s", clusterID) + + if o.hiveOcmUrl != "" { + resolvedURL, err := utils.ValidateAndResolveOcmUrl(o.hiveOcmUrl) + if err != nil { + return nil, fmt.Errorf("invalid --hive-ocm-url: %w", err) + } + + hiveOCM, err := utils.CreateConnectionWithUrl(resolvedURL) + if err != nil { + return nil, fmt.Errorf("failed to create hive OCM connection (URL: %s): %w", resolvedURL, err) + } + defer hiveOCM.Close() + + hiveCluster, err := utils.GetHiveClusterWithConn(clusterID, ocmConn, hiveOCM) + if err != nil { + return nil, fmt.Errorf("failed to get hive cluster (URL: %s): %w", resolvedURL, err) + } + + o.log.WithFields(logrus.Fields{"hive": hiveCluster.Name(), "ocm_url": resolvedURL}).Info("Connecting to Hive cluster") + + hiveClient, err := k8s.NewAsBackplaneClusterAdminWithConn( + hiveCluster.ID(), + client.Options{Scheme: scheme.Scheme}, + hiveOCM, + o.reason, + elevationMsg, + ) + if err != nil { + return nil, fmt.Errorf("failed to create hive k8s client (URL: %s): %w", resolvedURL, err) + } + return hiveClient, nil + } + + hiveCluster, err := utils.GetHiveCluster(clusterID) + if err != nil { + return nil, fmt.Errorf("failed to get hive cluster: %w", err) + } + + o.log.WithField("hive", hiveCluster.Name()).Info("Connecting to Hive cluster") + + hiveKubeCli, _, _, err := common.GetKubeConfigAndClient(hiveCluster.ID(), o.reason, elevationMsg) + if err != nil { + return nil, fmt.Errorf("failed to create hive k8s client: %w", err) + } + return hiveKubeCli, nil +} + +// resolveAccountClaimName resolves the Account CR name from the AccountClaim on Hive, +// falling back to the OCM cluster resources API if the claim is not directly accessible. +func (o *awsCredsOptions) resolveAccountClaimName(ctx context.Context, hiveClient client.Client, ocmConn *sdk.Connection, clusterID string) (string, error) { + accountClaim, err := k8s.GetAccountClaimFromClusterID(ctx, hiveClient, clusterID) + if err == nil && accountClaim != nil && accountClaim.Spec.AccountLink != "" { + return accountClaim.Spec.AccountLink, nil + } + + liveResponse, err := ocmConn.ClustersMgmt().V1().Clusters().Cluster(clusterID).Resources().Live().Get().Send() + if err != nil { + return "", fmt.Errorf("failed to fetch cluster resources from OCM: %w", err) + } + + respBody := liveResponse.Body().Resources() + awsAccountClaim, ok := respBody["aws_account_claim"] + if !ok { + return "", fmt.Errorf("cluster does not have an AccountClaim in OCM resources") + } + + var claimJSON map[string]any + if err := json.Unmarshal([]byte(awsAccountClaim), &claimJSON); err != nil { + return "", fmt.Errorf("failed to parse account claim JSON: %w", err) + } + + spec, ok := claimJSON["spec"].(map[string]any) + if !ok { + return "", fmt.Errorf("account claim has no spec") + } + + accountLink, ok := spec["accountLink"].(string) + if !ok || accountLink == "" { + return "", fmt.Errorf("account claim has no accountLink") + } + + return accountLink, nil +} + +// buildAWSClient creates an AWS client using either: +// 1. --aws-profile : explicit local AWS config profile +// 2. --aws-use-env: default AWS credential chain (env vars from rh-aws-saml-login, ~/.aws/config) +// 3. No flags: backplane (default), falling back to default credential chain +func (o *awsCredsOptions) buildAWSClient(ctx context.Context, ocmConn *sdk.Connection, cluster *cmv1.Cluster, hiveClient client.Client, account *awsv1alpha1.Account, accountID, suffixLabel string) (awsprovider.Client, error) { + if o.profile != "" { + return o.buildAWSClientViaProfile(ctx, hiveClient, account, accountID, suffixLabel) + } + + if !o.awsUseEnv { + client, err := o.buildAWSClientViaBackplane(ctx, ocmConn, cluster) + if err != nil { + o.log.WithError(err).Warn("Backplane credential retrieval failed, falling back to default AWS credential chain") + return o.buildAWSClientViaDefaultChain(ctx, hiveClient, account, accountID, suffixLabel) + } + return client, nil + } + + o.log.Info("Using environment AWS credentials (--aws-use-env)") + return o.buildAWSClientViaEnv(ctx, hiveClient, account, accountID, suffixLabel) +} + +// buildAWSClientViaBackplane obtains AWS credentials through OCM backplane without a local AWS profile. +func (o *awsCredsOptions) buildAWSClientViaBackplane(ctx context.Context, ocmConn *sdk.Connection, cluster *cmv1.Cluster) (awsprovider.Client, error) { + o.log.Info("Using backplane for AWS credentials (no --aws-profile or --aws-use-env specified)") + + cfg, err := osdCloud.CreateAWSV2Config(ocmConn, cluster) + if err != nil { + return nil, fmt.Errorf("failed to get AWS credentials via backplane: %w", err) + } + + creds, err := cfg.Credentials.Retrieve(ctx) + if err != nil { + return nil, fmt.Errorf("failed to retrieve backplane AWS credentials: %w", err) + } + + return awsprovider.NewAwsClientWithInput(&awsprovider.ClientInput{ + AccessKeyID: creds.AccessKeyID, + SecretAccessKey: creds.SecretAccessKey, + SessionToken: creds.SessionToken, + Region: "us-east-1", + }) +} + +// buildAWSClientViaProfile creates an AWS client using a local profile with manual +// role chaining through SRE access, jump, and support roles. +func (o *awsCredsOptions) buildAWSClientViaProfile(ctx context.Context, hiveClient client.Client, account *awsv1alpha1.Account, accountID, suffixLabel string) (awsprovider.Client, error) { + awsSetupClient, err := awsprovider.NewAwsClient(o.profile, "us-east-1", "") + if err != nil { + return nil, fmt.Errorf("failed to create initial AWS client with profile '%s': %w", o.profile, err) + } + return o.roleChainToSupport(ctx, awsSetupClient, hiveClient, account, accountID, suffixLabel) +} + +// roleChainToSupport takes an initial AWS client (from any source: profile, env vars, default chain) +// and role-chains through SRE access, jump, and support roles to reach the cluster's AWS account. +func (o *awsCredsOptions) roleChainToSupport(ctx context.Context, awsSetupClient awsprovider.Client, hiveClient client.Client, account *awsv1alpha1.Account, accountID, suffixLabel string) (awsprovider.Client, error) { + callerIdentity, err := awsSetupClient.GetCallerIdentity(&sts.GetCallerIdentityInput{}) + if err != nil { + return nil, fmt.Errorf("failed to get AWS caller identity: %w", err) + } + roleSessionName := getSessionNameFromUserId(*callerIdentity.UserId) + timeout := awsSdk.Int32(900) + + var credKeyID, credSecretKey, credToken *string + + if account.Spec.BYOC { + cm := &corev1.ConfigMap{} + if err := hiveClient.Get(ctx, client.ObjectKey{ + Name: common.DefaultConfigMap, Namespace: common.AWSAccountNamespace, + }, cm); err != nil { + return nil, fmt.Errorf("failed to get aws-account-operator configmap: %w", err) + } + + sreAccessARN := cm.Data["CCS-Access-Arn"] + if sreAccessARN == "" { + return nil, fmt.Errorf("CCS-Access-Arn missing from configmap") + } + jumpARN := cm.Data["support-jump-role"] + if jumpARN == "" { + return nil, fmt.Errorf("support-jump-role missing from configmap") + } + + srepCreds, err := awsprovider.GetAssumeRoleCredentials(awsSetupClient, timeout, &roleSessionName, &sreAccessARN) + if err != nil { + return nil, fmt.Errorf("failed to assume SRE access role: %w", err) + } + + srepClient, err := awsprovider.NewAwsClientWithInput(&awsprovider.ClientInput{ + AccessKeyID: *srepCreds.AccessKeyId, SecretAccessKey: *srepCreds.SecretAccessKey, + SessionToken: *srepCreds.SessionToken, Region: "us-east-1", + }) + if err != nil { + return nil, fmt.Errorf("failed to create SRE access role client: %w", err) + } + + jumpCreds, err := awsprovider.GetAssumeRoleCredentials(srepClient, timeout, &roleSessionName, &jumpARN) + if err != nil { + return nil, fmt.Errorf("failed to assume jump role: %w", err) + } + + jumpClient, err := awsprovider.NewAwsClientWithInput(&awsprovider.ClientInput{ + AccessKeyID: *jumpCreds.AccessKeyId, SecretAccessKey: *jumpCreds.SecretAccessKey, + SessionToken: *jumpCreds.SessionToken, Region: "us-east-1", + }) + if err != nil { + return nil, fmt.Errorf("failed to create jump role client: %w", err) + } + + roleArn := awsSdk.String(fmt.Sprintf("arn:aws:iam::%s:role/ManagedOpenShift-Support-%s", accountID, suffixLabel)) + creds, err := awsprovider.GetAssumeRoleCredentials(jumpClient, timeout, &roleSessionName, roleArn) + if err != nil { + return nil, fmt.Errorf("failed to assume ManagedOpenShift-Support role: %w", err) + } + credKeyID = creds.AccessKeyId + credSecretKey = creds.SecretAccessKey + credToken = creds.SessionToken + } else { + roleArn := awsSdk.String(fmt.Sprintf("arn:aws:iam::%s:role/%s", accountID, awsv1alpha1.AccountOperatorIAMRole)) + creds, err := awsprovider.GetAssumeRoleCredentials(awsSetupClient, timeout, &roleSessionName, roleArn) + if err != nil { + return nil, fmt.Errorf("failed to assume OrganizationAccountAccessRole: %w", err) + } + credKeyID = creds.AccessKeyId + credSecretKey = creds.SecretAccessKey + credToken = creds.SessionToken + } + + return awsprovider.NewAwsClientWithInput(&awsprovider.ClientInput{ + AccessKeyID: *credKeyID, SecretAccessKey: *credSecretKey, + SessionToken: *credToken, Region: "us-east-1", + }) +} + +// resolveForCRSecrets connects only to Hive (for account secret timestamp) +// and the managed cluster (for CR secrets). Skips AWS client setup entirely. +func (o *awsCredsOptions) resolveForCRSecrets(ctx context.Context, rc *resolvedCluster) error { + o.log.Info("Connecting to Hive cluster") + hiveKubeClient, err := o.connectHive(rc.ocmConn, rc.internalID) + if err != nil { + return fmt.Errorf("failed to connect to hive: %w", err) + } + rc.hiveClient = hiveKubeClient + + o.log.Info("Resolving account claim from Hive") + claimName, err := o.resolveAccountClaimName(ctx, hiveKubeClient, rc.ocmConn, rc.internalID) + if err != nil { + return fmt.Errorf("failed to resolve account claim: %w", err) + } + rc.claimName = claimName + o.log.WithField("claim", claimName).Info("Account claim resolved") + + o.log.WithField("account", claimName).Info("Fetching Account CR") + account, err := k8s.GetAWSAccount(ctx, hiveKubeClient, common.AWSAccountNamespace, claimName) + if err != nil { + return fmt.Errorf("failed to get Account CR '%s': %w", claimName, err) + } + if account.Spec.ManualSTSMode { + return fmt.Errorf("account %s is STS — no IAM user credentials to manage", claimName) + } + rc.account = account + rc.accountID = account.Spec.AwsAccountID + + o.log.Info("Connecting to managed cluster via backplane") + managedClient, err := o.connectManagedCluster(rc.internalID) + if err != nil { + return fmt.Errorf("managed cluster connection required for --cr-secrets: %w", err) + } + rc.managedClient = managedClient + + return nil +} + +// buildAWSClientViaEnv creates an AWS client from AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, +// and AWS_SESSION_TOKEN environment variables (e.g. set by rh-aws-saml-login --output env). +// Reads env vars directly and feeds them to roleChainToSupport, bypassing NewAwsClient +// which applies proxy config that interferes with credential chain resolution. +func (o *awsCredsOptions) buildAWSClientViaEnv(ctx context.Context, hiveClient client.Client, account *awsv1alpha1.Account, accountID, suffixLabel string) (awsprovider.Client, error) { + accessKey := os.Getenv("AWS_ACCESS_KEY_ID") + secretKey := os.Getenv("AWS_SECRET_ACCESS_KEY") + sessionToken := os.Getenv("AWS_SESSION_TOKEN") + + if accessKey == "" || secretKey == "" { + return nil, fmt.Errorf("--aws-use-env requires AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables (e.g. from rh-aws-saml-login --output env)") + } + + awsSetupClient, err := awsprovider.NewAwsClientWithInput(&awsprovider.ClientInput{ + AccessKeyID: accessKey, + SecretAccessKey: secretKey, + SessionToken: sessionToken, + Region: "us-east-1", + }) + if err != nil { + return nil, fmt.Errorf("failed to create AWS client from environment credentials: %w", err) + } + + return o.roleChainToSupport(ctx, awsSetupClient, hiveClient, account, accountID, suffixLabel) +} + +// buildAWSClientViaDefaultChain uses the default AWS credential chain (env vars, ~/.aws/config) +// as a fallback when backplane is unavailable. +func (o *awsCredsOptions) buildAWSClientViaDefaultChain(ctx context.Context, hiveClient client.Client, account *awsv1alpha1.Account, accountID, suffixLabel string) (awsprovider.Client, error) { + o.log.Info("Using default AWS credential chain (env vars / ~/.aws/config)") + awsSetupClient, err := awsprovider.NewAwsClient("", "us-east-1", "") + if err != nil { + return nil, fmt.Errorf("failed to create AWS client from default credential chain: %w", err) + } + return o.roleChainToSupport(ctx, awsSetupClient, hiveClient, account, accountID, suffixLabel) +} + +// connectManagedCluster establishes a k8s client to the managed cluster via backplane for CR secret operations. +func (o *awsCredsOptions) connectManagedCluster(clusterID string) (client.Client, error) { + managedScheme := runtime.NewScheme() + if err := corev1.AddToScheme(managedScheme); err != nil { + return nil, fmt.Errorf("failed to register corev1 scheme: %w", err) + } + if err := ccov1.AddToScheme(managedScheme); err != nil { + return nil, fmt.Errorf("failed to register cloudcredential scheme: %w", err) + } + + managedClient, err := k8s.NewAsBackplaneClusterAdmin( + clusterID, + client.Options{Scheme: managedScheme}, + o.reason, + fmt.Sprintf("Elevation required for aws-creds on %s", clusterID), + ) + if err != nil { + return nil, err + } + return managedClient, nil +} diff --git a/cmd/account/cmd.go b/cmd/account/cmd.go index c25accde7..4ca57aa91 100644 --- a/cmd/account/cmd.go +++ b/cmd/account/cmd.go @@ -34,6 +34,7 @@ func NewCmdAccount(streams genericclioptions.IOStreams, client *k8s.LazyClient, accountCmd.AddCommand(newCmdCleanVeleroSnapshots(streams)) accountCmd.AddCommand(newCmdVerifySecrets(streams, client)) accountCmd.AddCommand(newCmdRotateSecret(streams, client)) + accountCmd.AddCommand(newCmdAWSCreds(streams)) accountCmd.AddCommand(newCmdGenerateSecret(streams, client)) return accountCmd diff --git a/cmd/account/rotate-secret.go b/cmd/account/rotate-secret.go index 14d458ad1..618527e42 100644 --- a/cmd/account/rotate-secret.go +++ b/cmd/account/rotate-secret.go @@ -32,6 +32,7 @@ func newCmdRotateSecret(streams genericclioptions.IOStreams, client *k8s.LazyCli Use: "rotate-secret ", Short: "Rotate IAM credentials secret", Long: "When logged into a hive shard, this rotates IAM credential secrets for a given `account` CR.", + Deprecated: "use 'osdctl account aws-creds rotate' instead. The newer command accepts a cluster ID, provides diagnostic reports, and handles account claim resolution automatically.", DisableAutoGenTag: true, Run: func(cmd *cobra.Command, args []string) { cmdutil.CheckErr(ops.complete(cmd, args)) @@ -166,11 +167,13 @@ func (o *rotateSecretOptions) run() error { AccountCRName: o.accountCRName, Account: account, OsdManagedAdminUsername: o.osdManagedAdminUsername, + UpdateManagedAdminCreds: true, UpdateCcsCreds: o.updateCcsCreds, DryRun: o.dryRun, AwsClient: awsClient, HiveKubeClient: kubeCli, ManagedClusterClient: managedClient, + In: os.Stdin, Out: os.Stdout, }) } @@ -293,7 +296,7 @@ func (o *rotateSecretOptions) initHiveClient() (client.Client, error) { return nil, fmt.Errorf("failed to get hive cluster (OCM URL:'%s'): %w", resolvedURL, err) } - fmt.Printf("Connecting to hive cluster %s via OCM URL: %s\n", hive.Name(), resolvedURL) + fmt.Fprintf(os.Stderr, "Connecting to hive cluster %s via OCM URL: %s\n", hive.Name(), resolvedURL) elevationMsg := fmt.Sprintf("Elevation required to rotate secrets for %s", o.accountCRName) hiveClient, err := k8s.NewAsBackplaneClusterAdminWithConn( diff --git a/docs/README.md b/docs/README.md index 256d70532..541bdbf20 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,6 +5,9 @@ - `aao` - AWS Account Operator Debugging Utilities - `pool` - Get the status of the AWS Account Operator AccountPool - `account` - AWS Account related utilities + - `aws-creds` - Diagnose and manage AWS IAM credentials for a cluster + - `rotate -C --reason [flags]` - Rotate AWS IAM credentials for a cluster + - `snapshot -C --reason [flags]` - Show a read-only credential status report for a cluster - `clean-velero-snapshots` - Cleans up S3 buckets whose name start with managed-velero - `cli` - Generate temporary AWS CLI credentials on demand - `console` - Generate an AWS console URL on the fly @@ -24,7 +27,6 @@ - `list` - List out accounts for username - `unassign` - Unassign account to user - `reset ` - Reset AWS Account CR - - `rotate-secret ` - Rotate IAM credentials secret - `servicequotas` - Interact with AWS service-quotas - `describe` - Describe AWS service-quotas - `set ` - Set AWS Account CR status @@ -245,6 +247,125 @@ osdctl account [flags] -S, --skip-version-check skip checking to see if this is the most recent release ``` +### osdctl account aws-creds + +Subcommands for inspecting and rotating AWS IAM credentials, Hive secrets, and CredentialRequests. + +``` +osdctl account aws-creds [flags] +``` + +#### Flags + +``` + --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. + --cluster string The name of the kubeconfig cluster to use + --context string The name of the kubeconfig context to use + -h, --help help for aws-creds + --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure + --kubeconfig string Path to the kubeconfig file to use for CLI requests. + -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] + --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") + -s, --server string The address and port of the Kubernetes API server + --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### osdctl account aws-creds rotate + +Rotates AWS IAM credentials for osdManagedAdmin and/or osdCcsAdmin users. +Runs a diagnostic snapshot first, then performs the rotation with +interactive confirmation. + +Use --refresh-secrets to only delete and recreate CredentialRequest secrets +without rotating AWS keys or modifying Hive secrets. This is useful when +CCO needs to re-provision secrets with existing credentials. + +AWS credentials are obtained via backplane by default, falling back to the +default AWS credential chain (env vars, ~/.aws/config). Use --aws-profile +to specify a named profile, or --aws-use-env to skip backplane and use +environment credentials directly (e.g. after rh-aws-saml-login). + +Pre-flight checks (IAM permissions, secret existence) block rotation by +default. Use --force to allow proceeding past errors with explicit YES +confirmation — only when you are certain the errors are benign. + +``` +osdctl account aws-creds rotate -C --reason [flags] +``` + +#### Flags + +``` + --admin-username string Override the osdManagedAdmin IAM username. Only needed if auto-detection fails (e.g. custom or legacy username) + --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. + -p, --aws-profile string AWS profile for role chaining. If omitted, tries backplane then falls back to default AWS credential chain + --aws-use-env Use AWS credentials from environment variables (e.g. after rh-aws-saml-login), skipping backplane + --ccs-admin Rotate osdCcsAdmin credentials (CCS clusters only) + --cluster string The name of the kubeconfig cluster to use + -C, --cluster-id string (Required) OCM internal or external cluster ID + --context string The name of the kubeconfig context to use + --dry-run Preview rotation actions without making changes + --force Allow proceeding past pre-flight errors with YES confirmation. Use only when certain the errors are benign (e.g., known SCP restrictions that won't affect rotation) + -h, --help help for rotate + --hive-ocm-url string OCM environment for Hive operations (aliases: production, staging, integration) + --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure + --kubeconfig string Path to the kubeconfig file to use for CLI requests. + -l, --log-level string Log level: debug, info, warn, error (default "info") + --managed-admin Rotate osdManagedAdmin credentials + -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] + -r, --reason string (Required) Elevation reason, usually a Jira ticket ID + --refresh-secrets Only delete and recreate CredentialRequest secrets (no key rotation) + --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") + -s, --server string The address and port of the Kubernetes API server + --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### osdctl account aws-creds snapshot + +Produces a diagnostic report of AWS IAM credentials including: + - IAM access keys and which Hive secrets reference them + - CredentialRequest secrets and whether they need refresh + - IAM permission simulation (SCP/policy restriction detection) + +Use --cr-secrets to show only the CredentialRequest secrets table. + +This is a read-only operation — no credentials are modified. + +AWS credentials are obtained via backplane by default, falling back to the +default AWS credential chain (env vars, ~/.aws/config). Use --aws-profile +to specify a named profile, or --aws-use-env to skip backplane and use +environment credentials directly (e.g. after rh-aws-saml-login). + +``` +osdctl account aws-creds snapshot -C --reason [flags] +``` + +#### Flags + +``` + --admin-username string Override the osdManagedAdmin IAM username. Only needed if auto-detection fails (e.g. custom or legacy username) + --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. + -p, --aws-profile string AWS profile for role chaining. If omitted, tries backplane then falls back to default AWS credential chain + --aws-use-env Use AWS credentials from environment variables (e.g. after rh-aws-saml-login), skipping backplane + --cluster string The name of the kubeconfig cluster to use + -C, --cluster-id string (Required) OCM internal or external cluster ID + --context string The name of the kubeconfig context to use + --cr-secrets Only show CredentialRequest secrets status + -h, --help help for snapshot + --hive-ocm-url string OCM environment for Hive operations (aliases: production, staging, integration) + --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure + --kubeconfig string Path to the kubeconfig file to use for CLI requests. + -l, --log-level string Log level: debug, info, warn, error (default "info") + -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] + -r, --reason string (Required) Elevation reason, usually a Jira ticket ID + --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") + -s, --server string The address and port of the Kubernetes API server + --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value + -S, --skip-version-check skip checking to see if this is the most recent release +``` + ### osdctl account clean-velero-snapshots Cleans up S3 buckets whose name start with managed-velero @@ -773,37 +894,6 @@ osdctl account reset [flags] -S, --skip-version-check skip checking to see if this is the most recent release ``` -### osdctl account rotate-secret - -When logged into a hive shard, this rotates IAM credential secrets for a given `account` CR. - -``` -osdctl account rotate-secret [flags] -``` - -#### Flags - -``` - --admin-username osdManagedAdmin* The admin username to use for generating access keys. Must be in the format of osdManagedAdmin*. If not specified, this is inferred from the account CR. - --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. - -p, --aws-profile string specify AWS profile - --ccs Also rotates osdCcsAdmin credential. Use caution. - --cluster string The name of the kubeconfig cluster to use - -C, --cluster-id string OCM internal/external cluster id or cluster name - --context string The name of the kubeconfig context to use - --dry-run Only print what actions would be taken without performing any mutations (no AWS key creation/deletion, no k8s resource changes) - -h, --help help for rotate-secret - --hive-ocm-url string (optional) OCM environment URL for Hive operations. Aliases: 'production', 'staging', 'integration'. This only changes how the Hive cluster is resolved; the target cluster still comes from the current/default OCM environment. - --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure - --kubeconfig string Path to the kubeconfig file to use for CLI requests. - -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] - --reason string The reason for this command, which requires elevation, to be run (usually an OHSS or PD ticket) - --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") - -s, --server string The address and port of the Kubernetes API server - --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value - -S, --skip-version-check skip checking to see if this is the most recent release -``` - ### osdctl account servicequotas Interact with AWS service-quotas diff --git a/docs/osdctl_account.md b/docs/osdctl_account.md index d8d08abf1..e7ea1cedf 100644 --- a/docs/osdctl_account.md +++ b/docs/osdctl_account.md @@ -26,6 +26,7 @@ AWS Account related utilities ### SEE ALSO * [osdctl](osdctl.md) - OSD CLI +* [osdctl account aws-creds](osdctl_account_aws-creds.md) - Diagnose and manage AWS IAM credentials for a cluster * [osdctl account clean-velero-snapshots](osdctl_account_clean-velero-snapshots.md) - Cleans up S3 buckets whose name start with managed-velero * [osdctl account cli](osdctl_account_cli.md) - Generate temporary AWS CLI credentials on demand * [osdctl account console](osdctl_account_console.md) - Generate an AWS console URL on the fly @@ -34,7 +35,6 @@ AWS Account related utilities * [osdctl account list](osdctl_account_list.md) - List resources * [osdctl account mgmt](osdctl_account_mgmt.md) - AWS Account Management * [osdctl account reset](osdctl_account_reset.md) - Reset AWS Account CR -* [osdctl account rotate-secret](osdctl_account_rotate-secret.md) - Rotate IAM credentials secret * [osdctl account servicequotas](osdctl_account_servicequotas.md) - Interact with AWS service-quotas * [osdctl account set](osdctl_account_set.md) - Set AWS Account CR status * [osdctl account verify-secrets](osdctl_account_verify-secrets.md) - Verify AWS Account CR IAM User credentials diff --git a/docs/osdctl_account_aws-creds.md b/docs/osdctl_account_aws-creds.md new file mode 100644 index 000000000..f907d8647 --- /dev/null +++ b/docs/osdctl_account_aws-creds.md @@ -0,0 +1,35 @@ +## osdctl account aws-creds + +Diagnose and manage AWS IAM credentials for a cluster + +### Synopsis + +Subcommands for inspecting and rotating AWS IAM credentials, Hive secrets, and CredentialRequests. + +### Options + +``` + -h, --help help for aws-creds +``` + +### Options inherited from parent commands + +``` + --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. + --cluster string The name of the kubeconfig cluster to use + --context string The name of the kubeconfig context to use + --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure + --kubeconfig string Path to the kubeconfig file to use for CLI requests. + -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] + --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") + -s, --server string The address and port of the Kubernetes API server + --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl account](osdctl_account.md) - AWS Account related utilities +* [osdctl account aws-creds rotate](osdctl_account_aws-creds_rotate.md) - Rotate AWS IAM credentials for a cluster +* [osdctl account aws-creds snapshot](osdctl_account_aws-creds_snapshot.md) - Show a read-only credential status report for a cluster + diff --git a/docs/osdctl_account_aws-creds_rotate.md b/docs/osdctl_account_aws-creds_rotate.md new file mode 100644 index 000000000..fc00cf177 --- /dev/null +++ b/docs/osdctl_account_aws-creds_rotate.md @@ -0,0 +1,92 @@ +## osdctl account aws-creds rotate + +Rotate AWS IAM credentials for a cluster + +### Synopsis + +Rotates AWS IAM credentials for osdManagedAdmin and/or osdCcsAdmin users. +Runs a diagnostic snapshot first, then performs the rotation with +interactive confirmation. + +Use --refresh-secrets to only delete and recreate CredentialRequest secrets +without rotating AWS keys or modifying Hive secrets. This is useful when +CCO needs to re-provision secrets with existing credentials. + +AWS credentials are obtained via backplane by default, falling back to the +default AWS credential chain (env vars, ~/.aws/config). Use --aws-profile +to specify a named profile, or --aws-use-env to skip backplane and use +environment credentials directly (e.g. after rh-aws-saml-login). + +Pre-flight checks (IAM permissions, secret existence) block rotation by +default. Use --force to allow proceeding past errors with explicit YES +confirmation — only when you are certain the errors are benign. + +``` +osdctl account aws-creds rotate -C --reason [flags] +``` + +### Examples + +``` + # Rotate osdManagedAdmin credentials + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin + + # Rotate osdCcsAdmin credentials (CCS clusters only) + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --ccs-admin + + # Rotate both + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --ccs-admin + + # Only refresh CredentialRequest secrets (no key rotation) + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --refresh-secrets + + # Dry-run: preview what would happen + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --dry-run + + # Using rh-aws-saml-login credentials (no backplane) + kinit $USER@IPA.REDHAT.COM + eval $(rh-aws-saml-login --output env rhcontrol) + export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --aws-use-env + + # With staging cluster and production hive + osdctl account aws-creds rotate -C $CLUSTER_ID --reason "$JIRA_TICKET" --managed-admin --hive-ocm-url production +``` + +### Options + +``` + --admin-username string Override the osdManagedAdmin IAM username. Only needed if auto-detection fails (e.g. custom or legacy username) + -p, --aws-profile string AWS profile for role chaining. If omitted, tries backplane then falls back to default AWS credential chain + --aws-use-env Use AWS credentials from environment variables (e.g. after rh-aws-saml-login), skipping backplane + --ccs-admin Rotate osdCcsAdmin credentials (CCS clusters only) + -C, --cluster-id string (Required) OCM internal or external cluster ID + --dry-run Preview rotation actions without making changes + --force Allow proceeding past pre-flight errors with YES confirmation. Use only when certain the errors are benign (e.g., known SCP restrictions that won't affect rotation) + -h, --help help for rotate + --hive-ocm-url string OCM environment for Hive operations (aliases: production, staging, integration) + -l, --log-level string Log level: debug, info, warn, error (default "info") + --managed-admin Rotate osdManagedAdmin credentials + -r, --reason string (Required) Elevation reason, usually a Jira ticket ID + --refresh-secrets Only delete and recreate CredentialRequest secrets (no key rotation) +``` + +### Options inherited from parent commands + +``` + --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. + --cluster string The name of the kubeconfig cluster to use + --context string The name of the kubeconfig context to use + --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure + --kubeconfig string Path to the kubeconfig file to use for CLI requests. + -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] + --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") + -s, --server string The address and port of the Kubernetes API server + --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl account aws-creds](osdctl_account_aws-creds.md) - Diagnose and manage AWS IAM credentials for a cluster + diff --git a/docs/osdctl_account_aws-creds_snapshot.md b/docs/osdctl_account_aws-creds_snapshot.md new file mode 100644 index 000000000..a3bc861e7 --- /dev/null +++ b/docs/osdctl_account_aws-creds_snapshot.md @@ -0,0 +1,76 @@ +## osdctl account aws-creds snapshot + +Show a read-only credential status report for a cluster + +### Synopsis + +Produces a diagnostic report of AWS IAM credentials including: + - IAM access keys and which Hive secrets reference them + - CredentialRequest secrets and whether they need refresh + - IAM permission simulation (SCP/policy restriction detection) + +Use --cr-secrets to show only the CredentialRequest secrets table. + +This is a read-only operation — no credentials are modified. + +AWS credentials are obtained via backplane by default, falling back to the +default AWS credential chain (env vars, ~/.aws/config). Use --aws-profile +to specify a named profile, or --aws-use-env to skip backplane and use +environment credentials directly (e.g. after rh-aws-saml-login). + +``` +osdctl account aws-creds snapshot -C --reason [flags] +``` + +### Examples + +``` + # Full credential status report (uses backplane) + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" + + # Only show CredentialRequest secret status + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" --cr-secrets + + # Using rh-aws-saml-login credentials (no backplane) + kinit $USER@IPA.REDHAT.COM + eval $(rh-aws-saml-login --output env rhcontrol) + export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" --aws-use-env + + # With staging cluster and production hive + osdctl account aws-creds snapshot -C $CLUSTER_ID --reason "$JIRA_TICKET" --hive-ocm-url production +``` + +### Options + +``` + --admin-username string Override the osdManagedAdmin IAM username. Only needed if auto-detection fails (e.g. custom or legacy username) + -p, --aws-profile string AWS profile for role chaining. If omitted, tries backplane then falls back to default AWS credential chain + --aws-use-env Use AWS credentials from environment variables (e.g. after rh-aws-saml-login), skipping backplane + -C, --cluster-id string (Required) OCM internal or external cluster ID + --cr-secrets Only show CredentialRequest secrets status + -h, --help help for snapshot + --hive-ocm-url string OCM environment for Hive operations (aliases: production, staging, integration) + -l, --log-level string Log level: debug, info, warn, error (default "info") + -r, --reason string (Required) Elevation reason, usually a Jira ticket ID +``` + +### Options inherited from parent commands + +``` + --as string Username to impersonate for the operation. User could be a regular user or a service account in a namespace. + --cluster string The name of the kubeconfig cluster to use + --context string The name of the kubeconfig context to use + --insecure-skip-tls-verify If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure + --kubeconfig string Path to the kubeconfig file to use for CLI requests. + -o, --output string Valid formats are ['', 'json', 'yaml', 'env'] + --request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0") + -s, --server string The address and port of the Kubernetes API server + --skip-aws-proxy-check aws_proxy Don't use the configured aws_proxy value + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl account aws-creds](osdctl_account_aws-creds.md) - Diagnose and manage AWS IAM credentials for a cluster + diff --git a/docs/osdctl_account_rotate-secret.md b/docs/osdctl_account_rotate-secret.md index 4251c751a..2bff88b78 100644 --- a/docs/osdctl_account_rotate-secret.md +++ b/docs/osdctl_account_rotate-secret.md @@ -1,10 +1,21 @@ ## osdctl account rotate-secret -Rotate IAM credentials secret +Rotate IAM credentials secret (see also: osdctl account aws-creds) ### Synopsis -When logged into a hive shard, this rotates IAM credential secrets for a given `account` CR. +When logged into a hive shard, this rotates IAM credential secrets for a given account CR. + +NOTE: Consider using 'osdctl account aws-creds' instead. The newer command: + - Accepts a cluster ID (no need to look up the account CR name) + - Provides a diagnostic snapshot before rotation + - Handles account claim resolution automatically + - Supports interactive key management and CR secret health verification + - Validates IAM permissions via SimulatePrincipalPolicy + +Usage: + osdctl account aws-creds snapshot -C --reason + osdctl account aws-creds rotate -C --reason --managed-admin ``` osdctl account rotate-secret [flags] diff --git a/pkg/controller/awscreds.go b/pkg/controller/awscreds.go new file mode 100644 index 000000000..8059ef7b4 --- /dev/null +++ b/pkg/controller/awscreds.go @@ -0,0 +1,1217 @@ +package controller + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "strings" + "time" + + awsSdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/iam" + iamTypes "github.com/aws/aws-sdk-go-v2/service/iam/types" + "github.com/aws/aws-sdk-go-v2/service/sts" + "github.com/fatih/color" + awsv1alpha1 "github.com/openshift/aws-account-operator/api/v1alpha1" + ccov1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" + "github.com/openshift/osdctl/pkg/policies" + awsprovider "github.com/openshift/osdctl/pkg/provider/aws" + "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// AWSCredsInput holds all resolved dependencies needed for both diagnostics +// and rotation. The CLI layer resolves AWS and k8s clients before calling +// DiagnoseCredentials or RotateCredentials. +type AWSCredsInput struct { + ClusterID string + ClusterName string + ClusterExternalID string + IsCCS bool + AWSAccountID string + AccountCRName string + Account *awsv1alpha1.Account + AdminUsername string + AwsClient awsprovider.Client + HiveKubeClient client.Client + ManagedClient client.Client + Log *logrus.Logger + Out io.Writer +} + +type KeyStatus struct { + UserName string + AccessKeyID string + Age time.Duration + CreateDate time.Time + LastUsed string + Status string + HiveMatch bool +} + +type SecretStatus struct { + SecretName string + Namespace string + AccessKeyID string + MatchesAWS bool + Exists bool + ErrorMessage string +} + +type CredRequestStatus struct { + CredRequestName string + SecretName string + Namespace string + Age time.Duration + Exists bool + NeedsRecreation bool + ErrorMessage string +} + +type PermissionResult struct { + Action string + Allowed bool + Category string // "rotation" or "credreq" + RequestedBy []string // CR names that request this action (credreq category only) +} + +type Finding struct { + Severity string // "OK", "WARN", "FAIL" + Message string + Guidance string +} + +type DiagnosticReport struct { + ClusterID string + ClusterName string + ClusterExternalID string + IsCCS bool + AWSAccountID string + AccountCRName string + + ManagedAdminUser string + CcsAdminUser string + CallerARN string + CallerAccount string + + Keys []KeyStatus + Secrets []SecretStatus + CredRequests []CredRequestStatus + Permissions []PermissionResult + Findings []Finding + + AllPermissionsOK bool + AllSecretsInSync bool + RootKeyInSync bool + ClusterRootKeyID string + HiveAccountKeyID string +} + +// DiagnoseCredentials runs a full read-only diagnostic of IAM keys, Hive secrets, +// CredentialRequests, and IAM permissions, returning a structured report. +func DiagnoseCredentials(ctx context.Context, input *AWSCredsInput) (*DiagnosticReport, error) { + report := &DiagnosticReport{ + ClusterID: input.ClusterID, + ClusterName: input.ClusterName, + ClusterExternalID: input.ClusterExternalID, + IsCCS: input.IsCCS, + AWSAccountID: input.AWSAccountID, + AccountCRName: input.AccountCRName, + ManagedAdminUser: input.AdminUsername, + AllPermissionsOK: true, + AllSecretsInSync: true, + } + + if input.IsCCS { + report.CcsAdminUser = "osdCcsAdmin" + } + + log := getLog(input.Log) + + callerIdentity, err := input.AwsClient.GetCallerIdentity(&sts.GetCallerIdentityInput{}) + if err == nil && callerIdentity != nil { + if callerIdentity.Arn != nil { + report.CallerARN = *callerIdentity.Arn + } + if callerIdentity.Account != nil { + report.CallerAccount = *callerIdentity.Account + } + } else { + log.WithError(err).Warn("Could not determine AWS caller identity") + fmt.Fprintf(input.Out, " %s Could not determine AWS caller identity: %v\n", colorYellow("[WARN]"), err) + } + + hiveSecretKeyIDs := map[string]string{} + + if err := diagnoseHiveSecrets(ctx, input, report, hiveSecretKeyIDs); err != nil { + return nil, fmt.Errorf("hive secret diagnostics failed: %w", err) + } + + if err := diagnoseIAMKeys(ctx, input, report, hiveSecretKeyIDs); err != nil { + return nil, fmt.Errorf("IAM key diagnostics failed: %w", err) + } + + if err := diagnoseCredentialRequests(ctx, input, report); err != nil { + return nil, fmt.Errorf("credential request diagnostics failed: %w", err) + } + + if err := diagnosePermissions(ctx, input, report); err != nil { + return nil, fmt.Errorf("IAM permission check failed: %w", err) + } + + return report, nil +} + +// diagnoseHiveSecrets checks each expected Hive secret and records its access key ID for cross-referencing with IAM keys. +func diagnoseHiveSecrets(ctx context.Context, input *AWSCredsInput, report *DiagnosticReport, keyIDs map[string]string) error { + secretChecks := []struct { + name string + namespace string + }{ + {input.AccountCRName + "-secret", awsAccountNamespace}, + {"aws", input.Account.Spec.ClaimLinkNamespace}, + } + + if input.IsCCS { + secretChecks = append(secretChecks, struct { + name string + namespace string + }{"byoc", input.Account.Spec.ClaimLinkNamespace}) + } + + for _, sc := range secretChecks { + ss := SecretStatus{ + SecretName: sc.name, + Namespace: sc.namespace, + } + + secret := &corev1.Secret{} + err := input.HiveKubeClient.Get(ctx, types.NamespacedName{ + Name: sc.name, + Namespace: sc.namespace, + }, secret) + + if err != nil { + if apierrors.IsNotFound(err) { + ss.Exists = false + ss.ErrorMessage = "not found" + report.AllSecretsInSync = false + report.Findings = append(report.Findings, Finding{ + Severity: "FAIL", + Message: fmt.Sprintf("Hive secret %s/%s not found", sc.namespace, sc.name), + Guidance: fmt.Sprintf("Secret may need to be recreated. Check hive namespace %s for the expected secret.", sc.namespace), + }) + } else { + ss.ErrorMessage = err.Error() + report.AllSecretsInSync = false + report.Findings = append(report.Findings, Finding{ + Severity: "FAIL", + Message: fmt.Sprintf("Hive secret %s/%s read failed: %v", sc.namespace, sc.name, err), + Guidance: "Could not read the secret — this may be a transient API error or RBAC issue, not a missing secret.", + }) + } + report.Secrets = append(report.Secrets, ss) + continue + } + + ss.Exists = true + if keyID, ok := secret.Data["aws_access_key_id"]; ok { + ss.AccessKeyID = string(keyID) + keyIDs[sc.name] = string(keyID) + } + + report.Secrets = append(report.Secrets, ss) + } + + return nil +} + +// diagnoseIAMKeys lists IAM access keys for the admin (and CCS) users and +// cross-references them against Hive secret key IDs to detect sync mismatches. +func diagnoseIAMKeys(ctx context.Context, input *AWSCredsInput, report *DiagnosticReport, hiveSecretKeyIDs map[string]string) error { + accountSecretKeyID := hiveSecretKeyIDs[input.AccountCRName+"-secret"] + + adminUser := input.AdminUsername + listOutput, err := input.AwsClient.ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String(adminUser), + }) + if err != nil && isNoSuchEntity(err) && adminUser != osdManagedAdminIAM { + fmt.Fprintf(input.Out, " IAM user %s not found, trying %s...\n", adminUser, osdManagedAdminIAM) + adminUser = osdManagedAdminIAM + listOutput, err = input.AwsClient.ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String(adminUser), + }) + } + if err != nil { + if isNoSuchEntity(err) { + report.Findings = append(report.Findings, Finding{ + Severity: "FAIL", + Message: fmt.Sprintf("IAM user %s not found in AWS account %s", adminUser, input.AWSAccountID), + Guidance: "Neither the suffixed nor unsuffixed osdManagedAdmin user exists.\n Use --admin-username to specify the correct IAM username.", + }) + } else { + return fmt.Errorf("failed to list access keys for %s: %w", adminUser, err) + } + } + report.ManagedAdminUser = adminUser + + users := []string{adminUser} + if input.IsCCS { + users = append(users, "osdCcsAdmin") + } + + for _, username := range users { + var keyList *iam.ListAccessKeysOutput + if username == adminUser { + if listOutput == nil { + continue + } + keyList = listOutput + } else { + keyList, err = input.AwsClient.ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String(username), + }) + if err != nil { + if isNoSuchEntity(err) { + report.Findings = append(report.Findings, Finding{ + Severity: "FAIL", + Message: fmt.Sprintf("IAM user %s does not exist in AWS account %s", username, input.AWSAccountID), + Guidance: "Verify the correct username. Check the Account CR iamUserId label.", + }) + continue + } + return fmt.Errorf("failed to list access keys for %s: %w", username, err) + } + } + + var matchKeyID string + if username == "osdCcsAdmin" { + matchKeyID = hiveSecretKeyIDs["byoc"] + } else { + matchKeyID = accountSecretKeyID + } + + for _, key := range keyList.AccessKeyMetadata { + if key.AccessKeyId == nil { + continue + } + ks := KeyStatus{ + UserName: username, + AccessKeyID: *key.AccessKeyId, + Status: string(key.Status), + LastUsed: "N/A", + } + if key.CreateDate != nil { + ks.CreateDate = *key.CreateDate + ks.Age = time.Since(*key.CreateDate) + } + if matchKeyID != "" && *key.AccessKeyId == matchKeyID { + ks.HiveMatch = true + } + lastUsedOutput, luErr := input.AwsClient.GetAccessKeyLastUsed(ctx, &iam.GetAccessKeyLastUsedInput{ + AccessKeyId: key.AccessKeyId, + }) + if luErr == nil && lastUsedOutput != nil && lastUsedOutput.AccessKeyLastUsed != nil && lastUsedOutput.AccessKeyLastUsed.LastUsedDate != nil { + ks.LastUsed = formatAge(time.Since(*lastUsedOutput.AccessKeyLastUsed.LastUsedDate)) + " ago" + } + report.Keys = append(report.Keys, ks) + } + + keyCount := len(keyList.AccessKeyMetadata) + if keyCount >= 2 { + msg := fmt.Sprintf("%s has %d access keys (max 2). Review which key should be deleted before rotation.", username, keyCount) + report.Findings = append(report.Findings, Finding{ + Severity: "INFO", + Message: msg, + }) + } + + hasMatch := false + for _, key := range keyList.AccessKeyMetadata { + if matchKeyID != "" && *key.AccessKeyId == matchKeyID { + hasMatch = true + break + } + } + if matchKeyID != "" && !hasMatch { + report.AllSecretsInSync = false + report.Findings = append(report.Findings, Finding{ + Severity: "FAIL", + Message: fmt.Sprintf("Hive secret key ID %s for user %s does not match any active AWS access key", truncateKeyID(matchKeyID), username), + Guidance: fmt.Sprintf("Credentials are out of sync. Rotate to fix:\n osdctl account aws-creds rotate -C %s --reason --managed-admin", report.ClusterID), + }) + } + } + + for i := range report.Secrets { + ss := &report.Secrets[i] + if !ss.Exists || ss.AccessKeyID == "" { + continue + } + found := false + for _, ks := range report.Keys { + if ks.AccessKeyID == ss.AccessKeyID { + found = true + ss.MatchesAWS = true + break + } + } + if !found { + ss.MatchesAWS = false + report.AllSecretsInSync = false + } + } + + return nil +} + +// diagnoseCredentialRequests inspects CredentialRequest secrets on the managed cluster +// and flags any that need refresh based on whether the cluster's root credentials +// (kube-system/aws-creds) match the current Hive account secret. +func diagnoseCredentialRequests(ctx context.Context, input *AWSCredsInput, report *DiagnosticReport) error { + if input.ManagedClient == nil { + return nil + } + + crList := &ccov1.CredentialsRequestList{} + if err := input.ManagedClient.List(ctx, crList); err != nil { + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: fmt.Sprintf("Failed to list CredentialRequests: %v", err), + Guidance: "Ensure you have access to the managed cluster. Try: oc get credentialsrequests -A", + }) + return nil + } + + // Find the Hive account secret key ID for the managed-admin user + var hiveKeyID string + for _, s := range report.Secrets { + if strings.HasSuffix(s.SecretName, "-secret") && s.AccessKeyID != "" { + hiveKeyID = s.AccessKeyID + break + } + } + report.HiveAccountKeyID = hiveKeyID + + // Read the root credential secret from the managed cluster (kube-system/aws-creds) + // This is the secret CCO uses as root credentials, synced from Hive via SyncSet. + rootSecret := &corev1.Secret{} + if err := input.ManagedClient.Get(ctx, types.NamespacedName{Name: "aws-creds", Namespace: "kube-system"}, rootSecret); err != nil { + if apierrors.IsNotFound(err) { + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: "Root credential secret kube-system/aws-creds not found on managed cluster", + Guidance: "This secret is synced from Hive via SyncSet. If missing, rotation may not have completed properly.", + }) + } else { + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: fmt.Sprintf("Could not read root credential secret kube-system/aws-creds: %v", err), + Guidance: "This may be a transient API error or RBAC issue.", + }) + } + } else if clusterKeyID, ok := rootSecret.Data["aws_access_key_id"]; ok { + report.ClusterRootKeyID = string(clusterKeyID) + } + + report.RootKeyInSync = report.ClusterRootKeyID != "" && hiveKeyID != "" && report.ClusterRootKeyID == hiveKeyID + needsRefresh := hiveKeyID != "" && !report.RootKeyInSync + + for i := range crList.Items { + cr := &crList.Items[i] + if !isAWSCredentialRequestForDiagnostic(cr) { + continue + } + + crs := CredRequestStatus{ + CredRequestName: cr.Name, + SecretName: cr.Spec.SecretRef.Name, + Namespace: cr.Spec.SecretRef.Namespace, + } + + secret := &corev1.Secret{} + err := input.ManagedClient.Get(ctx, types.NamespacedName{ + Name: cr.Spec.SecretRef.Name, + Namespace: cr.Spec.SecretRef.Namespace, + }, secret) + if err != nil { + if apierrors.IsNotFound(err) { + crs.Exists = false + crs.ErrorMessage = "not found" + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: fmt.Sprintf("Credential secret %s/%s (from CredentialRequest %s) not found", cr.Spec.SecretRef.Namespace, cr.Spec.SecretRef.Name, cr.Name), + Guidance: "CCO should recreate this secret. If it persists, check CCO logs:\n oc logs -n openshift-cloud-credential-operator deploy/cloud-credential-operator", + }) + } else { + crs.ErrorMessage = err.Error() + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: fmt.Sprintf("Credential secret %s/%s read failed: %v", cr.Spec.SecretRef.Namespace, cr.Spec.SecretRef.Name, err), + Guidance: "Could not read the secret — this may be a transient API error or RBAC issue.", + }) + } + } else { + crs.Exists = true + if !secret.CreationTimestamp.Time.IsZero() { + crs.Age = time.Since(secret.CreationTimestamp.Time) + } + if needsRefresh { + crs.NeedsRecreation = true + } + } + + report.CredRequests = append(report.CredRequests, crs) + } + + return nil +} + +// isAWSCredentialRequestForDiagnostic is a broader filter for diagnostics — includes all +// CredentialRequests with AWSProviderSpec regardless of name prefix. +func isAWSCredentialRequestForDiagnostic(cr *ccov1.CredentialsRequest) bool { + if !strings.HasPrefix(cr.Namespace, "openshift") { + return false + } + if cr.Spec.ProviderSpec == nil || cr.Spec.ProviderSpec.Raw == nil { + return false + } + var provider struct { + Kind string `json:"kind"` + } + if err := json.Unmarshal(cr.Spec.ProviderSpec.Raw, &provider); err != nil { + return false + } + return provider.Kind == "AWSProviderSpec" +} + +// ccsSCPActions are representative actions from each service required by the +// OSD CCS minimum SCP (docs.redhat.com/en/documentation/openshift_dedicated/4/html/planning_your_environment/aws-ccs#ccs-aws-scp_aws-ccs). +// These go beyond CredentialRequest-defined permissions and cover the broader +// service access osdCcsAdmin needs as the parent user (e.g., creating IAM users, +// managing autoscaling, CloudWatch monitoring, KMS, Route53, etc.). +var ccsSCPActions = []string{ + "autoscaling:CreateAutoScalingGroup", + "autoscaling:DescribeAutoScalingGroups", + "autoscaling:UpdateAutoScalingGroup", + "autoscaling:DeleteAutoScalingGroup", + "cloudwatch:GetMetricData", + "cloudwatch:PutMetricAlarm", + "cloudwatch:DescribeAlarms", + "events:PutRule", + "events:DescribeRule", + "events:PutTargets", + "logs:CreateLogGroup", + "logs:PutLogEvents", + "logs:DescribeLogGroups", + "support:DescribeTrustedAdvisorChecks", + "kms:CreateKey", + "kms:DescribeKey", + "kms:Encrypt", + "kms:Decrypt", + "sts:AssumeRole", + "sts:GetCallerIdentity", + "tag:GetResources", + "tag:TagResources", + "route53:CreateHostedZone", + "route53:ChangeResourceRecordSets", + "route53:ListHostedZones", + "servicequotas:ListServices", + "servicequotas:GetServiceQuota", + "servicequotas:RequestServiceQuotaIncrease", +} + +// diagnosePermissions uses SimulatePrincipalPolicy to verify the admin and CCS users +// have the IAM permissions required for rotation and CredentialRequest fulfillment. +func diagnosePermissions(ctx context.Context, input *AWSCredsInput, report *DiagnosticReport) error { + // Use the resolved username (may have fallen back from suffixed to unsuffixed) + adminUser := report.ManagedAdminUser + if adminUser == "" { + adminUser = input.AdminUsername + } + userArn := fmt.Sprintf("arn:aws:iam::%s:user/%s", input.AWSAccountID, adminUser) + + if err := simulateActions(input.AwsClient, userArn, RotationRequiredActions, "rotation", nil, report); err != nil { + return err + } + + actionToCRs, err := extractCredReqActions(ctx, input) + if err != nil { + report.AllPermissionsOK = false + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: fmt.Sprintf("Could not enumerate CredentialRequest IAM actions: %v", err), + Guidance: "CredentialRequest permission simulation was skipped. Re-run once managed-cluster access is healthy, or use --force to proceed.", + }) + return nil + } + if len(actionToCRs) > 0 { + var credReqActions []string + for action := range actionToCRs { + credReqActions = append(credReqActions, action) + } + if err := simulateActions(input.AwsClient, userArn, credReqActions, "credreq", actionToCRs, report); err != nil { + return err + } + } + + if input.IsCCS { + ccsArn := fmt.Sprintf("arn:aws:iam::%s:user/osdCcsAdmin", input.AWSAccountID) + + ccsActions := make([]string, len(RotationRequiredActions)) + copy(ccsActions, RotationRequiredActions) + if len(actionToCRs) > 0 { + for action := range actionToCRs { + ccsActions = append(ccsActions, action) + } + } + seen := map[string]bool{} + var dedupedActions []string + for _, a := range ccsActions { + if !seen[a] { + seen[a] = true + dedupedActions = append(dedupedActions, a) + } + } + if err := simulateActions(input.AwsClient, ccsArn, dedupedActions, "ccsadmin", nil, report); err != nil { + return err + } + + if err := simulateActions(input.AwsClient, ccsArn, ccsSCPActions, "ccs-scp", nil, report); err != nil { + return err + } + } + + if !report.AllPermissionsOK { + var denied []string + for _, p := range report.Permissions { + if !p.Allowed { + denied = append(denied, p.Action) + } + } + report.Findings = append(report.Findings, Finding{ + Severity: "FAIL", + Message: fmt.Sprintf("Insufficient IAM permissions. Denied: %s", strings.Join(denied, ", ")), + Guidance: "An SCP or IAM policy is restricting required actions. Contact the customer to resolve policy restrictions before rotating credentials.", + }) + } + + return nil +} + +// simulateActions runs SimulatePrincipalPolicy for a set of IAM actions and records allow/deny results in the report. +func simulateActions(awsClient awsprovider.Client, policySourceArn string, actions []string, category string, actionToCRs map[string][]string, report *DiagnosticReport) error { + output, err := awsClient.SimulatePrincipalPolicy(&iam.SimulatePrincipalPolicyInput{ + PolicySourceArn: awsSdk.String(policySourceArn), + ActionNames: actions, + }) + if err != nil { + report.Findings = append(report.Findings, Finding{ + Severity: "WARN", + Message: fmt.Sprintf("Failed to simulate IAM permissions (%s): %v", category, err), + Guidance: "Permission check could not be completed. This may indicate insufficient access to run SimulatePrincipalPolicy.", + }) + report.AllPermissionsOK = false + return nil + } + + for _, result := range output.EvaluationResults { + if result.EvalActionName == nil { + continue + } + allowed := result.EvalDecision == iamTypes.PolicyEvaluationDecisionTypeAllowed + pr := PermissionResult{ + Action: *result.EvalActionName, + Allowed: allowed, + Category: category, + } + if actionToCRs != nil { + pr.RequestedBy = actionToCRs[*result.EvalActionName] + } + report.Permissions = append(report.Permissions, pr) + if !allowed { + report.AllPermissionsOK = false + } + } + + return nil +} + +// extractCredReqActions collects all IAM actions declared in AWSProviderSpec across +// CredentialRequests, mapping each action to the CR names that require it. +func extractCredReqActions(ctx context.Context, input *AWSCredsInput) (map[string][]string, error) { + if input.ManagedClient == nil { + return nil, nil + } + + actionToCRs := map[string][]string{} + seen := map[string]map[string]bool{} + + crList := &ccov1.CredentialsRequestList{} + if err := input.ManagedClient.List(ctx, crList); err != nil { + return nil, fmt.Errorf("failed to list CredentialRequests for permission check: %w", err) + } + + for i := range crList.Items { + cr := &crList.Items[i] + if !isAWSCredentialRequestForDiagnostic(cr) { + continue + } + awsSpec, err := policies.GetAWSProviderSpec(cr) + if err != nil { + continue + } + for _, stmt := range awsSpec.StatementEntries { + for _, action := range stmt.Action { + if seen[action] == nil { + seen[action] = map[string]bool{} + } + if !seen[action][cr.Name] { + seen[action][cr.Name] = true + actionToCRs[action] = append(actionToCRs[action], cr.Name) + } + } + } + } + + return actionToCRs, nil +} + +// RenderReport formats the diagnostic report as a human-readable table. +func RenderReport(report *DiagnosticReport, out io.Writer) { + div := "==========================================================================" + + renderPermissionSummary(report, out) + fmt.Fprintf(out, "\n%s\n", div) + fmt.Fprintf(out, " AWS Account: %s Account CR: %s\n", report.AWSAccountID, report.AccountCRName) + if report.CallerARN != "" { + fmt.Fprintf(out, " AWS Caller: %s\n", report.CallerARN) + } + fmt.Fprintf(out, "%s\n", div) + RenderCredRequestTable(report, out) + fmt.Fprintf(out, "\n%s\n", div) + renderCredentialsTable(report, out) + fmt.Fprintf(out, "\n%s\n", div) + renderSummary(report, out) + fmt.Fprintf(out, "%s\n\n", div) +} + +func underline(width int) string { + return strings.Repeat("-", width) +} + +// renderCredentialsTable prints the IAM access keys table with Hive secret cross-references and sync status. +func renderCredentialsTable(report *DiagnosticReport, out io.Writer) { + fmt.Fprintf(out, "\nAWS Credentials\n") + fmt.Fprintf(out, " %s\n\n", colorBlue("IAM access keys and their corresponding Hive secrets which reference them.")) + + if len(report.Keys) == 0 { + fmt.Fprintf(out, " (no keys found)\n") + return + } + + secretsByKeyID := map[string][]string{} + for _, s := range report.Secrets { + if s.Exists && s.AccessKeyID != "" { + secretsByKeyID[s.AccessKeyID] = append(secretsByKeyID[s.AccessKeyID], s.SecretName) + } + } + + fmt.Fprintf(out, " %-24s %-16s %-6s %-10s %-8s %-34s %s\n", "USER", "KEY ID", "AGE", "LAST USED", "STATUS", "REF'd by HIVE SECRET(S)", "SYNC") + fmt.Fprintf(out, " %-24s %-16s %-6s %-10s %-8s %-34s %s\n", underline(24), underline(16), underline(6), underline(10), underline(8), underline(34), underline(6)) + + for _, k := range report.Keys { + ageStr := formatAge(k.Age) + secretNames := secretsByKeyID[k.AccessKeyID] + secretStr := colorBlue("(not referenced by this cluster)") + syncStr := "--" + if len(secretNames) > 0 { + secretStr = strings.Join(secretNames, ", ") + syncStr = colorGreen("OK") + } + if k.HiveMatch && len(secretNames) == 0 { + syncStr = colorRed("??") + } + visibleSecretStr := secretStr + if len(secretNames) > 0 { + visibleSecretStr = truncate(secretStr, 38) + } + fmt.Fprintf(out, " %-24s %-16s %-6s %-10s %-8s %-34s %s\n", + truncate(k.UserName, 24), + truncateKeyID(k.AccessKeyID), + ageStr, + k.LastUsed, + k.Status, + visibleSecretStr, + syncStr, + ) + } + + for _, s := range report.Secrets { + if !s.Exists { + fmt.Fprintf(out, "\n %s Hive secret %s/%s not found\n", colorRed("[FAIL]"), s.Namespace, s.SecretName) + } else if !s.MatchesAWS && s.AccessKeyID != "" { + fmt.Fprintf(out, "\n %s Hive secret %s holds key %s which does not match any active AWS key\n", + colorRed("[FAIL]"), s.SecretName, truncateKeyID(s.AccessKeyID)) + } + } + +} + +// DiagnoseCRSecrets produces a lightweight report of just the CredentialRequest +// secrets, comparing the cluster's root credential (kube-system/aws-creds) +// against the Hive account secret to detect staleness. +func DiagnoseCRSecrets(ctx context.Context, hiveClient client.Client, managedClient client.Client, accountCRName string, account *awsv1alpha1.Account, out io.Writer) (*DiagnosticReport, error) { + report := &DiagnosticReport{ + AWSAccountID: account.Spec.AwsAccountID, + AccountCRName: accountCRName, + IsCCS: account.Spec.BYOC, + } + + secretName := accountCRName + "-secret" + accountSecret := &corev1.Secret{} + if err := hiveClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: awsAccountNamespace}, accountSecret); err != nil { + fmt.Fprintf(out, " %s Could not read Hive account secret %s/%s: %v\n", colorYellow("[WARN]"), awsAccountNamespace, secretName, err) + fmt.Fprintf(out, " CR secret staleness detection will be skipped.\n\n") + } else { + if keyID, ok := accountSecret.Data["aws_access_key_id"]; ok { + report.HiveAccountKeyID = string(keyID) + fmt.Fprintf(out, " Active key in Hive: %s\n", truncateKeyID(report.HiveAccountKeyID)) + } + } + + // Read root credential from managed cluster to compare against Hive + rootSecret := &corev1.Secret{} + if err := managedClient.Get(ctx, types.NamespacedName{Name: "aws-creds", Namespace: "kube-system"}, rootSecret); err != nil { + if apierrors.IsNotFound(err) { + fmt.Fprintf(out, " %s Root credential secret kube-system/aws-creds not found on cluster\n", colorYellow("[WARN]")) + } else { + fmt.Fprintf(out, " %s Could not read kube-system/aws-creds: %v\n", colorYellow("[WARN]"), err) + } + } else if clusterKeyID, ok := rootSecret.Data["aws_access_key_id"]; ok { + report.ClusterRootKeyID = string(clusterKeyID) + fmt.Fprintf(out, " Active key on cluster: %s\n", truncateKeyID(report.ClusterRootKeyID)) + } + + report.RootKeyInSync = report.ClusterRootKeyID != "" && report.HiveAccountKeyID != "" && report.ClusterRootKeyID == report.HiveAccountKeyID + needsRefresh := report.HiveAccountKeyID != "" && !report.RootKeyInSync + + if report.RootKeyInSync { + fmt.Fprintf(out, " %s Cluster root credential matches Hive\n\n", colorGreen("[OK]")) + } else if report.HiveAccountKeyID != "" { + fmt.Fprintf(out, " %s Cluster root credential does NOT match Hive — CR secrets need refresh\n\n", colorYellow("[WARN]")) + } else { + fmt.Fprintln(out) + } + + crList := &ccov1.CredentialsRequestList{} + if err := managedClient.List(ctx, crList); err != nil { + return nil, fmt.Errorf("failed to list CredentialRequests: %w", err) + } + + for i := range crList.Items { + cr := &crList.Items[i] + if !isAWSCredentialRequestForDiagnostic(cr) { + continue + } + + crs := CredRequestStatus{ + CredRequestName: cr.Name, + SecretName: cr.Spec.SecretRef.Name, + Namespace: cr.Spec.SecretRef.Namespace, + } + + secret := &corev1.Secret{} + err := managedClient.Get(ctx, types.NamespacedName{ + Name: cr.Spec.SecretRef.Name, + Namespace: cr.Spec.SecretRef.Namespace, + }, secret) + if err != nil { + if apierrors.IsNotFound(err) { + crs.Exists = false + crs.ErrorMessage = "not found" + } else { + crs.ErrorMessage = fmt.Sprintf("read failed: %v", err) + } + } else { + crs.Exists = true + if !secret.CreationTimestamp.Time.IsZero() { + crs.Age = time.Since(secret.CreationTimestamp.Time) + } + if needsRefresh { + crs.NeedsRecreation = true + } + } + + report.CredRequests = append(report.CredRequests, crs) + } + + return report, nil +} + +// RenderCredRequestTable outputs the CredentialRequest secrets table. +func RenderCredRequestTable(report *DiagnosticReport, out io.Writer) { + fmt.Fprintf(out, "\nCredential Request Secrets (managed cluster)\n") + fmt.Fprintf(out, " %s\n", colorBlue("Secrets referenced by AWS CredentialRequests. After rotation, these")) + fmt.Fprintf(out, " %s\n", colorBlue("secrets are to be deleted so CCO recreates them with the new credentials.")) + + if report.HiveAccountKeyID != "" && report.ClusterRootKeyID != "" { + if report.RootKeyInSync { + fmt.Fprintf(out, " %s\n", colorBlue(fmt.Sprintf("Cluster root credential (kube-system/aws-creds) matches Hive (%s).", truncateKeyID(report.HiveAccountKeyID)))) + } else { + fmt.Fprintf(out, " %s\n", colorYellow(fmt.Sprintf("Cluster root credential (%s) does NOT match Hive (%s) — secrets need refresh.", + truncateKeyID(report.ClusterRootKeyID), truncateKeyID(report.HiveAccountKeyID)))) + } + } else if report.HiveAccountKeyID != "" && report.ClusterRootKeyID == "" { + fmt.Fprintf(out, " %s\n", colorYellow("Could not read cluster root credential (kube-system/aws-creds) — staleness unknown.")) + } + fmt.Fprintf(out, "\n") + + if len(report.CredRequests) == 0 { + fmt.Fprintf(out, " (no credential requests found or managed cluster not connected)\n") + return + } + + staleCount := 0 + for _, cr := range report.CredRequests { + if cr.NeedsRecreation { + staleCount++ + } + } + + fmt.Fprintf(out, " %-40s %-28s %-24s %-8s %s\n", "CREDENTIAL REQUEST", "NAMESPACE", "SECRET", "AGE", "KEY STATUS") + fmt.Fprintf(out, " %-40s %-28s %-24s %-8s %s\n", underline(40), underline(28), underline(24), underline(8), underline(14)) + for _, cr := range report.CredRequests { + ageStr := "--" + statusStr := colorGreen("CURRENT") + if !cr.Exists { + ageStr = "--" + statusStr = colorRed("MISSING") + } else { + if cr.Age > 0 { + ageStr = formatAge(cr.Age) + } + if cr.NeedsRecreation { + statusStr = colorYellow("NEEDS REFRESH") + } + } + fmt.Fprintf(out, " %-40s %-28s %-24s %-8s %s\n", + truncate(cr.CredRequestName, 40), + truncate(cr.Namespace, 28), + truncate(cr.SecretName, 24), + ageStr, + statusStr, + ) + } + + if staleCount > 0 { + fmt.Fprintf(out, "\n %s %s\n", colorBlue("[INFO]"), colorBlue(fmt.Sprintf("%d secret(s) marked NEEDS REFRESH — cluster root credential is out of sync", staleCount))) + fmt.Fprintf(out, " %s\n", colorBlue("with Hive. These will be deleted/recreated during rotation.")) + } + + printInlineFindings(report.Findings, out, "Credential") +} + +// renderPermissionSummary prints the SimulatePrincipalPolicy results grouped by category (rotation, credreq, CCS, SCP). +func renderPermissionSummary(report *DiagnosticReport, out io.Writer) { + fmt.Fprintf(out, "\nIAM Permission Check (SimulatePrincipalPolicy)\n") + fmt.Fprintf(out, " %s\n", colorBlue("Uses SimulatePrincipalPolicy to detect SCP or IAM policy restrictions")) + fmt.Fprintf(out, " %s\n", colorBlue("that would block rotation or CCO credential provisioning.")) + if report.CallerARN != "" { + fmt.Fprintf(out, " %s\n", colorBlue(fmt.Sprintf("AWS caller (used by this tool): %s", report.CallerARN))) + } + fmt.Fprintf(out, "\n") + + if len(report.Permissions) == 0 { + fmt.Fprintf(out, " (permission check was not performed)\n") + return + } + + hasRotation := false + hasCredReq := false + hasCcsAdmin := false + hasCcsSCP := false + for _, p := range report.Permissions { + switch p.Category { + case "rotation": + hasRotation = true + case "credreq": + hasCredReq = true + case "ccsadmin": + hasCcsAdmin = true + case "ccs-scp": + hasCcsSCP = true + } + } + + fmt.Fprintf(out, " %s\n\n", colorBlue(fmt.Sprintf("SimulatePrincipalPolicy for user: %s", report.ManagedAdminUser))) + + if hasRotation { + fmt.Fprintf(out, " %s\n", colorBlue("Rotation tooling (IAM key management):")) + fmt.Fprintf(out, " %-30s %s\n", "ACTION", "RESULT") + fmt.Fprintf(out, " %-30s %s\n", underline(30), underline(10)) + for _, p := range report.Permissions { + if p.Category != "rotation" { + continue + } + result := colorGreen("Allow") + if !p.Allowed { + result = colorRed("DENIED") + } + fmt.Fprintf(out, " %-30s %s\n", p.Action, result) + } + } + + if hasCredReq { + fmt.Fprintf(out, "\n %s\n", colorBlue("CredentialRequest actions (required by cluster operators):")) + fmt.Fprintf(out, " %-42s %-10s %s\n", "ACTION", "RESULT", "CREDENTIAL REQUEST(S)") + fmt.Fprintf(out, " %-42s %-10s %s\n", underline(42), underline(10), underline(40)) + for _, p := range report.Permissions { + if p.Category != "credreq" { + continue + } + result := colorGreen("Allow") + if !p.Allowed { + result = colorRed("DENIED") + } + crNames := "--" + if len(p.RequestedBy) > 0 { + crNames = strings.Join(p.RequestedBy, ", ") + } + fmt.Fprintf(out, " %-42s %-10s %s\n", p.Action, result, crNames) + } + } + + if hasCcsAdmin || hasCcsSCP { + fmt.Fprintf(out, "\n %s\n", colorBlue("SimulatePrincipalPolicy for user: osdCcsAdmin (parent user, manages osdManagedAdmin)")) + } + + if hasCcsAdmin { + fmt.Fprintf(out, "\n %s\n", colorBlue("Rotation + CredentialRequest actions:")) + fmt.Fprintf(out, " %-42s %s\n", "ACTION", "RESULT") + fmt.Fprintf(out, " %-42s %s\n", underline(42), underline(10)) + + allCcsOK := true + for _, p := range report.Permissions { + if p.Category != "ccsadmin" { + continue + } + result := colorGreen("Allow") + if !p.Allowed { + result = colorRed("DENIED") + allCcsOK = false + } + fmt.Fprintf(out, " %-42s %s\n", p.Action, result) + } + if allCcsOK { + fmt.Fprintf(out, "\n %s osdCcsAdmin has required rotation and CR permissions.\n", colorGreen("[OK]")) + } else { + fmt.Fprintf(out, "\n %s osdCcsAdmin is missing required permissions.\n", colorRed("[FAIL]")) + } + } + + if hasCcsSCP { + fmt.Fprintf(out, "\n %s\n", colorBlue("Additional OSD CCS required services (SCP coverage):")) + fmt.Fprintf(out, " %s\n", colorBlue("Ref: docs.redhat.com/en/documentation/openshift_dedicated/4/html/planning_your_environment/aws-ccs#ccs-aws-scp_aws-ccs")) + fmt.Fprintf(out, " %-42s %s\n", "ACTION", "RESULT") + fmt.Fprintf(out, " %-42s %s\n", underline(42), underline(10)) + + allScpOK := true + for _, p := range report.Permissions { + if p.Category != "ccs-scp" { + continue + } + result := colorGreen("Allow") + if !p.Allowed { + result = colorRed("DENIED") + allScpOK = false + } + fmt.Fprintf(out, " %-42s %s\n", p.Action, result) + } + if allScpOK { + fmt.Fprintf(out, "\n %s SCP allows all required OSD CCS services.\n", colorGreen("[OK]")) + } else { + fmt.Fprintf(out, "\n %s SCP may be restricting required services. Review with customer.\n", colorRed("[FAIL]")) + } + } + + if report.AllPermissionsOK { + fmt.Fprintf(out, "\n %s No SCP or IAM policy restrictions detected.\n", colorGreen("[OK]")) + } else { + fmt.Fprintf(out, "\n %s Some required IAM actions are denied. See findings below.\n", colorRed("[FAIL]")) + } +} + +// renderSummary prints the final findings section with pass/fail verdicts for permissions, secrets, and CRs. +func renderSummary(report *DiagnosticReport, out io.Writer) { + fmt.Fprintf(out, "\n Summary:\n") + + rotationPermsOK := true + credReqPermsOK := true + ccsPermsOK := true + scpPermsOK := true + hasRotation := false + hasCredReq := false + hasCcs := false + hasScp := false + for _, p := range report.Permissions { + switch p.Category { + case "rotation": + hasRotation = true + if !p.Allowed { + rotationPermsOK = false + } + case "credreq": + hasCredReq = true + if !p.Allowed { + credReqPermsOK = false + } + case "ccsadmin": + hasCcs = true + if !p.Allowed { + ccsPermsOK = false + } + case "ccs-scp": + hasScp = true + if !p.Allowed { + scpPermsOK = false + } + } + } + + if hasRotation || hasCredReq { + if rotationPermsOK && credReqPermsOK { + fmt.Fprintf(out, " %s %s has adequate permissions to fulfill CRs.\n", colorGreen("[OK]"), report.ManagedAdminUser) + } else { + if !rotationPermsOK { + fmt.Fprintf(out, " %s %s is missing rotation tooling permissions.\n", colorRed("[FAIL]"), report.ManagedAdminUser) + } + if !credReqPermsOK { + fmt.Fprintf(out, " %s %s is missing CredentialRequest permissions.\n", colorRed("[FAIL]"), report.ManagedAdminUser) + } + } + } + + if hasCcs { + if ccsPermsOK { + fmt.Fprintf(out, " %s osdCcsAdmin has adequate permissions.\n", colorGreen("[OK]")) + } else { + fmt.Fprintf(out, " %s osdCcsAdmin is missing required permissions.\n", colorRed("[FAIL]")) + } + } + + if hasScp { + if scpPermsOK { + fmt.Fprintf(out, " %s SCP allows all required OSD CCS services.\n", colorGreen("[OK]")) + } else { + fmt.Fprintf(out, " %s SCP may be restricting required services.\n", colorRed("[FAIL]")) + } + } + + if len(report.CredRequests) > 0 { + if report.RootKeyInSync { + fmt.Fprintf(out, " %s Cluster root credential matches Hive — CR secrets are current.\n", colorGreen("[OK]")) + } else if report.HiveAccountKeyID != "" && report.ClusterRootKeyID != "" { + staleCount := 0 + for _, cr := range report.CredRequests { + if cr.NeedsRecreation { + staleCount++ + } + } + fmt.Fprintf(out, " %s Cluster root credential is out of sync — %d CR secret(s) need refresh.\n", colorYellow("[WARN]"), staleCount) + } else if report.HiveAccountKeyID != "" { + fmt.Fprintf(out, " %s Could not verify cluster root credential sync status.\n", colorYellow("[WARN]")) + } + } + + for _, f := range report.Findings { + var tag string + switch f.Severity { + case "FAIL": + tag = colorRed("[FAIL]") + case "WARN": + tag = colorYellow("[WARN]") + case "INFO": + tag = colorBlue("[INFO]") + default: + continue + } + fmt.Fprintf(out, " %s %s\n", tag, f.Message) + if f.Guidance != "" { + for line := range strings.SplitSeq(f.Guidance, "\n") { + fmt.Fprintf(out, " -> %s\n", line) + } + } + } + + if report.AllPermissionsOK && report.AllSecretsInSync && report.RootKeyInSync { + fmt.Fprintf(out, " No issues found during pre-rotation checks.\n") + } +} + +// printInlineFindings prints findings whose message contains the given keyword, used for contextual inline output. +func printInlineFindings(findings []Finding, out io.Writer, keyword string) { + for _, f := range findings { + if strings.Contains(strings.ToLower(f.Message), strings.ToLower(keyword)) { + fmt.Fprintf(out, "\n [%s] %s\n", f.Severity, f.Message) + if f.Guidance != "" { + for line := range strings.SplitSeq(f.Guidance, "\n") { + fmt.Fprintf(out, " -> %s\n", line) + } + } + } + } +} + +func truncateKeyID(keyID string) string { + if len(keyID) <= 8 { + return keyID + } + return keyID[:4] + "..." + keyID[len(keyID)-4:] +} + +func truncate(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + return s[:maxLen-2] + ".." +} + +func formatAge(d time.Duration) string { + days := int(d.Hours() / 24) + if days > 0 { + return fmt.Sprintf("%dd", days) + } + hours := int(d.Hours()) + if hours > 0 { + return fmt.Sprintf("%dh", hours) + } + return fmt.Sprintf("%dm", int(d.Minutes())) +} + +// isNoSuchEntity returns true if the error is an IAM NoSuchEntityException. +func isNoSuchEntity(err error) bool { + var nse *iamTypes.NoSuchEntityException + return errors.As(err, &nse) +} + +var ( + colorGreen = color.New(color.FgGreen).SprintFunc() + colorYellow = color.New(color.FgYellow).SprintFunc() + colorRed = color.New(color.FgRed).SprintFunc() + colorBlue = color.New(color.FgCyan).SprintFunc() + + defaultLog = logrus.New() +) + +func getLog(l *logrus.Logger) *logrus.Logger { + if l != nil { + return l + } + return defaultLog +} diff --git a/pkg/controller/awscreds_test.go b/pkg/controller/awscreds_test.go new file mode 100644 index 000000000..829046d04 --- /dev/null +++ b/pkg/controller/awscreds_test.go @@ -0,0 +1,1993 @@ +package controller + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + awsSdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/iam" + iamTypes "github.com/aws/aws-sdk-go-v2/service/iam/types" + "github.com/aws/aws-sdk-go-v2/service/sts" + ccov1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" + mock_aws "github.com/openshift/osdctl/pkg/provider/aws/mock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" +) + +func clusterRootSecret(keyID string) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws-creds", Namespace: "kube-system"}, + Data: map[string][]byte{"aws_access_key_id": []byte(keyID)}, + } +} + +func mockGetCallerIdentity(mockClient *mock_aws.MockClient) { + mockClient.EXPECT().GetCallerIdentity(gomock.Any()).Return(&sts.GetCallerIdentityOutput{ + Arn: awsSdk.String("arn:aws:sts::123456789012:assumed-role/test-role/session"), + Account: awsSdk.String("123456789012"), + UserId: awsSdk.String("AROA1234:session"), + }, nil).AnyTimes() +} + +func mockGetAccessKeyLastUsed(mockClient *mock_aws.MockClient) { + mockClient.EXPECT().GetAccessKeyLastUsed(gomock.Any(), gomock.Any()).Return(&iam.GetAccessKeyLastUsedOutput{ + AccessKeyLastUsed: &iamTypes.AccessKeyLastUsed{}, + }, nil).AnyTimes() +} + +func diagInput(t *testing.T, mockClient *mock_aws.MockClient, hiveObjs []runtime.Object, managedObjs []runtime.Object) *AWSCredsInput { + t.Helper() + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + account := testAccount(false, false) + + hiveClient := fake.NewClientBuilder(). + WithScheme(testScheme(t)). + WithRuntimeObjects(append(hiveObjs, account)...). + Build() + + var managedClient = testManagedClient(t, managedObjs...) + + return &AWSCredsInput{ + ClusterID: "test-cluster-id", + ClusterName: "test-cluster", + ClusterExternalID: "ext-12345", + IsCCS: false, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + Account: account, + AdminUsername: "osdManagedAdmin-abcd", + AwsClient: mockClient, + HiveKubeClient: hiveClient, + ManagedClient: managedClient, + Out: &bytes.Buffer{}, + } +} + +func TestDiagnoseCredentials_AllHealthy(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.True(t, report.AllPermissionsOK) + assert.True(t, report.AllSecretsInSync) + assert.Len(t, report.Keys, 1) + assert.True(t, report.Keys[0].HiveMatch) + assert.Len(t, report.Secrets, 2) + for _, s := range report.Secrets { + assert.True(t, s.Exists) + assert.True(t, s.MatchesAWS) + } + + failFindings := 0 + for _, f := range report.Findings { + if f.Severity == "FAIL" { + failFindings++ + } + } + assert.Equal(t, 0, failFindings) +} + +func TestDiagnoseCredentials_KeyMismatch(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIANEWKEY99999"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAOLDKEY00000")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAOLDKEY00000")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.False(t, report.AllSecretsInSync) + + hasMismatchFinding := false + for _, f := range report.Findings { + if f.Severity == "FAIL" && strings.Contains(f.Message, "does not match") { + hasMismatchFinding = true + } + } + assert.True(t, hasMismatchFinding, "expected a FAIL finding about key mismatch") +} + +func TestDiagnoseCredentials_MaxKeys(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + oldKey := now.Add(-90 * 24 * time.Hour) + newKey := now.Add(-1 * 24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAOLDKEY00000"), + Status: iamTypes.StatusTypeActive, + CreateDate: &oldKey, + }, + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIANEWKEY99999"), + Status: iamTypes.StatusTypeActive, + CreateDate: &newKey, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIANEWKEY99999")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIANEWKEY99999")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Len(t, report.Keys, 2) + + hasMaxKeyInfo := false + for _, f := range report.Findings { + if f.Severity == "INFO" && strings.Contains(f.Message, "max 2") { + hasMaxKeyInfo = true + } + } + assert.True(t, hasMaxKeyInfo, "expected INFO finding about max keys") +} + +func TestDiagnoseCredentials_PermissionDenied(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockClient.EXPECT().SimulatePrincipalPolicy(gomock.Any()).Return(&iam.SimulatePrincipalPolicyOutput{ + EvaluationResults: []iamTypes.EvaluationResult{ + {EvalActionName: awsSdk.String("iam:CreateAccessKey"), EvalDecision: iamTypes.PolicyEvaluationDecisionTypeAllowed}, + {EvalActionName: awsSdk.String("iam:DeleteAccessKey"), EvalDecision: "implicitDeny"}, + {EvalActionName: awsSdk.String("iam:ListAccessKeys"), EvalDecision: iamTypes.PolicyEvaluationDecisionTypeAllowed}, + }, + }, nil) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.False(t, report.AllPermissionsOK) + + hasPermFinding := false + for _, f := range report.Findings { + if f.Severity == "FAIL" && strings.Contains(f.Message, "Insufficient IAM permissions") { + hasPermFinding = true + assert.Contains(t, f.Message, "iam:DeleteAccessKey") + } + } + assert.True(t, hasPermFinding, "expected FAIL finding about denied permissions") +} + +func TestDiagnoseCredentials_MissingSecret(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + // Only provide one secret — the "aws" secret in the CD namespace is missing + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.False(t, report.AllSecretsInSync) + + hasMissingFinding := false + for _, f := range report.Findings { + if f.Severity == "FAIL" && strings.Contains(f.Message, "not found") { + hasMissingFinding = true + } + } + assert.True(t, hasMissingFinding, "expected FAIL finding about missing secret") +} + +func TestDiagnoseCredentials_WithCredentialRequests(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-image-registry", + Namespace: credentialRequestNamespace, + }, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{ + Name: "installer-cloud-credentials", + Namespace: "openshift-image-registry", + }, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "installer-cloud-credentials", + Namespace: "openshift-image-registry", + CreationTimestamp: metav1.NewTime(now.Add(-48 * time.Hour)), + }, + }, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].Exists) + assert.Equal(t, "openshift-image-registry", report.CredRequests[0].CredRequestName) + assert.True(t, report.RootKeyInSync, "root key should be in sync when cluster matches hive") + assert.False(t, report.CredRequests[0].NeedsRecreation, "CR should be CURRENT when root key matches") +} + +func TestDiagnoseCredentials_MissingCredentialSecret(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-ingress-operator", + Namespace: credentialRequestNamespace, + }, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{ + Name: "cloud-credentials", + Namespace: "openshift-ingress-operator", + }, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + // No secret created — simulates missing credential secret + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Len(t, report.CredRequests, 1) + assert.False(t, report.CredRequests[0].Exists) + + hasWarn := false + for _, f := range report.Findings { + if f.Severity == "WARN" && strings.Contains(f.Message, "cloud-credentials") { + hasWarn = true + assert.Contains(t, f.Guidance, "CCO") + } + } + assert.True(t, hasWarn, "expected WARN about missing credential secret") +} + +func TestDiagnoseCredentials_CCSCluster(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + // Expect ListAccessKeys called for both osdManagedAdmin and osdCcsAdmin + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdManagedAdmin-abcd"), + }).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAMANAGED0001"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdCcsAdmin"), + }).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdCcsAdmin"), + AccessKeyId: awsSdk.String("AKIACCSADM00001"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + account := testAccount(true, false) + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAMANAGED0001")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAMANAGED0001")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "byoc", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIACCSADM00001")}, + }, + } + + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + managedClient := testManagedClient(t) + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", + ClusterName: "test-ccs-cluster", + ClusterExternalID: "ext-ccs-12345", + IsCCS: true, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + Account: account, + AdminUsername: "osdManagedAdmin-abcd", + AwsClient: mockClient, + HiveKubeClient: hiveClient, + ManagedClient: managedClient, + Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Equal(t, "osdCcsAdmin", report.CcsAdminUser) + assert.Len(t, report.Secrets, 3) // account-secret, aws, byoc + assert.Len(t, report.Keys, 2) // managed + ccs + assert.True(t, report.AllSecretsInSync) + assert.True(t, report.AllPermissionsOK) +} + +func TestRenderReport_OutputFormat(t *testing.T) { + report := &DiagnosticReport{ + ClusterID: "abc123", + ClusterName: "my-cluster", + ClusterExternalID: "ext-99999", + IsCCS: true, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + ManagedAdminUser: "osdManagedAdmin-abcd", + CcsAdminUser: "osdCcsAdmin", + Keys: []KeyStatus{ + {UserName: "osdManagedAdmin-abcd", AccessKeyID: "AKIAEXAMPLE1234", Age: 48 * time.Hour, Status: "Active", HiveMatch: true}, + }, + Secrets: []SecretStatus{ + {SecretName: "test-account-secret", Namespace: "aws-account-operator", AccessKeyID: "AKIAEXAMPLE1234", Exists: true, MatchesAWS: true}, + {SecretName: "aws", Namespace: "uhc-production-test", AccessKeyID: "AKIAEXAMPLE1234", Exists: true, MatchesAWS: true}, + }, + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-image-registry", SecretName: "cloud-creds", Namespace: "openshift-image-registry", Age: 48 * time.Hour, Exists: true}, + }, + Permissions: []PermissionResult{{Action: "iam:CreateAccessKey", Allowed: true}}, + AllPermissionsOK: true, + AllSecretsInSync: true, + RootKeyInSync: true, + HiveAccountKeyID: "AKIAEXAMPLE1234", + ClusterRootKeyID: "AKIAEXAMPLE1234", + } + + var buf bytes.Buffer + RenderReport(report, &buf) + output := buf.String() + + assert.Contains(t, output, "AWS Account: 123456789012") + assert.Contains(t, output, "Account CR: test-account") + assert.Contains(t, output, "AWS Credentials") + assert.Contains(t, output, "REF'd by HIVE SECRET(S)") + assert.Contains(t, output, "Credential Request Secrets") + assert.Contains(t, output, "IAM Permission Check") + assert.Contains(t, output, "No issues found during pre-rotation checks") +} + +func TestRenderReport_WithFindings(t *testing.T) { + report := &DiagnosticReport{ + ClusterID: "abc123", + ClusterName: "my-cluster", + ClusterExternalID: "ext-99999", + IsCCS: false, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + ManagedAdminUser: "osdManagedAdmin-abcd", + Keys: []KeyStatus{}, + Secrets: []SecretStatus{}, + AllPermissionsOK: true, + AllSecretsInSync: false, + Findings: []Finding{ + {Severity: "FAIL", Message: "Hive secret aws/uhc-production-test not found", Guidance: "Check hive namespace"}, + {Severity: "WARN", Message: "osdManagedAdmin-abcd has 2 access keys (max 2)", Guidance: "Delete the old key"}, + }, + } + + var buf bytes.Buffer + RenderReport(report, &buf) + output := buf.String() + + assert.Contains(t, output, "[FAIL]") + assert.Contains(t, output, "[WARN]") + assert.Contains(t, output, "Hive secret aws/uhc-production-test not found") + assert.Contains(t, output, "2 access keys (max 2)") +} + +func TestFormatAge(t *testing.T) { + tests := []struct { + duration time.Duration + expected string + }{ + {48 * time.Hour, "2d"}, + {3 * time.Hour, "3h"}, + {30 * time.Minute, "30m"}, + {90 * 24 * time.Hour, "90d"}, + } + for _, tt := range tests { + assert.Equal(t, tt.expected, formatAge(tt.duration)) + } +} + +func TestTruncateKeyID(t *testing.T) { + assert.Equal(t, "AKIA...1234", truncateKeyID("AKIAEXAMPLE1234")) + assert.Equal(t, "short", truncateKeyID("short")) + assert.Equal(t, "", truncateKeyID("")) +} + +func TestDiagnoseCredentials_AdminUsernameFallback(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + nse := &iamTypes.NoSuchEntityException{Message: awsSdk.String("user not found")} + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdManagedAdmin-abcd"), + }).Return(nil, nse) + + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdManagedAdmin"), + }).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Equal(t, "osdManagedAdmin", report.ManagedAdminUser) + assert.Len(t, report.Keys, 1) + assert.True(t, report.AllPermissionsOK) +} + +func TestDiagnoseCredentials_AdminUsernameBothNotFound(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + nse := &iamTypes.NoSuchEntityException{Message: awsSdk.String("user not found")} + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(nil, nse).Times(2) + + mockSimulateAllAllowed(mockClient) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + input := diagInput(t, mockClient, secrets, nil) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + hasFinding := false + for _, f := range report.Findings { + if f.Severity == "FAIL" && strings.Contains(f.Message, "not found") { + hasFinding = true + assert.Contains(t, f.Guidance, "--admin-username") + } + } + assert.True(t, hasFinding, "expected FAIL finding about admin user not found") +} + +func TestDiagnoseCredentials_NilManagedClient(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + account := testAccount(false, false) + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", + ClusterName: "test-cluster", + ClusterExternalID: "ext-12345", + IsCCS: false, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + Account: account, + AdminUsername: "osdManagedAdmin-abcd", + AwsClient: mockClient, + HiveKubeClient: hiveClient, + ManagedClient: nil, + Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Empty(t, report.CredRequests) + assert.True(t, report.AllPermissionsOK) +} + +func TestDiagnoseCredentials_StaleCredentialSecrets(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-1 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // Cluster root secret has OLD key — doesn't match Hive + clusterRootSecret("AKIAOLDCLUSTER00"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-image-registry", + Namespace: "openshift-cloud-credential-operator", + }, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{ + Name: "cloud-credentials", + Namespace: "openshift-image-registry", + }, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cloud-credentials", + Namespace: "openshift-image-registry", + CreationTimestamp: metav1.NewTime(now.Add(-30 * 24 * time.Hour)), + }, + }, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 1) + assert.False(t, report.RootKeyInSync, "root key should NOT be in sync when cluster has old key") + assert.True(t, report.CredRequests[0].NeedsRecreation, "CR should need refresh when root key is out of sync") +} + +func TestDiagnoseCredentials_CallerIdentityInReport(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetAccessKeyLastUsed(mockClient) + + mockClient.EXPECT().GetCallerIdentity(gomock.Any()).Return(&sts.GetCallerIdentityOutput{ + Arn: awsSdk.String("arn:aws:sts::123456789012:assumed-role/ManagedOpenShift-Support-abcd/session"), + Account: awsSdk.String("123456789012"), + UserId: awsSdk.String("AROA1234:session"), + }, nil) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + account := testAccount(false, false) + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(append(secrets, account)...).Build() + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", + ClusterName: "test-cluster", + ClusterExternalID: "ext-12345", + IsCCS: false, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + Account: account, + AdminUsername: "osdManagedAdmin-abcd", + AwsClient: mockClient, + HiveKubeClient: hiveClient, + ManagedClient: testManagedClient(t), + Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Equal(t, "arn:aws:sts::123456789012:assumed-role/ManagedOpenShift-Support-abcd/session", report.CallerARN) + assert.Equal(t, "123456789012", report.CallerAccount) + + var buf bytes.Buffer + RenderReport(report, &buf) + assert.Contains(t, buf.String(), "ManagedOpenShift-Support-abcd") +} + +func TestDiagnoseCredentials_CredReqNamespaceFilter(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // Should be included: openshift namespace prefix + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "aws-ebs-csi", Namespace: "openshift-cluster-csi-drivers"}, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{Name: "ebs-creds", Namespace: "openshift-cluster-csi-drivers"}, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "ebs-creds", Namespace: "openshift-cluster-csi-drivers", + CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}, + }, + // Should be excluded: non-openshift namespace + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "customer-cr", Namespace: "customer-namespace"}, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{Name: "customer-secret", Namespace: "customer-namespace"}, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + // Should be excluded: GCP provider + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "gcp-cr", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{Name: "gcp-secret", Namespace: "openshift-gcp"}, + ProviderSpec: &runtime.RawExtension{Raw: mustMarshal(t, map[string]string{"kind": "GCPProviderSpec"})}, + }, + }, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Len(t, report.CredRequests, 1, "should only include AWS CRs in openshift-* namespaces") + assert.Equal(t, "aws-ebs-csi", report.CredRequests[0].CredRequestName) +} + +func TestDiagnoseCredentials_SimulateAPIError(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + { + UserName: awsSdk.String("osdManagedAdmin-abcd"), + AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), + Status: iamTypes.StatusTypeActive, + CreateDate: &keyCreated, + }, + }, + }, nil) + + mockClient.EXPECT().SimulatePrincipalPolicy(gomock.Any()).Return(nil, fmt.Errorf("access denied")).AnyTimes() + + secrets := []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + + account := testAccount(false, false) + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(append(secrets, account)...).Build() + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", + ClusterName: "test-cluster", + ClusterExternalID: "ext-12345", + IsCCS: false, + AWSAccountID: "123456789012", + AccountCRName: "test-account", + Account: account, + AdminUsername: "osdManagedAdmin-abcd", + AwsClient: mockClient, + HiveKubeClient: hiveClient, + ManagedClient: testManagedClient(t), + Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.False(t, report.AllPermissionsOK) + + hasWarn := false + for _, f := range report.Findings { + if f.Severity == "WARN" && strings.Contains(f.Message, "Failed to simulate") { + hasWarn = true + } + } + assert.True(t, hasWarn, "expected WARN about simulate API failure") +} + +func mustMarshal(t *testing.T, v any) []byte { + t.Helper() + b, err := json.Marshal(v) + require.NoError(t, err) + return b +} + +func TestDiagnoseCRSecrets_Healthy(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-account-secret", + Namespace: awsAccountNamespace, + CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour)), + }, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cloud-creds", + Namespace: "openshift-ingress-operator", + CreationTimestamp: metav1.NewTime(now.Add(-30 * time.Minute)), + }, + }, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].Exists) + assert.True(t, report.RootKeyInSync, "root key should be in sync") + assert.False(t, report.CredRequests[0].NeedsRecreation, "CR should be CURRENT when root key matches") +} + +func TestDiagnoseCRSecrets_Stale(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-account-secret", + Namespace: awsAccountNamespace, + CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour)), + }, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // Cluster root secret has OLD key — doesn't match Hive + clusterRootSecret("AKIAOLDKEY00000"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cloud-creds", + Namespace: "openshift-ingress-operator", + CreationTimestamp: metav1.NewTime(now.Add(-7 * 24 * time.Hour)), + }, + }, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 1) + assert.False(t, report.RootKeyInSync, "root key should NOT be in sync") + assert.True(t, report.CredRequests[0].NeedsRecreation, "CR should need refresh when root key is out of sync") +} + +func TestDiagnoseCRSecrets_MissingSecret(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-account-secret", + Namespace: awsAccountNamespace, + CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour)), + }, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}, + }, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{ + SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, + ProviderSpec: &runtime.RawExtension{Raw: providerSpec}, + }, + }, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 1) + assert.False(t, report.CredRequests[0].Exists) +} + +// --- Test helpers for injecting errors into fake clients --- + +func errorOnGetClient(t *testing.T, errNS, errName string, objs ...runtime.Object) client.Client { + t.Helper() + underlying := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(objs...).Build() + return interceptor.NewClient(underlying, interceptor.Funcs{ + Get: func(ctx context.Context, c client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if key.Namespace == errNS && key.Name == errName { + return fmt.Errorf("simulated API error for %s/%s", errNS, errName) + } + return c.Get(ctx, key, obj, opts...) + }, + }) +} + +func errorOnListClient(t *testing.T, objs ...runtime.Object) client.Client { + t.Helper() + underlying := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(objs...).Build() + return interceptor.NewClient(underlying, interceptor.Funcs{ + List: func(ctx context.Context, c client.WithWatch, list client.ObjectList, opts ...client.ListOption) error { + return fmt.Errorf("simulated List API error") + }, + }) +} + +// --- Category A: Root secret edge cases --- + +func TestDiagnoseCredentials_RootSecretMissingOnCluster(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // No kube-system/aws-creds — root secret missing + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}}, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Equal(t, "AKIAEXAMPLE1234", report.HiveAccountKeyID) + assert.Empty(t, report.ClusterRootKeyID) + assert.False(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].NeedsRecreation, "CR should need refresh when root secret is missing") + + hasWarn := false + for _, f := range report.Findings { + if f.Severity == "WARN" && strings.Contains(f.Message, "kube-system/aws-creds not found") { + hasWarn = true + } + } + assert.True(t, hasWarn, "should have WARN finding about missing root secret") +} + +func TestDiagnoseCredentials_RootSecretReadError(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + account := testAccount(false, false) + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}}, + } + // Error on Get for kube-system/aws-creds, all other Gets pass + managedClient := errorOnGetClient(t, "kube-system", "aws-creds", managedObjs...) + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", ClusterName: "test-cluster", ClusterExternalID: "ext-12345", + AWSAccountID: "123456789012", AccountCRName: "test-account", Account: account, + AdminUsername: "osdManagedAdmin-abcd", AwsClient: mockClient, HiveKubeClient: hiveClient, + ManagedClient: managedClient, Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Empty(t, report.ClusterRootKeyID) + assert.False(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].NeedsRecreation) + + hasWarn := false + for _, f := range report.Findings { + if f.Severity == "WARN" && strings.Contains(f.Message, "Could not read root credential secret") { + hasWarn = true + } + } + assert.True(t, hasWarn, "should have WARN finding about root secret read error") +} + +func TestDiagnoseCredentials_RootSecretMissingKeyField(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // Root secret exists but has no aws_access_key_id field + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws-creds", Namespace: "kube-system"}, Data: map[string][]byte{"aws_secret_access_key": []byte("somesecret")}}, + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}}, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Empty(t, report.ClusterRootKeyID, "should be empty when aws_access_key_id field missing") + assert.False(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].NeedsRecreation) +} + +func TestDiagnoseCredentials_BothKeysEmpty(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + // Hive secret exists but has no aws_access_key_id + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{}}, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // Cluster root secret also has no aws_access_key_id + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws-creds", Namespace: "kube-system"}, Data: map[string][]byte{}}, + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}}, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + assert.Empty(t, report.HiveAccountKeyID) + assert.Empty(t, report.ClusterRootKeyID) + assert.False(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.False(t, report.CredRequests[0].NeedsRecreation, "CR should NOT need refresh when both keys are empty (graceful degradation)") +} + +// --- Category B: DiagnoseCRSecrets edge cases --- + +func TestDiagnoseCRSecrets_HiveSecretReadFailure(t *testing.T) { + account := testAccount(false, false) + + // Hive client that errors on the account secret + hiveClient := errorOnGetClient(t, awsAccountNamespace, "test-account-secret", account) + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(time.Now().Add(-30 * time.Minute))}}, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + assert.Empty(t, report.HiveAccountKeyID) + assert.Contains(t, buf.String(), "[WARN]") + assert.Contains(t, buf.String(), "Could not read Hive account secret") + require.Len(t, report.CredRequests, 1) + assert.False(t, report.CredRequests[0].NeedsRecreation, "should not flag when Hive key unknown") +} + +func TestDiagnoseCRSecrets_RootSecretMissing(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // No kube-system/aws-creds + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-30 * time.Minute))}}, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + assert.Contains(t, buf.String(), "kube-system/aws-creds not found") + assert.Empty(t, report.ClusterRootKeyID) + assert.False(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].NeedsRecreation, "should need refresh when root secret missing") +} + +func TestDiagnoseCRSecrets_RootSecretReadError(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + crObjs := []runtime.Object{ + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-30 * time.Minute))}}, + } + managedClient := errorOnGetClient(t, "kube-system", "aws-creds", crObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + assert.Contains(t, buf.String(), "Could not read kube-system/aws-creds") + assert.Empty(t, report.ClusterRootKeyID) + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].NeedsRecreation) +} + +func TestDiagnoseCRSecrets_OutputContent_InSync(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-30 * time.Minute))}}, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + output := buf.String() + assert.Contains(t, output, "Active key in Hive: AKIA...1234") + assert.Contains(t, output, "Active key on cluster: AKIA...1234") + assert.Contains(t, output, "Cluster root credential matches Hive") + assert.True(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.False(t, report.CredRequests[0].NeedsRecreation) +} + +func TestDiagnoseCRSecrets_OutputContent_OutOfSync(t *testing.T) { + now := time.Now() + account := testAccount(false, false) + + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIANEWKEY99999")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAOLDKEY00000"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: "openshift-cloud-credential-operator"}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-30 * time.Minute))}}, + } + managedClient := testManagedClient(t, managedObjs...) + + var buf bytes.Buffer + report, err := DiagnoseCRSecrets(context.TODO(), hiveClient, managedClient, "test-account", account, &buf) + require.NoError(t, err) + + output := buf.String() + assert.Contains(t, output, "Active key in Hive: AKIA...9999") + assert.Contains(t, output, "Active key on cluster: AKIA...0000") + assert.Contains(t, output, "does NOT match Hive") + assert.False(t, report.RootKeyInSync) + require.Len(t, report.CredRequests, 1) + assert.True(t, report.CredRequests[0].NeedsRecreation) +} + +// --- Category C: CR List/Read Errors --- + +func TestDiagnoseCredentials_CredentialRequestListError(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + account := testAccount(false, false) + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + // Managed client that errors on List (can't list CRs) + managedClient := errorOnListClient(t) + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", ClusterName: "test-cluster", ClusterExternalID: "ext-12345", + AWSAccountID: "123456789012", AccountCRName: "test-account", Account: account, + AdminUsername: "osdManagedAdmin-abcd", AwsClient: mockClient, HiveKubeClient: hiveClient, + ManagedClient: managedClient, Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err, "should handle List error gracefully") + + assert.Empty(t, report.CredRequests, "no CRs when List fails") + assert.NotEmpty(t, report.Keys, "keys should still be populated") + assert.NotEmpty(t, report.Secrets, "secrets should still be populated") + + hasWarn := false + for _, f := range report.Findings { + if f.Severity == "WARN" && strings.Contains(f.Message, "Failed to list CredentialRequests") { + hasWarn = true + } + } + assert.True(t, hasWarn) +} + +func TestDiagnoseCredentials_TransientCRSecretReadError(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockGetAccessKeyLastUsed(mockClient) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + account := testAccount(false, false) + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + crObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-machine-api-aws", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "aws-cloud-credentials", Namespace: "openshift-machine-api"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + // First CR's secret exists + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}}, + // Second CR's secret will error on Get + } + // Error on Get for the second CR's secret + managedClient := errorOnGetClient(t, "openshift-machine-api", "aws-cloud-credentials", crObjs...) + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", ClusterName: "test-cluster", ClusterExternalID: "ext-12345", + AWSAccountID: "123456789012", AccountCRName: "test-account", Account: account, + AdminUsername: "osdManagedAdmin-abcd", AwsClient: mockClient, HiveKubeClient: hiveClient, + ManagedClient: managedClient, Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 2) + assert.True(t, report.CredRequests[0].Exists, "first CR secret should be found") + assert.NotEmpty(t, report.CredRequests[1].ErrorMessage, "second CR should have error message") + + hasWarn := false + for _, f := range report.Findings { + if f.Severity == "WARN" && strings.Contains(f.Message, "aws-cloud-credentials read failed") { + hasWarn = true + } + } + assert.True(t, hasWarn) +} + +func TestDiagnoseCredentials_NonAWSProviderSpecFiltered(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + + awsSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + gcpSpec := mustMarshal(t, map[string]string{"kind": "GCPProviderSpec"}) + managedObjs := []runtime.Object{ + clusterRootSecret("AKIAEXAMPLE1234"), + // AWS CR — should be included + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "openshift-ingress", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "cloud-creds", Namespace: "openshift-ingress-operator"}, ProviderSpec: &runtime.RawExtension{Raw: awsSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "cloud-creds", Namespace: "openshift-ingress-operator", CreationTimestamp: metav1.NewTime(now.Add(-1 * time.Hour))}}, + // GCP CR — should be filtered out + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "gcp-cr", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "gcp-creds", Namespace: "openshift-gcp"}, ProviderSpec: &runtime.RawExtension{Raw: gcpSpec}}, + }, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 1, "only AWS CRs should be included") + assert.Equal(t, "openshift-ingress", report.CredRequests[0].CredRequestName) +} + +func TestDiagnoseCredentials_MixedCRStatus(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + now := time.Now() + keyCreated := now.Add(-1 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + hiveSecrets := []runtime.Object{ + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + + providerSpec := mustMarshal(t, map[string]string{"kind": "AWSProviderSpec"}) + managedObjs := []runtime.Object{ + // Root key out of sync — all existing CRs should need refresh + clusterRootSecret("AKIAOLDKEY00000"), + // CR 1: secret exists — should be NEEDS REFRESH + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "cr-present-1", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "creds-1", Namespace: "openshift-ns-1"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "creds-1", Namespace: "openshift-ns-1", CreationTimestamp: metav1.NewTime(now.Add(-2 * time.Hour))}}, + // CR 2: secret missing — should be MISSING (not NEEDS REFRESH) + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "cr-missing", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "creds-missing", Namespace: "openshift-ns-2"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + // CR 3: secret exists — should be NEEDS REFRESH + &ccov1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{Name: "cr-present-2", Namespace: credentialRequestNamespace}, + Spec: ccov1.CredentialsRequestSpec{SecretRef: corev1.ObjectReference{Name: "creds-2", Namespace: "openshift-ns-3"}, ProviderSpec: &runtime.RawExtension{Raw: providerSpec}}, + }, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "creds-2", Namespace: "openshift-ns-3", CreationTimestamp: metav1.NewTime(now.Add(-3 * time.Hour))}}, + } + + input := diagInput(t, mockClient, hiveSecrets, managedObjs) + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err) + + require.Len(t, report.CredRequests, 3) + existCount, missingCount, needsRefreshCount := 0, 0, 0 + for _, cr := range report.CredRequests { + if cr.Exists { + existCount++ + if cr.NeedsRecreation { + needsRefreshCount++ + } + } else { + missingCount++ + assert.False(t, cr.NeedsRecreation, "missing CR %s should NOT be marked NeedsRecreation", cr.CredRequestName) + } + } + assert.Equal(t, 2, existCount, "2 CRs should exist") + assert.Equal(t, 1, missingCount, "1 CR should be missing") + assert.Equal(t, 2, needsRefreshCount, "2 existing CRs should need refresh (root out of sync)") +} + +// --- Category D: Rendering tests --- + +func TestRenderCredRequestTable_RootKeyInSync(t *testing.T) { + report := &DiagnosticReport{ + RootKeyInSync: true, + HiveAccountKeyID: "AKIAEXAMPLE1234", + ClusterRootKeyID: "AKIAEXAMPLE1234", + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-ingress", SecretName: "cloud-creds", Namespace: "openshift-ingress-operator", Age: 30 * time.Minute, Exists: true, NeedsRecreation: false}, + {CredRequestName: "openshift-image-registry", SecretName: "installer-creds", Namespace: "openshift-image-registry", Age: 25 * time.Minute, Exists: true, NeedsRecreation: false}, + }, + } + + var buf bytes.Buffer + RenderCredRequestTable(report, &buf) + output := buf.String() + + assert.Contains(t, output, "matches Hive (AKIA...1234)") + assert.Contains(t, output, "CURRENT") + assert.NotContains(t, output, "NEEDS REFRESH") + assert.NotContains(t, output, "does NOT match") + assert.NotContains(t, output, "secret(s) marked NEEDS REFRESH") +} + +func TestRenderCredRequestTable_RootKeyStale(t *testing.T) { + report := &DiagnosticReport{ + RootKeyInSync: false, + HiveAccountKeyID: "AKIANEWKEY99999", + ClusterRootKeyID: "AKIAOLDKEY00000", + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-ingress", SecretName: "cloud-creds", Namespace: "openshift-ingress-operator", Age: 30 * time.Minute, Exists: true, NeedsRecreation: true}, + {CredRequestName: "openshift-image-registry", SecretName: "installer-creds", Namespace: "openshift-image-registry", Age: 25 * time.Minute, Exists: true, NeedsRecreation: true}, + }, + } + + var buf bytes.Buffer + RenderCredRequestTable(report, &buf) + output := buf.String() + + assert.Contains(t, output, "does NOT match Hive") + assert.Contains(t, output, "AKIA...0000") + assert.Contains(t, output, "AKIA...9999") + assert.Contains(t, output, "NEEDS REFRESH") + assert.Contains(t, output, "2 secret(s) marked NEEDS REFRESH") +} + +func TestRenderCredRequestTable_ClusterKeyUnavailable(t *testing.T) { + report := &DiagnosticReport{ + RootKeyInSync: false, + HiveAccountKeyID: "AKIAEXAMPLE1234", + ClusterRootKeyID: "", + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-ingress", SecretName: "cloud-creds", Namespace: "openshift-ingress-operator", Age: 30 * time.Minute, Exists: true, NeedsRecreation: true}, + }, + } + + var buf bytes.Buffer + RenderCredRequestTable(report, &buf) + output := buf.String() + + assert.Contains(t, output, "staleness unknown") +} + +func TestRenderSummary_RootKeyInSync(t *testing.T) { + report := &DiagnosticReport{ + ManagedAdminUser: "osdManagedAdmin-abcd", + AllPermissionsOK: true, + AllSecretsInSync: true, + RootKeyInSync: true, + HiveAccountKeyID: "AKIAEXAMPLE1234", + ClusterRootKeyID: "AKIAEXAMPLE1234", + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-ingress", Exists: true, NeedsRecreation: false}, + }, + } + + var buf bytes.Buffer + renderSummary(report, &buf) + output := buf.String() + + assert.Contains(t, output, "Cluster root credential matches Hive") + assert.Contains(t, output, "CR secrets are current") + assert.Contains(t, output, "No issues found during pre-rotation checks") +} + +func TestRenderSummary_RootKeyOutOfSync(t *testing.T) { + report := &DiagnosticReport{ + ManagedAdminUser: "osdManagedAdmin-abcd", + AllPermissionsOK: true, + AllSecretsInSync: true, + RootKeyInSync: false, + HiveAccountKeyID: "AKIANEWKEY99999", + ClusterRootKeyID: "AKIAOLDKEY00000", + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-ingress", Exists: true, NeedsRecreation: true}, + {CredRequestName: "openshift-image-registry", Exists: true, NeedsRecreation: true}, + }, + } + + var buf bytes.Buffer + renderSummary(report, &buf) + output := buf.String() + + assert.Contains(t, output, "out of sync") + assert.Contains(t, output, "2 CR secret(s) need refresh") + assert.NotContains(t, output, "No issues found") +} + +func TestRenderSummary_CannotVerifyRootKey(t *testing.T) { + report := &DiagnosticReport{ + ManagedAdminUser: "osdManagedAdmin-abcd", + AllPermissionsOK: true, + AllSecretsInSync: true, + RootKeyInSync: false, + HiveAccountKeyID: "AKIAEXAMPLE1234", + ClusterRootKeyID: "", + CredRequests: []CredRequestStatus{ + {CredRequestName: "openshift-ingress", Exists: true}, + }, + } + + var buf bytes.Buffer + renderSummary(report, &buf) + output := buf.String() + + assert.Contains(t, output, "Could not verify cluster root credential sync status") +} + +// --- Category E: IAM API error handling --- + +func TestDiagnoseCredentials_GetAccessKeyLastUsedError(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + // GetAccessKeyLastUsed returns error + mockClient.EXPECT().GetAccessKeyLastUsed(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("access denied")).AnyTimes() + + now := time.Now() + keyCreated := now.Add(-24 * time.Hour) + + mockClient.EXPECT().ListAccessKeys(gomock.Any()).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAEXAMPLE1234"), Status: iamTypes.StatusTypeActive, CreateDate: &keyCreated}, + }, + }, nil) + mockSimulateAllAllowed(mockClient) + + account := testAccount(false, false) + hiveObjs := []runtime.Object{ + account, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-account-secret", Namespace: awsAccountNamespace}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "aws", Namespace: "uhc-production-test"}, Data: map[string][]byte{"aws_access_key_id": []byte("AKIAEXAMPLE1234")}}, + } + hiveClient := fake.NewClientBuilder().WithScheme(testScheme(t)).WithRuntimeObjects(hiveObjs...).Build() + managedClient := testManagedClient(t) + + input := &AWSCredsInput{ + ClusterID: "test-cluster-id", ClusterName: "test-cluster", ClusterExternalID: "ext-12345", + AWSAccountID: "123456789012", AccountCRName: "test-account", Account: account, + AdminUsername: "osdManagedAdmin-abcd", AwsClient: mockClient, HiveKubeClient: hiveClient, + ManagedClient: managedClient, Out: &bytes.Buffer{}, + } + + report, err := DiagnoseCredentials(context.TODO(), input) + require.NoError(t, err, "should handle GetAccessKeyLastUsed error gracefully") + + require.Len(t, report.Keys, 1) + assert.Equal(t, "N/A", report.Keys[0].LastUsed, "LastUsed should be N/A on error") + assert.True(t, report.AllPermissionsOK, "other diagnostics should still complete") +} diff --git a/pkg/controller/rotatesecret.go b/pkg/controller/rotatesecret.go index 863f84318..48e513a34 100644 --- a/pkg/controller/rotatesecret.go +++ b/pkg/controller/rotatesecret.go @@ -2,7 +2,6 @@ package controller import ( "context" - "encoding/json" "errors" "fmt" "io" @@ -12,11 +11,14 @@ import ( awsSdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/iam" iamTypes "github.com/aws/aws-sdk-go-v2/service/iam/types" + "github.com/aws/aws-sdk-go-v2/service/sts" awsv1alpha1 "github.com/openshift/aws-account-operator/api/v1alpha1" ccov1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" hiveapiv1 "github.com/openshift/hive/apis/hive/v1" hiveinternalv1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1" awsprovider "github.com/openshift/osdctl/pkg/provider/aws" + "github.com/sirupsen/logrus" + authv1 "k8s.io/api/authorization/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -29,12 +31,42 @@ const ( osdManagedAdminIAM = "osdManagedAdmin" ) +// RotationRequiredActions are the IAM actions needed by the rotation tooling +// to create/delete access keys and manage IAM users. +var RotationRequiredActions = []string{ + "iam:CreateAccessKey", + "iam:CreateUser", + "iam:DeleteAccessKey", + "iam:DeleteUser", + "iam:DeleteUserPolicy", + "iam:GetUser", + "iam:GetUserPolicy", + "iam:ListAccessKeys", + "iam:PutUserPolicy", + "iam:TagUser", +} + +// confirmFrom prompts for y/N confirmation reading from the given reader. +func confirmFrom(in io.Reader) bool { + var response string + if _, err := fmt.Fscanln(in, &response); err != nil { + return false + } + switch strings.ToLower(response) { + case "y", "yes": + return true + default: + return false + } +} + // InsufficientPermissionsError is returned when SimulatePrincipalPolicy // reports that one or more required IAM actions are denied. type InsufficientPermissionsError struct { DeniedActions []string } +// Error returns a human-readable message listing the denied IAM actions. func (e *InsufficientPermissionsError) Error() string { return fmt.Sprintf("insufficient permissions for secret rotation. Denied actions: %v", e.DeniedActions) } @@ -59,6 +91,9 @@ type RotateSecretInput struct { // If empty, it is derived from the Account CR's iamUserId label. OsdManagedAdminUsername string + // UpdateManagedAdminCreds controls whether osdManagedAdmin credentials are rotated. + UpdateManagedAdminCreds bool + // UpdateCcsCreds controls whether osdCcsAdmin credentials are also rotated. UpdateCcsCreds bool @@ -79,7 +114,21 @@ type RotateSecretInput struct { // creation/deletion/updates). DryRun bool - // Out is the writer for informational output. + // Report is the pre-computed diagnostic report from the snapshot phase. + // Used to display key/secret context during interactive key deletion. + Report *DiagnosticReport + + // Log is the logger for operational messages (writes to stderr). + Log *logrus.Logger + + // SkipPermissionCheck skips the managed-admin permission verification when + // the caller has already run diagnostics and the user confirmed despite failures. + SkipPermissionCheck bool + + // In is the reader for interactive prompts (defaults to os.Stdin). + In io.Reader + + // Out is the writer for structured report output (writes to stdout). Out io.Writer } @@ -93,6 +142,7 @@ type RotateSecretInput struct { // 7. Polls ClusterSync for completion and cleans up the SyncSet // 8. Optionally rotates osdCcsAdmin credentials func RotateSecret(ctx context.Context, input *RotateSecretInput) error { + log := getLog(input.Log) account := input.Account if account.Spec.ManualSTSMode { @@ -101,134 +151,208 @@ func RotateSecret(ctx context.Context, input *RotateSecretInput) error { accountID := account.Spec.AwsAccountID - accountIDSuffixLabel, ok := account.Labels["iamUserId"] - if !ok { - return fmt.Errorf("no label on Account CR for IAM User") - } + // Managed-admin username resolution — only needed when rotating managed-admin creds + var adminUsername string + if input.UpdateManagedAdminCreds { + accountIDSuffixLabel, ok := account.Labels["iamUserId"] + if !ok { + return fmt.Errorf("no label on Account CR for IAM User") + } - // Resolve the admin username, with fallback from suffixed to unsuffixed - adminUsername := input.OsdManagedAdminUsername - if adminUsername == "" { - adminUsername = osdManagedAdminIAM + "-" + accountIDSuffixLabel - } + adminUsername = input.OsdManagedAdminUsername + if adminUsername == "" { + adminUsername = osdManagedAdminIAM + "-" + accountIDSuffixLabel + } - adminUsername, err := resolveAdminUsername(input.Out, input.AwsClient, accountID, adminUsername, accountIDSuffixLabel) - if err != nil { - return err + if !input.SkipPermissionCheck { + var err error + adminUsername, err = resolveAdminUsername(input.Out, input.AwsClient, accountID, adminUsername, accountIDSuffixLabel) + if err != nil { + return err + } + } } if input.DryRun { - fmt.Fprintln(input.Out, "[Dry Run] Would create a new IAM access key for user:", adminUsername) - fmt.Fprintln(input.Out, "[Dry Run] Would list access keys and report old keys to remove via rh-aws-saml-login") - fmt.Fprintf(input.Out, "[Dry Run] Would update secret %s/%s with new credentials\n", awsAccountNamespace, input.AccountCRName+"-secret") - fmt.Fprintf(input.Out, "[Dry Run] Would update secret %s/%s with new credentials\n", account.Spec.ClaimLinkNamespace, "aws") - fmt.Fprintf(input.Out, "[Dry Run] Would create SyncSet %s/%s to sync credentials to cluster\n", account.Spec.ClaimLinkNamespace, "aws-sync") - fmt.Fprintf(input.Out, "[Dry Run] Would poll ClusterSync and then delete SyncSet %s/%s\n", account.Spec.ClaimLinkNamespace, "aws-sync") - - if err := dryRunDeleteCredentialSecrets(ctx, input.ManagedClusterClient, input.Out); err != nil { - return err + dr := &dryRunChecker{out: input.Out, allOK: true} + ns := account.Spec.ClaimLinkNamespace + secretName := input.AccountCRName + "-secret" + + if input.UpdateManagedAdminCreds { + // AWS: verify IAM access + dr.would("create a new IAM access key for user: %s", adminUsername) + _, listErr := input.AwsClient.ListAccessKeys(&iam.ListAccessKeysInput{UserName: awsSdk.String(adminUsername)}) + dr.report(listErr == nil, "AWS: ListAccessKeys for %s", adminUsername) + dr.would("list access keys and report old keys to remove") + + // Hive: verify secret access + RBAC for update + dr.would("update secret %s/%s with new credentials", awsAccountNamespace, secretName) + dr.checkResourceAccess(ctx, input.HiveKubeClient, "Hive", "update", "secrets", awsAccountNamespace, secretName) + + dr.would("update secret %s/%s with new credentials", ns, "aws") + dr.checkResourceAccess(ctx, input.HiveKubeClient, "Hive", "update", "secrets", ns, "aws") + + // Hive: verify ClusterDeployment exists + SyncSet RBAC + dr.would("create SyncSet %s/%s to sync credentials to cluster", ns, "aws-sync") + dr.info("SyncSet is created fresh during rotation (not expected to exist beforehand)") + cdList := &hiveapiv1.ClusterDeploymentList{} + listCDErr := input.HiveKubeClient.List(ctx, cdList, client.InNamespace(ns)) + dr.report(listCDErr == nil && len(cdList.Items) > 0, "Hive: LIST ClusterDeployments in %s", ns) + dr.checkCanI(ctx, input.HiveKubeClient, "Hive", "create", "syncsets", "hive.openshift.io", ns) + dr.checkCanI(ctx, input.HiveKubeClient, "Hive", "delete", "syncsets", "hive.openshift.io", ns) + + dr.would("poll ClusterSync and then delete SyncSet %s/%s", ns, "aws-sync") + + // Managed cluster: verify credential secret access + if input.ManagedClusterClient == nil { + dr.info("Managed cluster client not available — credential secret checks skipped") + } else if err := dryRunDeleteCredentialSecrets(ctx, input.ManagedClusterClient, input.Out, dr); err != nil { + return err + } } if input.UpdateCcsCreds { if account.Spec.BYOC { - fmt.Fprintln(input.Out, "[Dry Run] Would create a new IAM access key for user: osdCcsAdmin") - fmt.Fprintf(input.Out, "[Dry Run] Would update secret %s/%s with new osdCcsAdmin credentials\n", account.Spec.ClaimLinkNamespace, "byoc") + dr.would("create a new IAM access key for user: osdCcsAdmin") + _, ccsListErr := input.AwsClient.ListAccessKeys(&iam.ListAccessKeysInput{UserName: awsSdk.String("osdCcsAdmin")}) + dr.report(ccsListErr == nil, "AWS: ListAccessKeys for osdCcsAdmin") + + dr.would("update secret %s/%s with new osdCcsAdmin credentials", ns, "byoc") + dr.checkResourceAccess(ctx, input.HiveKubeClient, "Hive", "update", "secrets", ns, "byoc") } else { - fmt.Fprintln(input.Out, "[Dry Run] Account is not CCS, would skip osdCcsAdmin credential rotation") + fmt.Fprintf(input.Out, "%s Account is not CCS, would skip osdCcsAdmin credential rotation\n", colorBlue("[Dry Run]")) } } - fmt.Fprintln(input.Out, "[Dry Run] No changes were made.") + if dr.allOK { + fmt.Fprintf(input.Out, "\n%s %s All pre-flight checks passed. No changes were made.\n", colorBlue("[Dry Run]"), colorGreen("OK")) + } else { + fmt.Fprintf(input.Out, "\n%s %s Some pre-flight checks failed. Resolve issues before rotating.\n", colorBlue("[Dry Run]"), colorRed("FAIL")) + } return nil } - // Create new access key - createAccessKeyOutput, err := input.AwsClient.CreateAccessKey(&iam.CreateAccessKeyInput{ - UserName: awsSdk.String(adminUsername), - }) - if err != nil { - var nse *iamTypes.NoSuchEntityException - if errors.As(err, &nse) { - // Retry without the suffix - // Retry without the suffix, re-verifying permissions first. - if err := VerifyRotationPermissions(input.Out, input.AwsClient, accountID, osdManagedAdminIAM); err != nil { - return err - } - adminUsername = osdManagedAdminIAM - createAccessKeyOutput, err = input.AwsClient.CreateAccessKey(&iam.CreateAccessKeyInput{ - UserName: awsSdk.String(adminUsername), - }) - if err != nil { - return err + if err := preflightCheckArtifacts(ctx, input); err != nil { + return err + } + + var completedSteps []string + reportFailure := func(failedStep string, err error) error { + log.WithError(err).Error(failedStep) + fmt.Fprintf(input.Out, "\n%s Rotation failed at: %s\n", colorRed("[ERROR]"), failedStep) + if len(completedSteps) > 0 { + fmt.Fprintln(input.Out, "\nCompleted steps before failure:") + for _, step := range completedSteps { + fmt.Fprintf(input.Out, " %s %s\n", colorGreen("[DONE]"), step) } - } else { - return err } + fmt.Fprintf(input.Out, " %s %s: %v\n", colorRed("[FAIL]"), failedStep, err) + fmt.Fprintln(input.Out, "\nThis command can be re-run safely. Already-completed steps will be retried.") + return err } - newKeyID := *createAccessKeyOutput.AccessKey.AccessKeyId - rotationCommitted := false - defer func() { - if rotationCommitted { - return + if input.UpdateManagedAdminCreds { + if input.UpdateCcsCreds { + fmt.Fprintf(input.Out, "\n%s\n", "==========================================================================") + fmt.Fprintf(input.Out, " Phase 1: Rotating %s credentials\n", adminUsername) + fmt.Fprintf(input.Out, "%s\n\n", "==========================================================================") } - _, _ = input.AwsClient.DeleteAccessKey(&iam.DeleteAccessKeyInput{ - UserName: awsSdk.String(adminUsername), - AccessKeyId: awsSdk.String(newKeyID), - }) - }() - if err := reportAccessKeys(input.AwsClient, adminUsername, newKeyID, input.Out); err != nil { - return err - } + createAccessKeyOutput, err := createAccessKeyWithRetry(input.AwsClient, adminUsername, accountID, input.Report, input.In, input.Out) + if err != nil { + return reportFailure("Create new IAM access key", err) + } + if createAccessKeyOutput == nil || createAccessKeyOutput.AccessKey == nil { + return reportFailure("Create new IAM access key", fmt.Errorf("AWS returned nil access key")) + } + adminUsername = *createAccessKeyOutput.AccessKey.UserName + newKeyID := *createAccessKeyOutput.AccessKey.AccessKeyId + completedSteps = append(completedSteps, fmt.Sprintf("Created new access key %s for %s", truncateKeyID(newKeyID), adminUsername)) + + rotationCommitted := false + defer func() { + if rotationCommitted { + return + } + log.Warn("Rotation did not complete — cleaning up newly created access key") + if _, delErr := input.AwsClient.DeleteAccessKey(&iam.DeleteAccessKeyInput{ + UserName: awsSdk.String(adminUsername), + AccessKeyId: awsSdk.String(newKeyID), + }); delErr != nil { + log.WithError(delErr).Errorf("Failed to delete orphaned access key %s for %s — manual cleanup required", truncateKeyID(newKeyID), adminUsername) + fmt.Fprintf(input.Out, "\n%s Failed to clean up access key %s for %s: %v\n", colorRed("[ERROR]"), truncateKeyID(newKeyID), adminUsername, delErr) + fmt.Fprintln(input.Out, " Manual cleanup required: aws iam delete-access-key --user-name", adminUsername, "--access-key-id", newKeyID) + } + }() - newSecretData := map[string][]byte{ - "aws_user_name": []byte(*createAccessKeyOutput.AccessKey.UserName), - "aws_access_key_id": []byte(newKeyID), - "aws_secret_access_key": []byte(*createAccessKeyOutput.AccessKey.SecretAccessKey), - } + if err := reportAccessKeys(input.AwsClient, adminUsername, newKeyID, input.Out); err != nil { + return reportFailure("List access keys", err) + } - // Update the account secret - if err := updateSecret(ctx, input.HiveKubeClient, input.AccountCRName+"-secret", awsAccountNamespace, newSecretData); err != nil { - return err - } + newSecretData := map[string][]byte{ + "aws_user_name": []byte(*createAccessKeyOutput.AccessKey.UserName), + "aws_access_key_id": []byte(newKeyID), + "aws_secret_access_key": []byte(*createAccessKeyOutput.AccessKey.SecretAccessKey), + } - // The AAO secret has been updated - if anything fails from this point onward it must either be manually fixed or rolled back - rotationCommitted = true + log.Info("Updating AAO account secret on Hive") + if err := updateSecret(ctx, input.HiveKubeClient, input.AccountCRName+"-secret", awsAccountNamespace, newSecretData); err != nil { + return reportFailure("Update AAO account secret on Hive", err) + } + completedSteps = append(completedSteps, fmt.Sprintf("Updated secret %s/%s", awsAccountNamespace, input.AccountCRName+"-secret")) - // Update the secret in ClusterDeployment's namespace - if err := updateSecret(ctx, input.HiveKubeClient, "aws", account.Spec.ClaimLinkNamespace, newSecretData); err != nil { - fmt.Fprintln(input.Out, "AWS creds updated for AAO account but failed for Cluster namespace") - fmt.Fprintln(input.Out, "Please update the cluster secret by hand and sync to cluster.") - return err - } + rotationCommitted = true - fmt.Fprintln(input.Out, "AWS creds updated on hive.") + log.Info("Updating cluster namespace secret on Hive") + if err := updateSecret(ctx, input.HiveKubeClient, "aws", account.Spec.ClaimLinkNamespace, newSecretData); err != nil { + return reportFailure("Update cluster namespace secret on Hive", err) + } + completedSteps = append(completedSteps, fmt.Sprintf("Updated secret %s/aws", account.Spec.ClaimLinkNamespace)) - if err := syncCredentialsToCluster(ctx, input.HiveKubeClient, account.Spec.ClaimLinkNamespace, input.Out); err != nil { - fmt.Fprintln(input.Out, "AWS creds updated on hive but not synced to cluster") - return err - } + fmt.Fprintln(input.Out, "AWS creds updated on hive.") - fmt.Fprintf(input.Out, "Successfully rotated access keys for %s\n", adminUsername) + log.Info("Syncing credentials to cluster via SyncSet") + if err := syncCredentialsToCluster(ctx, input.HiveKubeClient, account.Spec.ClaimLinkNamespace, input.Out); err != nil { + return reportFailure("Sync credentials to cluster via SyncSet", err) + } + completedSteps = append(completedSteps, "Synced credentials to cluster via SyncSet") - // Delete the secrets referenced by AWS CredentialRequests so CCO recreates - // them with the newly synced credentials. - if err := deleteCredentialSecrets(ctx, input.ManagedClusterClient, input.Out); err != nil { - return err + log.WithField("user", adminUsername).Info("Successfully rotated access keys") + fmt.Fprintf(input.Out, "Successfully rotated access keys for %s\n", adminUsername) + + if input.ManagedClusterClient == nil { + log.Warn("Managed cluster client not available — skipping credential secret deletion") + fmt.Fprintln(input.Out, "\nManaged cluster client not available. Delete credential secrets manually:") + fmt.Fprintln(input.Out, " oc get credentialsrequests -A -o wide") + completedSteps = append(completedSteps, "Skipped credential secret deletion (managed cluster not connected)") + } else { + log.Info("Deleting credential secrets so CCO recreates them") + if err := DeleteCredentialSecrets(ctx, input.ManagedClusterClient, input.In, input.Out); err != nil { + return reportFailure("Delete credential secrets on managed cluster", err) + } + completedSteps = append(completedSteps, "Deleted credential secrets for CCO to recreate") + } } if input.UpdateCcsCreds { - if err := rotateCcsAdminCredentials(ctx, input.AwsClient, input.HiveKubeClient, account, input.Out); err != nil { - return err + fmt.Fprintf(input.Out, "\n%s\n", "==========================================================================") + if input.UpdateManagedAdminCreds { + fmt.Fprintf(input.Out, " Phase 2: Rotating osdCcsAdmin credentials\n") + } else { + fmt.Fprintf(input.Out, " Rotating osdCcsAdmin credentials\n") + } + fmt.Fprintf(input.Out, "%s\n\n", "==========================================================================") + log.Info("Rotating osdCcsAdmin credentials") + if err := rotateCcsAdminCredentials(ctx, input.AwsClient, input.HiveKubeClient, account, input.Report, input.Log, input.In, input.Out); err != nil { + return reportFailure("Rotate osdCcsAdmin credentials", err) } + completedSteps = append(completedSteps, "Rotated osdCcsAdmin credentials") } - if !input.DryRun { - fmt.Fprintln(input.Out, "The rotation should be successfully finished:") - fmt.Fprintln(input.Out, "- do not forget to remove the old access key in AWS!") - fmt.Fprintln(input.Out, "- Confirm secrets are recreated and new access key was used") - fmt.Fprintln(input.Out, "- Remove the old access key (use rh-aws-saml-login)") + fmt.Fprintln(input.Out, "\nPost-rotation: verifying cluster health...") + if err := postRotationCheck(ctx, input, adminUsername); err != nil { + fmt.Fprintf(input.Out, "%s Post-rotation check encountered issues: %v\n", colorYellow("[WARN]"), err) + fmt.Fprintln(input.Out, "Rotation completed but manual verification recommended.") } return nil @@ -236,36 +360,25 @@ func RotateSecret(ctx context.Context, input *RotateSecretInput) error { // VerifyRotationPermissions checks if the assumed role has the necessary IAM // permissions to perform secret rotation by simulating the required actions. +// Uses simulateActions internally for consistency with the diagnostic path. func VerifyRotationPermissions(out io.Writer, awsClient awsprovider.Client, accountID string, username string) error { - requiredActions := []string{ - "iam:CreateAccessKey", - "iam:CreateUser", - "iam:DeleteAccessKey", - "iam:DeleteUser", - "iam:DeleteUserPolicy", - "iam:GetUser", - "iam:GetUserPolicy", - "iam:ListAccessKeys", - "iam:PutUserPolicy", - "iam:TagUser", - } + fmt.Fprintf(out, "Verifying IAM permissions for user %s...\n", username) + report := &DiagnosticReport{AllPermissionsOK: true} userArn := fmt.Sprintf("arn:aws:iam::%s:user/%s", accountID, username) - fmt.Fprintf(out, "Verifying IAM permissions for user %s...\n", username) - - output, err := awsClient.SimulatePrincipalPolicy(&iam.SimulatePrincipalPolicyInput{ - PolicySourceArn: awsSdk.String(userArn), - ActionNames: requiredActions, - }) - if err != nil { + if err := simulateActions(awsClient, userArn, RotationRequiredActions, "rotation", nil, report); err != nil { return fmt.Errorf("failed to simulate principal policy: %w", err) } + if !report.AllPermissionsOK && len(report.Permissions) == 0 { + return fmt.Errorf("failed to simulate principal policy: permission check could not be completed") + } + var deniedActions []string - for _, result := range output.EvaluationResults { - if result.EvalDecision != iamTypes.PolicyEvaluationDecisionTypeAllowed { - deniedActions = append(deniedActions, *result.EvalActionName) + for _, p := range report.Permissions { + if !p.Allowed { + deniedActions = append(deniedActions, p.Action) } } @@ -277,6 +390,157 @@ func VerifyRotationPermissions(out io.Writer, awsClient awsprovider.Client, acco return nil } +// createAccessKeyWithRetry attempts to create an IAM access key, falling back to +// the unsuffixed admin user on NoSuchEntity and prompting for interactive key +// deletion when the 2-key limit is reached. +func createAccessKeyWithRetry(awsClient awsprovider.Client, username, accountID string, report *DiagnosticReport, in io.Reader, out io.Writer) (*iam.CreateAccessKeyOutput, error) { + output, err := awsClient.CreateAccessKey(&iam.CreateAccessKeyInput{ + UserName: awsSdk.String(username), + }) + if err == nil { + return output, nil + } + + var nse *iamTypes.NoSuchEntityException + if errors.As(err, &nse) { + // Only fall back to unsuffixed osdManagedAdmin for suffixed managed-admin users + if strings.HasPrefix(username, osdManagedAdminIAM) && username != osdManagedAdminIAM { + if err := VerifyRotationPermissions(out, awsClient, accountID, osdManagedAdminIAM); err != nil { + return nil, err + } + return awsClient.CreateAccessKey(&iam.CreateAccessKeyInput{ + UserName: awsSdk.String(osdManagedAdminIAM), + }) + } + return nil, fmt.Errorf("IAM user %s not found: %w", username, nse) + } + + if !isLimitExceeded(err) { + return nil, err + } + + fmt.Fprintf(out, "\n%s IAM user %s already has the maximum number of access keys (2).\n", colorYellow("[WARN]"), username) + fmt.Fprintln(out, "A key must be deleted before a new one can be created.") + fmt.Fprintln(out, "Review the keys below and select which one to delete:") + + userKeys := filterKeysForUser(report, username) + if len(userKeys) == 0 { + return nil, fmt.Errorf("no key data available for %s — run snapshot first", username) + } + + renderKeySelectionTable(userKeys, report, out) + + fmt.Fprintf(out, "\nEnter the number of the key to delete (or 'q' to quit): ") + var choice string + if _, scanErr := fmt.Fscanln(in, &choice); scanErr != nil || choice == "q" || choice == "Q" { + return nil, fmt.Errorf("key deletion cancelled by user") + } + + idx := 0 + if _, parseErr := fmt.Sscanf(choice, "%d", &idx); parseErr != nil || idx < 1 || idx > len(userKeys) { + return nil, fmt.Errorf("invalid selection: %s", choice) + } + + keyToDelete := userKeys[idx-1] + fmt.Fprintf(out, "Deleting access key %s...\n", truncateKeyID(keyToDelete.AccessKeyID)) + if _, delErr := awsClient.DeleteAccessKey(&iam.DeleteAccessKeyInput{ + UserName: awsSdk.String(username), + AccessKeyId: awsSdk.String(keyToDelete.AccessKeyID), + }); delErr != nil { + return nil, fmt.Errorf("failed to delete access key %s: %w", keyToDelete.AccessKeyID, delErr) + } + fmt.Fprintf(out, "%s Deleted access key %s\n", colorGreen("[OK]"), truncateKeyID(keyToDelete.AccessKeyID)) + + fmt.Fprintln(out, "Creating new access key...") + return awsClient.CreateAccessKey(&iam.CreateAccessKeyInput{ + UserName: awsSdk.String(username), + }) +} + +// filterKeysForUser returns the subset of diagnostic report keys belonging to the given IAM user. +func filterKeysForUser(report *DiagnosticReport, username string) []KeyStatus { + if report == nil { + return nil + } + var keys []KeyStatus + for _, k := range report.Keys { + if k.UserName == username { + keys = append(keys, k) + } + } + return keys +} + +// renderKeySelectionTable prints an interactive numbered table of IAM keys with +// Hive secret references, highlighting the recommended deletion candidate. +func renderKeySelectionTable(keys []KeyStatus, report *DiagnosticReport, out io.Writer) { + secretsByKeyID := map[string][]string{} + if report != nil { + for _, s := range report.Secrets { + if s.Exists && s.AccessKeyID != "" { + secretsByKeyID[s.AccessKeyID] = append(secretsByKeyID[s.AccessKeyID], s.SecretName) + } + } + } + + if len(keys) > 0 { + fmt.Fprintf(out, "\n IAM User: %s\n", keys[0].UserName) + } + fmt.Fprintf(out, " %-4s %-24s %-6s %-10s %-8s %-34s %s\n", "#", "KEY ID", "AGE", "LAST USED", "STATUS", "REF'd by HIVE SECRET(S)", "SYNC") + fmt.Fprintf(out, " %-4s %-24s %-6s %-10s %-8s %-34s %s\n", underline(4), underline(24), underline(6), underline(10), underline(8), underline(34), underline(6)) + + suggestedIdx := -1 + for i, k := range keys { + secretNames := secretsByKeyID[k.AccessKeyID] + secretStr := colorBlue("(not referenced by this cluster)") + syncStr := "--" + if len(secretNames) > 0 { + secretStr = strings.Join(secretNames, ", ") + syncStr = colorGreen("OK") + } else { + if suggestedIdx == -1 { + suggestedIdx = i + } else if k.Age > keys[suggestedIdx].Age { + suggestedIdx = i + } + } + + visibleSecretStr := secretStr + if len(secretNames) > 0 { + visibleSecretStr = truncate(secretStr, 38) + } + + marker := fmt.Sprintf("%d", i+1) + if i == suggestedIdx { + marker = colorYellow(fmt.Sprintf("%d *", i+1)) + } + + fmt.Fprintf(out, " %-4s %-24s %-6s %-10s %-8s %-34s %s\n", + marker, + k.AccessKeyID, + formatAge(k.Age), + k.LastUsed, + k.Status, + visibleSecretStr, + syncStr, + ) + } + + if suggestedIdx >= 0 { + fmt.Fprintf(out, "\n %s Key #%d is not referenced by this cluster and is the recommended candidate for deletion.\n", + colorYellow("*"), suggestedIdx+1) + } +} + +// isLimitExceeded returns true if the error is an IAM LimitExceededException (e.g., max 2 access keys). +func isLimitExceeded(err error) bool { + if err == nil { + return false + } + var le *iamTypes.LimitExceededException + return errors.As(err, &le) +} + // resolveAdminUsername verifies rotation permissions for the given username, // falling back to the unsuffixed osdManagedAdmin only when the suffixed user // has insufficient permissions. Transport or API errors are never retried @@ -310,15 +574,21 @@ func reportAccessKeys(awsClient awsprovider.Client, username, newKeyID string, o fmt.Fprintf(out, "\nAccess keys for IAM user %s:\n", username) for _, key := range listOutput.AccessKeyMetadata { + if key.AccessKeyId == nil { + continue + } if *key.AccessKeyId == newKeyID { - fmt.Fprintf(out, " - %s (new - just created)\n", *key.AccessKeyId) + fmt.Fprintf(out, " - %s (new - just created)\n", truncateKeyID(*key.AccessKeyId)) } else { - fmt.Fprintf(out, " - %s (old - should be removed)\n", *key.AccessKeyId) + fmt.Fprintf(out, " - %s (old - should be removed)\n", truncateKeyID(*key.AccessKeyId)) } } hasOldKeys := false for _, key := range listOutput.AccessKeyMetadata { + if key.AccessKeyId == nil { + continue + } if *key.AccessKeyId != newKeyID { hasOldKeys = true break @@ -326,9 +596,256 @@ func reportAccessKeys(awsClient awsprovider.Client, username, newKeyID string, o } if hasOldKeys { fmt.Fprintf(out, "\nThe old access key(s) listed above should be removed once rotation is finished.\n") - fmt.Fprintf(out, "Use 'rh-aws-saml-login' to gain access to the account and delete them.\n\n") + fmt.Fprintf(out, "\n") + } + + return nil +} + +// postRotationCheck validates that cluster artifacts are healthy after rotation. +func postRotationCheck(ctx context.Context, input *RotateSecretInput, adminUsername string) error { + account := input.Account + ns := account.Spec.ClaimLinkNamespace + allOK := true + + postReport := &DiagnosticReport{ + IsCCS: account.Spec.BYOC, + AWSAccountID: account.Spec.AwsAccountID, + AccountCRName: input.AccountCRName, + } + hiveKeyIDs := map[string]string{} + + type postSecretCheck struct { + name string + namespace string + } + var secretChecks []postSecretCheck + if input.UpdateManagedAdminCreds { + secretChecks = append(secretChecks, + postSecretCheck{input.AccountCRName + "-secret", awsAccountNamespace}, + postSecretCheck{"aws", ns}, + ) + } + if input.UpdateCcsCreds && account.Spec.BYOC { + secretChecks = append(secretChecks, + postSecretCheck{"byoc", ns}, + ) + } + + for _, sc := range secretChecks { + ss := SecretStatus{SecretName: sc.name, Namespace: sc.namespace} + secret := &corev1.Secret{} + if err := input.HiveKubeClient.Get(ctx, types.NamespacedName{Name: sc.name, Namespace: sc.namespace}, secret); err != nil { + ss.Exists = false + allOK = false + } else { + ss.Exists = true + if keyID, ok := secret.Data["aws_access_key_id"]; ok { + ss.AccessKeyID = string(keyID) + hiveKeyIDs[sc.name] = string(keyID) + } + } + postReport.Secrets = append(postReport.Secrets, ss) + } + + var users []string + if adminUsername != "" { + users = append(users, adminUsername) + } + if input.UpdateCcsCreds && account.Spec.BYOC { + users = append(users, "osdCcsAdmin") + } + + newestKeyByUser := map[string]string{} + for _, username := range users { + listOutput, err := input.AwsClient.ListAccessKeys(&iam.ListAccessKeysInput{UserName: awsSdk.String(username)}) + if err != nil { + fmt.Fprintf(input.Out, " %s Could not list access keys for %s: %v\n", colorRed("[FAIL]"), username, err) + allOK = false + continue + } + + var matchKeyID string + if username == "osdCcsAdmin" { + matchKeyID = hiveKeyIDs["byoc"] + } else { + matchKeyID = hiveKeyIDs[input.AccountCRName+"-secret"] + } + + var newestDate time.Time + for _, key := range listOutput.AccessKeyMetadata { + if key.AccessKeyId == nil { + continue + } + ks := KeyStatus{ + UserName: username, + AccessKeyID: *key.AccessKeyId, + Status: string(key.Status), + } + if key.CreateDate != nil { + ks.CreateDate = *key.CreateDate + ks.Age = time.Since(*key.CreateDate) + if key.CreateDate.After(newestDate) { + newestDate = *key.CreateDate + newestKeyByUser[username] = *key.AccessKeyId + } + } + if matchKeyID != "" && *key.AccessKeyId == matchKeyID { + ks.HiveMatch = true + } + postReport.Keys = append(postReport.Keys, ks) + } + } + + for i := range postReport.Secrets { + ss := &postReport.Secrets[i] + if !ss.Exists || ss.AccessKeyID == "" { + continue + } + for _, ks := range postReport.Keys { + if ks.AccessKeyID == ss.AccessKeyID { + ss.MatchesAWS = true + break + } + } + } + + renderCredentialsTable(postReport, input.Out) + fmt.Fprintln(input.Out) + + for _, ss := range postReport.Secrets { + expectedKey := newestKeyByUser[adminUsername] + if ss.SecretName == "byoc" { + expectedKey = newestKeyByUser["osdCcsAdmin"] + } + if !ss.Exists { + fmt.Fprintf(input.Out, " %s Hive secret %s/%s not found\n", colorRed("[FAIL]"), ss.Namespace, ss.SecretName) + allOK = false + } else if expectedKey != "" && ss.AccessKeyID == expectedKey { + fmt.Fprintf(input.Out, " %s Hive secret %s/%s contains the new key\n", colorGreen("[OK]"), ss.Namespace, ss.SecretName) + } else if ss.AccessKeyID != "" && expectedKey != "" { + fmt.Fprintf(input.Out, " %s Hive secret %s/%s key does not match newest key for its user\n", colorYellow("[WARN]"), ss.Namespace, ss.SecretName) + allOK = false + } else if ss.AccessKeyID != "" { + fmt.Fprintf(input.Out, " %s Hive secret %s/%s has key %s\n", colorGreen("[OK]"), ss.Namespace, ss.SecretName, truncateKeyID(ss.AccessKeyID)) + } + } + + if allOK { + fmt.Fprintf(input.Out, " %s Post-rotation checks passed.\n", colorGreen("[OK]")) + fmt.Fprintln(input.Out, "\n Suggested follow-up steps:") + fmt.Fprintln(input.Out, " - Remove old access key(s) from the AWS account") + fmt.Fprintln(input.Out, " - Verify CCO is healthy:") + fmt.Fprintln(input.Out, " oc logs -n openshift-cloud-credential-operator deploy/cloud-credential-operator --tail=50") + fmt.Fprintln(input.Out, " - Verify credential secrets are recreated:") + fmt.Fprintln(input.Out, " oc get credentialsrequests -A -o wide") + } else { + return fmt.Errorf("post-rotation validation found issues — review warnings above") + } + return nil +} + +// preflightCheckArtifacts validates that all required cluster artifacts exist +// before any AWS keys are created or modified. If a secret is missing, the +// user is prompted to recreate it or abort. +func preflightCheckArtifacts(ctx context.Context, input *RotateSecretInput) error { + log := getLog(input.Log) + + fmt.Fprintf(input.Out, "Pre-flight: verifying client connections...\n") + + if input.AwsClient == nil { + return fmt.Errorf("aws client is not connected — cannot proceed with rotation") + } + if _, err := input.AwsClient.GetCallerIdentity(&sts.GetCallerIdentityInput{}); err != nil { + log.WithError(err).Error("AWS client connection check failed") + return fmt.Errorf("aws client is not responsive: %w", err) + } + fmt.Fprintf(input.Out, " %s AWS client connected\n", colorGreen("[OK]")) + + if input.HiveKubeClient == nil { + return fmt.Errorf("hive client is not connected — cannot proceed with rotation") + } + fmt.Fprintf(input.Out, " %s Hive client connected\n", colorGreen("[OK]")) + + if input.ManagedClusterClient == nil { + log.Warn("Managed cluster client is not connected — credential secret deletion will be skipped") + fmt.Fprintf(input.Out, " %s Managed cluster client not connected — CR secret deletion will be skipped\n", colorYellow("[WARN]")) + } else { + fmt.Fprintf(input.Out, " %s Managed cluster client connected\n", colorGreen("[OK]")) } + account := input.Account + ns := account.Spec.ClaimLinkNamespace + + type secretCheck struct { + name string + namespace string + purpose string + } + var secretsToCheck []secretCheck + if input.UpdateManagedAdminCreds { + secretsToCheck = append(secretsToCheck, + secretCheck{input.AccountCRName + "-secret", awsAccountNamespace, "AAO account secret (stores osdManagedAdmin credentials)"}, + secretCheck{"aws", ns, "Cluster namespace secret (synced to kube-system/aws-creds)"}, + ) + } + if input.UpdateCcsCreds && account.Spec.BYOC { + secretsToCheck = append(secretsToCheck, + secretCheck{"byoc", ns, "CCS admin secret (stores osdCcsAdmin credentials)"}, + ) + } + + fmt.Fprintf(input.Out, "Pre-flight: verifying required Hive secrets exist before rotation...\n") + + for _, sc := range secretsToCheck { + secret := &corev1.Secret{} + err := input.HiveKubeClient.Get(ctx, types.NamespacedName{Name: sc.name, Namespace: sc.namespace}, secret) + if err == nil { + fmt.Fprintf(input.Out, " %s %s/%s (%s)\n", colorGreen("[OK]"), sc.namespace, sc.name, sc.purpose) + continue + } + + if !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to check secret %s/%s: %w", sc.namespace, sc.name, err) + } + + fmt.Fprintf(input.Out, "\n %s Secret %s/%s not found.\n", colorRed("[MISSING]"), sc.namespace, sc.name) + fmt.Fprintf(input.Out, " Purpose: %s\n", sc.purpose) + fmt.Fprintf(input.Out, " Verify manually:\n") + fmt.Fprintf(input.Out, " oc get secret %s -n %s\n", sc.name, sc.namespace) + fmt.Fprintf(input.Out, "\n The rotation tool can create an empty secret that will be populated\n") + fmt.Fprintf(input.Out, " with the new credentials during rotation.\n") + fmt.Fprintf(input.Out, " Create secret %s/%s? (y/N): ", sc.namespace, sc.name) + + if !confirmFrom(input.In) { + return fmt.Errorf("required secret %s/%s is missing — rotation aborted", sc.namespace, sc.name) + } + + newSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: sc.name, + Namespace: sc.namespace, + }, + Data: map[string][]byte{}, + } + if err := input.HiveKubeClient.Create(ctx, newSecret); err != nil { + return fmt.Errorf("failed to create secret %s/%s: %w", sc.namespace, sc.name, err) + } + fmt.Fprintf(input.Out, " %s Created secret %s/%s\n", colorGreen("[OK]"), sc.namespace, sc.name) + } + + if input.UpdateManagedAdminCreds { + cdList := &hiveapiv1.ClusterDeploymentList{} + if err := input.HiveKubeClient.List(ctx, cdList, client.InNamespace(ns)); err != nil { + return fmt.Errorf("failed to list ClusterDeployments in %s: %w", ns, err) + } + if len(cdList.Items) == 0 { + return fmt.Errorf("no ClusterDeployments found in %s — cannot create SyncSet", ns) + } + fmt.Fprintf(input.Out, " %s ClusterDeployment found in %s\n", colorGreen("[OK]"), ns) + } + + fmt.Fprintf(input.Out, "Pre-flight: all required artifacts verified.\n\n") return nil } @@ -446,22 +963,39 @@ func syncCredentialsToCluster(ctx context.Context, kubeClient client.Client, cla // rotateCcsAdminCredentials rotates the osdCcsAdmin IAM user credentials // if the account is CCS (BYOC). -func rotateCcsAdminCredentials(ctx context.Context, awsClient awsprovider.Client, kubeClient client.Client, account *awsv1alpha1.Account, out io.Writer) error { +func rotateCcsAdminCredentials(ctx context.Context, awsClient awsprovider.Client, kubeClient client.Client, account *awsv1alpha1.Account, report *DiagnosticReport, logger *logrus.Logger, in io.Reader, out io.Writer) error { + log := getLog(logger) if !account.Spec.BYOC { fmt.Fprintln(out, "Account is not CCS, skipping osdCcsAdmin credential rotation") return nil } ccsUsername := "osdCcsAdmin" - createAccessKeyOutput, err := awsClient.CreateAccessKey(&iam.CreateAccessKeyInput{ - UserName: awsSdk.String(ccsUsername), - }) + createAccessKeyOutput, err := createAccessKeyWithRetry(awsClient, ccsUsername, account.Spec.AwsAccountID, report, in, out) if err != nil { return err } + if createAccessKeyOutput == nil || createAccessKeyOutput.AccessKey == nil { + return fmt.Errorf("AWS returned nil access key for %s", ccsUsername) + } ccsNewKeyID := *createAccessKeyOutput.AccessKey.AccessKeyId + ccsRotationCommitted := false + defer func() { + if !ccsRotationCommitted { + log.Warnf("CCS rotation did not complete — rolling back access key %s", ccsNewKeyID) + if _, delErr := awsClient.DeleteAccessKey(&iam.DeleteAccessKeyInput{ + UserName: awsSdk.String(ccsUsername), + AccessKeyId: awsSdk.String(ccsNewKeyID), + }); delErr != nil { + log.WithError(delErr).Errorf("Failed to delete orphaned CCS key %s — manual cleanup required via AWS IAM console", ccsNewKeyID) + } else { + log.Infof("Rolled back CCS access key %s", ccsNewKeyID) + } + } + }() + if err := reportAccessKeys(awsClient, ccsUsername, ccsNewKeyID, out); err != nil { return err } @@ -472,95 +1006,270 @@ func rotateCcsAdminCredentials(ctx context.Context, awsClient awsprovider.Client "aws_secret_access_key": []byte(*createAccessKeyOutput.AccessKey.SecretAccessKey), } + fmt.Fprintf(out, "Updating Hive secret %s/byoc with new osdCcsAdmin credentials...\n", account.Spec.ClaimLinkNamespace) if err := updateSecret(ctx, kubeClient, "byoc", account.Spec.ClaimLinkNamespace, newSecretData); err != nil { - return err + return fmt.Errorf("failed to update byoc secret: %w", err) } - - fmt.Fprintln(out, "Successfully rotated secrets for osdCcsAdmin") + ccsRotationCommitted = true + fmt.Fprintf(out, "%s Updated Hive secret %s/byoc\n", colorGreen("[OK]"), account.Spec.ClaimLinkNamespace) + fmt.Fprintln(out, "Successfully rotated credentials for osdCcsAdmin") return nil } -const ( - credentialRequestNamespace = "openshift-cloud-credential-operator" - credentialRequestPrefix = "openshift-" -) - -// isAWSCredentialRequest returns true when the CredentialRequest has the -// "openshift-" name prefix and its providerSpec.kind is "AWSProviderSpec". -func isAWSCredentialRequest(cr *ccov1.CredentialsRequest) bool { - if !strings.HasPrefix(cr.Name, credentialRequestPrefix) { - return false - } - if cr.Spec.ProviderSpec == nil || cr.Spec.ProviderSpec.Raw == nil { - return false - } - var provider struct { - Kind string `json:"kind"` - } - if err := json.Unmarshal(cr.Spec.ProviderSpec.Raw, &provider); err != nil { - return false - } - return provider.Kind == "AWSProviderSpec" -} +const credentialRequestNamespace = "openshift-cloud-credential-operator" // dryRunDeleteCredentialSecrets lists the secrets referenced by AWS -// CredentialRequests that would be deleted during a real rotation. -func dryRunDeleteCredentialSecrets(ctx context.Context, managedClient client.Client, out io.Writer) error { +// CredentialRequests that would be deleted during a real rotation, +// and verifies access + RBAC for each. +func dryRunDeleteCredentialSecrets(ctx context.Context, managedClient client.Client, out io.Writer, dr *dryRunChecker) error { crList := &ccov1.CredentialsRequestList{} - if err := managedClient.List(ctx, crList, client.InNamespace(credentialRequestNamespace)); err != nil { - return fmt.Errorf("failed to list CredentialRequests in %s: %w", credentialRequestNamespace, err) + if err := managedClient.List(ctx, crList); err != nil { + dr.report(false, "Managed cluster: LIST CredentialRequests (all namespaces)") + return fmt.Errorf("failed to list CredentialRequests: %w", err) } + dr.report(true, "Managed cluster: LIST CredentialRequests (all namespaces)") count := 0 for i := range crList.Items { cr := &crList.Items[i] - if !isAWSCredentialRequest(cr) { + if !isAWSCredentialRequestForDiagnostic(cr) { continue } - fmt.Fprintf(out, "[Dry Run] Would delete secret %s/%s (referenced by CredentialRequest %s)\n", - cr.Spec.SecretRef.Namespace, cr.Spec.SecretRef.Name, cr.Name) + ref := cr.Spec.SecretRef + fmt.Fprintf(out, "%s %s\n", colorBlue("[Dry Run]"), + colorBlue(fmt.Sprintf("Would delete secret %s/%s (referenced by CredentialRequest %s)", ref.Namespace, ref.Name, cr.Name))) + + s := &corev1.Secret{} + getErr := managedClient.Get(ctx, types.NamespacedName{Name: ref.Name, Namespace: ref.Namespace}, s) + if getErr != nil { + if apierrors.IsNotFound(getErr) { + dr.info("Managed cluster: secret %s/%s does not exist — delete will be skipped, CCO recreates after rotation", ref.Namespace, ref.Name) + } else { + dr.report(false, "Managed cluster: GET secret %s/%s", ref.Namespace, ref.Name) + } + } else { + dr.report(true, "Managed cluster: GET secret %s/%s", ref.Namespace, ref.Name) + dr.checkCanI(ctx, managedClient, "Managed cluster", "delete", "secrets", "", ref.Namespace) + } count++ } - fmt.Fprintf(out, "[Dry Run] Would delete %d credential secret(s) total\n", count) + fmt.Fprintf(out, "%s %s\n", colorBlue("[Dry Run]"), colorBlue(fmt.Sprintf("Would delete %d credential secret(s) total", count))) return nil } -// deleteCredentialSecrets lists AWS CredentialRequests on the managed cluster, -// resolves the secret each one references via .spec.secretRef, and deletes -// those secrets so CCO recreates them using the newly synced credentials. -func deleteCredentialSecrets(ctx context.Context, managedClient client.Client, out io.Writer) error { +// CRSecretPollInterval is the delay between CR health checks after deleting a secret. +var CRSecretPollInterval = 5 * time.Second + +// CRSecretPollTimeout is the maximum time to wait for CCO to recreate a secret. +var CRSecretPollTimeout = 60 * time.Second + +// DeleteCredentialSecrets sequentially deletes each credential secret and +// waits for CCO to recreate it before proceeding to the next one. +func DeleteCredentialSecrets(ctx context.Context, managedClient client.Client, in io.Reader, out io.Writer) error { fmt.Fprintln(out, "The 'aws-creds' secret in 'kube-system' has been updated via SyncSet.") - fmt.Fprintln(out, "Deleting credential secrets so CCO recreates them with the new credentials...") + fmt.Fprintln(out, "Deleting credential secrets one at a time, verifying CCO recreates each before continuing...") crList := &ccov1.CredentialsRequestList{} - if err := managedClient.List(ctx, crList, client.InNamespace(credentialRequestNamespace)); err != nil { - return fmt.Errorf("failed to list CredentialRequests in %s: %w", credentialRequestNamespace, err) + if err := managedClient.List(ctx, crList); err != nil { + return fmt.Errorf("failed to list CredentialRequests: %w", err) } - deletedCount := 0 + var awsCRs []*ccov1.CredentialsRequest for i := range crList.Items { cr := &crList.Items[i] - if !isAWSCredentialRequest(cr) { - continue + if isAWSCredentialRequestForDiagnostic(cr) { + awsCRs = append(awsCRs, cr) } + } + + deletedCount := 0 + for idx, cr := range awsCRs { ref := cr.Spec.SecretRef - secret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: ref.Name, - Namespace: ref.Namespace, - }, - } - if err := managedClient.Delete(ctx, secret); err != nil { + fmt.Fprintf(out, "\n [%d/%d] CredentialRequest: %s\n", idx+1, len(awsCRs), cr.Name) + + // Record the existing secret's UID so we can detect a genuine recreation + existingSecret := &corev1.Secret{} + var oldUID types.UID + if err := managedClient.Get(ctx, types.NamespacedName{Name: ref.Name, Namespace: ref.Namespace}, existingSecret); err != nil { if apierrors.IsNotFound(err) { - fmt.Fprintf(out, " Secret %s/%s already absent, skipping\n", ref.Namespace, ref.Name) + fmt.Fprintf(out, " Secret %s/%s already absent, waiting for CCO to create it...\n", ref.Namespace, ref.Name) + recreated := waitForCRSecret(ctx, managedClient, cr, oldUID, out) + if recreated { + fmt.Fprintf(out, " %s Secret recreated by CCO\n", colorGreen("[OK]")) + } continue } - return fmt.Errorf("failed to delete secret %s/%s (from CredentialRequest %s): %w", ref.Namespace, ref.Name, cr.Name, err) + return fmt.Errorf("failed to read secret %s/%s: %w", ref.Namespace, ref.Name, err) + } + oldUID = existingSecret.UID + + if err := managedClient.Delete(ctx, existingSecret); err != nil { + if apierrors.IsNotFound(err) { + fmt.Fprintf(out, " Secret %s/%s deleted between read and delete, waiting for CCO...\n", ref.Namespace, ref.Name) + } else { + return fmt.Errorf("failed to delete secret %s/%s: %w", ref.Namespace, ref.Name, err) + } + } else { + fmt.Fprintf(out, " Deleted secret %s/%s\n", ref.Namespace, ref.Name) } - fmt.Fprintf(out, " Deleted secret %s/%s (referenced by CredentialRequest %s)\n", ref.Namespace, ref.Name, cr.Name) deletedCount++ + + if idx == 0 { + fmt.Fprintf(out, " Waiting for CCO to recreate secret (first secret — validating CCO health)...\n") + } else { + fmt.Fprintf(out, " Waiting for CCO to recreate secret...\n") + } + + recreated := waitForCRSecret(ctx, managedClient, cr, oldUID, out) + if !recreated { + fmt.Fprintf(out, "\n %s Secret %s/%s was not recreated within %s.\n", colorYellow("[TIMEOUT]"), ref.Namespace, ref.Name, CRSecretPollTimeout) + fmt.Fprintf(out, " Check CCO health: oc logs -n openshift-cloud-credential-operator deploy/cloud-credential-operator\n") + fmt.Fprintf(out, " Check CR status: oc get credentialsrequest %s -n %s -o yaml\n", cr.Name, cr.Namespace) + + remaining := len(awsCRs) - idx - 1 + if remaining > 0 { + fmt.Fprintf(out, "\n %d credential secret(s) remaining to delete.\n", remaining) + fmt.Fprintf(out, " Continue deleting remaining secrets? (y/N): ") + if !confirmFrom(in) { + fmt.Fprintf(out, "\n Operation paused. %d of %d secret(s) deleted so far.\n", deletedCount, len(awsCRs)) + fmt.Fprintln(out, "\n To complete the rotation manually, delete the remaining credential secrets:") + for _, remaining := range awsCRs[idx+1:] { + rRef := remaining.Spec.SecretRef + fmt.Fprintf(out, " oc delete secret %s -n %s\n", rRef.Name, rRef.Namespace) + } + fmt.Fprintln(out, "\n Then verify all secrets are recreated:") + fmt.Fprintln(out, " oc get credentialsrequests -A -o wide") + return nil + } + } + } else { + fmt.Fprintf(out, " %s Secret %s/%s recreated by CCO\n", colorGreen("[OK]"), ref.Namespace, ref.Name) + } } - fmt.Fprintf(out, "Deleted %d credential secret(s). CCO will recreate them with the updated credentials.\n", deletedCount) + fmt.Fprintf(out, "\nDeleted and verified %d credential secret(s).\n", deletedCount) return nil } + +// waitForCRSecret polls until CCO recreates the secret referenced by a CredentialRequest +// (detected by a new UID and Provisioned status), returning false on timeout. +func waitForCRSecret(ctx context.Context, managedClient client.Client, cr *ccov1.CredentialsRequest, oldUID types.UID, out io.Writer) bool { + ref := cr.Spec.SecretRef + deadline := time.Now().Add(CRSecretPollTimeout) + + for time.Now().Before(deadline) { + select { + case <-ctx.Done(): + fmt.Fprintf(out, "\n") + return false + case <-time.After(CRSecretPollInterval): + } + + secret := &corev1.Secret{} + if err := managedClient.Get(ctx, types.NamespacedName{Name: ref.Name, Namespace: ref.Namespace}, secret); err != nil { + fmt.Fprintf(out, ".") + continue + } + + // If we have an old UID, ensure this is a new object (not the pre-delete one still visible) + if oldUID != "" && secret.UID == oldUID { + fmt.Fprintf(out, ".") + continue + } + + updatedCR := &ccov1.CredentialsRequest{} + if err := managedClient.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.Namespace}, updatedCR); err != nil { + fmt.Fprintf(out, ".") + continue + } + + if updatedCR.Status.Provisioned { + hasFailed := false + for _, cond := range updatedCR.Status.Conditions { + if cond.Type == ccov1.CredentialsProvisionFailure && cond.Status == corev1.ConditionTrue { + hasFailed = true + fmt.Fprintf(out, "\n %s CR %s has CredentialsProvisionFailure: %s\n", colorRed("[WARN]"), cr.Name, cond.Message) + } + } + if !hasFailed { + fmt.Fprintf(out, "\n") + return true + } + } + fmt.Fprintf(out, ".") + } + fmt.Fprintf(out, "\n") + return false +} + +type dryRunChecker struct { + out io.Writer + allOK bool +} + +func (d *dryRunChecker) would(format string, args ...any) { + fmt.Fprintf(d.out, "%s %s %s\n", colorBlue("[Dry Run]"), colorBlue("Would"), colorBlue(fmt.Sprintf(format, args...))) +} + +func (d *dryRunChecker) report(ok bool, format string, args ...any) { + status := colorGreen("[OK]") + if !ok { + status = colorRed("[FAIL]") + d.allOK = false + } + fmt.Fprintf(d.out, "%s %s %s\n", colorBlue("[Dry Run]"), status, fmt.Sprintf(format, args...)) +} + +// checkCanI performs a SelfSubjectAccessReview to verify the caller has RBAC +// permission for the given verb on the resource in the namespace. +func (d *dryRunChecker) checkCanI(ctx context.Context, k8sClient client.Client, label, verb, resource, group, namespace string) { + ssar := &authv1.SelfSubjectAccessReview{ + Spec: authv1.SelfSubjectAccessReviewSpec{ + ResourceAttributes: &authv1.ResourceAttributes{ + Namespace: namespace, + Verb: verb, + Group: group, + Resource: resource, + }, + }, + } + + _ = authv1.AddToScheme(k8sClient.Scheme()) + + if err := k8sClient.Create(ctx, ssar); err != nil { + fmt.Fprintf(d.out, "%s %s %s: auth can-i %s %s in %s (could not verify: %v)\n", + colorBlue("[Dry Run]"), colorYellow("[SKIP]"), label, verb, resource, namespace, err) + return + } + + if !ssar.Status.Allowed { + reason := ssar.Status.Reason + if reason == "" { + reason = "denied by RBAC" + } + d.report(false, "%s: auth can-i %s %s in %s (%s)", label, verb, resource, namespace, reason) + } else { + d.report(true, "%s: auth can-i %s %s in %s", label, verb, resource, namespace) + } +} + +// checkResourceAccess combines a GET (resource exists + readable) with a +// SelfSubjectAccessReview for the target verb. +func (d *dryRunChecker) checkResourceAccess(ctx context.Context, k8sClient client.Client, label, verb, resource, namespace, name string) { + obj := &corev1.Secret{} + getErr := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj) + if getErr != nil { + if apierrors.IsNotFound(getErr) { + d.report(false, "%s: secret %s/%s not found — rotation requires this secret to exist", label, namespace, name) + } else { + d.report(false, "%s: GET secret %s/%s (%v)", label, namespace, name, getErr) + } + return + } + d.report(true, "%s: GET secret %s/%s", label, namespace, name) + d.checkCanI(ctx, k8sClient, label, verb, resource, "", namespace) +} + +func (d *dryRunChecker) info(format string, args ...any) { + fmt.Fprintf(d.out, "%s %s %s\n", colorBlue("[Dry Run]"), colorBlue("[INFO]"), colorBlue(fmt.Sprintf(format, args...))) +} diff --git a/pkg/controller/rotatesecret_test.go b/pkg/controller/rotatesecret_test.go index 27c641a14..73b70ee77 100644 --- a/pkg/controller/rotatesecret_test.go +++ b/pkg/controller/rotatesecret_test.go @@ -4,7 +4,9 @@ import ( "bytes" "context" "encoding/json" + "strings" "testing" + "time" awsSdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/iam" @@ -118,7 +120,7 @@ func testManagedClient(t *testing.T, crs ...runtime.Object) client.Client { func mockSimulateAllAllowed(mockClient *mock_aws.MockClient) *gomock.Call { return mockClient.EXPECT(). - SimulatePrincipalPolicy(gomock.Any()). + SimulatePrincipalPolicy(gomock.Any()).AnyTimes(). Return(&iam.SimulatePrincipalPolicyOutput{ EvaluationResults: []iamTypes.EvaluationResult{ {EvalActionName: awsSdk.String("iam:CreateAccessKey"), EvalDecision: iamTypes.PolicyEvaluationDecisionTypeAllowed}, @@ -151,7 +153,7 @@ func mockCreateAccessKey(mockClient *mock_aws.MockClient) *gomock.Call { func mockListAccessKeys(mockClient *mock_aws.MockClient, oldKeyID, newKeyID string) *gomock.Call { return mockClient.EXPECT(). - ListAccessKeys(gomock.Any()). + ListAccessKeys(gomock.Any()).AnyTimes(). Return(&iam.ListAccessKeysOutput{ AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ {AccessKeyId: awsSdk.String(oldKeyID)}, @@ -167,16 +169,18 @@ func TestRotateSecret_STSAccount(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) kubeCli := fake.NewClientBuilder().WithScheme(testScheme(t)).Build() err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.Error(t, err) @@ -191,16 +195,18 @@ func TestRotateSecret_MissingIamUserIdLabel(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) kubeCli := fake.NewClientBuilder().WithScheme(testScheme(t)).Build() err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.Error(t, err) @@ -230,6 +236,7 @@ func TestRotateSecret_SuccessfulRotation(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) mockCreateAccessKey(mockClient) @@ -238,20 +245,21 @@ func TestRotateSecret_SuccessfulRotation(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + AccountCRName: "test-account", + Account: account, + UpdateManagedAdminCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.NoError(t, err) assert.Contains(t, out.String(), "AWS creds updated on hive.") assert.Contains(t, out.String(), "Successfully rotated access keys for osdManagedAdmin-abcd") - assert.Contains(t, out.String(), "OLDKEY999 (old - should be removed)") - assert.Contains(t, out.String(), "NEWKEY123 (new - just created)") - assert.Contains(t, out.String(), "rh-aws-saml-login") + assert.Contains(t, out.String(), "OLDK...Y999 (old - should be removed)") + assert.Contains(t, out.String(), "NEWK...Y123 (new - just created)") + assert.Contains(t, out.String(), "old - should be removed") } func TestRotateSecret_SuccessfulRotationWithCCS(t *testing.T) { @@ -287,6 +295,7 @@ func TestRotateSecret_SuccessfulRotationWithCCS(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) // First call: osdManagedAdmin key @@ -299,18 +308,19 @@ func TestRotateSecret_SuccessfulRotationWithCCS(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - UpdateCcsCreds: true, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + AccountCRName: "test-account", + Account: account, + UpdateManagedAdminCreds: true, + UpdateCcsCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.NoError(t, err) assert.Contains(t, out.String(), "Successfully rotated access keys for osdManagedAdmin-abcd") - assert.Contains(t, out.String(), "Successfully rotated secrets for osdCcsAdmin") + assert.Contains(t, out.String(), "Successfully rotated credentials for osdCcsAdmin") } func TestRotateSecret_CCSFlagOnNonBYOCAccount(t *testing.T) { @@ -335,6 +345,7 @@ func TestRotateSecret_CCSFlagOnNonBYOCAccount(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) mockCreateAccessKey(mockClient) @@ -343,13 +354,14 @@ func TestRotateSecret_CCSFlagOnNonBYOCAccount(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - UpdateCcsCreds: true, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + AccountCRName: "test-account", + Account: account, + UpdateManagedAdminCreds: true, + UpdateCcsCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.NoError(t, err) @@ -378,6 +390,7 @@ func TestRotateSecret_AdminUsernameFallback(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) // First simulate call fails (suffixed username) mockClient.EXPECT(). @@ -413,12 +426,13 @@ func TestRotateSecret_AdminUsernameFallback(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.NoError(t, err) @@ -448,6 +462,7 @@ func TestRotateSecret_CreateAccessKeyNoSuchEntityFallback(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) // First SimulatePrincipalPolicy for the suffixed user (passes), // second for the unsuffixed fallback user after CreateAccessKey returns NoSuchEntityException. @@ -478,12 +493,13 @@ func TestRotateSecret_CreateAccessKeyNoSuchEntityFallback(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.NoError(t, err) @@ -513,6 +529,7 @@ func TestRotateSecret_SyncSetTimeout(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) mockCreateAccessKey(mockClient) @@ -521,12 +538,13 @@ func TestRotateSecret_SyncSetTimeout(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.Error(t, err) @@ -568,6 +586,7 @@ func TestRotateSecret_StaleSyncSetIsUpdated(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) mockCreateAccessKey(mockClient) @@ -576,12 +595,13 @@ func TestRotateSecret_StaleSyncSetIsUpdated(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: testManagedClient(t), - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, }) assert.NoError(t, err) @@ -649,6 +669,7 @@ func TestVerifyRotationPermissions(t *testing.T) { defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) expectedArn := "arn:aws:iam::" + tt.accountID + ":user/" + tt.username mockClient.EXPECT(). @@ -695,6 +716,7 @@ func TestRotateSecret_ExplicitAdminUsername(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) mockCreateAccessKey(mockClient) @@ -703,6 +725,7 @@ func TestRotateSecret_ExplicitAdminUsername(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ + UpdateManagedAdminCreds: true, AccountCRName: "test-account", Account: account, OsdManagedAdminUsername: "osdManagedAdmin-custom", @@ -720,17 +743,17 @@ func TestRotateSecret_DryRun(t *testing.T) { account := testAccount(false, false) secrets := testSecrets() - objs := append(secrets, account) + objs := append(secrets, account, testClusterDeployment()) scheme := testScheme(t) kubeCli := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build() ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) - // Only SimulatePrincipalPolicy should be called (read-only validation). - // No CreateAccessKey calls should happen. mockSimulateAllAllowed(mockClient) + mockListAccessKeys(mockClient, "AKIAEXISTING01", "AKIAEXISTING02") crIngress := &ccov1.CredentialsRequest{ ObjectMeta: metav1.ObjectMeta{ @@ -745,7 +768,7 @@ func TestRotateSecret_DryRun(t *testing.T) { crCustom := &ccov1.CredentialsRequest{ ObjectMeta: metav1.ObjectMeta{ Name: "custom-cr", - Namespace: credentialRequestNamespace, + Namespace: "customer-namespace", }, Spec: ccov1.CredentialsRequestSpec{ SecretRef: corev1.ObjectReference{Name: "custom-creds", Namespace: "custom-ns"}, @@ -757,13 +780,14 @@ func TestRotateSecret_DryRun(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - DryRun: true, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: managedClient, - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + DryRun: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: managedClient, + Out: out, }) assert.NoError(t, err) @@ -775,7 +799,7 @@ func TestRotateSecret_DryRun(t *testing.T) { assert.Contains(t, output, "[Dry Run] Would delete secret openshift-ingress-operator/ingress-creds (referenced by CredentialRequest openshift-ingress)") assert.NotContains(t, output, "custom-creds") assert.Contains(t, output, "[Dry Run] Would delete 1 credential secret(s) total") - assert.Contains(t, output, "[Dry Run] No changes were made.") + assert.Contains(t, output, "All pre-flight checks passed") // Verify secrets were NOT modified secret := &corev1.Secret{} @@ -787,37 +811,57 @@ func TestRotateSecret_DryRun(t *testing.T) { func TestRotateSecret_DryRunWithCCS(t *testing.T) { account := testAccount(true, false) secrets := testSecrets() + byocSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "byoc", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIACCS001")}, + } - objs := append(secrets, account) + objs := append(secrets, account, testClusterDeployment(), byocSecret) scheme := testScheme(t) kubeCli := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build() ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdManagedAdmin-abcd"), + }).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("AKIAMANAGED01")}, + }, + }, nil) + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdCcsAdmin"), + }).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdCcsAdmin"), AccessKeyId: awsSdk.String("AKIACCS001")}, + }, + }, nil) managedClient := testManagedClient(t) out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - DryRun: true, - UpdateCcsCreds: true, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: managedClient, - Out: out, + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + DryRun: true, + UpdateCcsCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: managedClient, + Out: out, }) assert.NoError(t, err) output := out.String() assert.Contains(t, output, "[Dry Run] Would create a new IAM access key for user: osdCcsAdmin") assert.Contains(t, output, "[Dry Run] Would update secret uhc-production-test/byoc") - assert.Contains(t, output, "[Dry Run] No changes were made.") + assert.Contains(t, output, "All pre-flight checks passed") } func awsProviderSpec(t *testing.T) *runtime.RawExtension { @@ -837,11 +881,17 @@ func gcpProviderSpec(t *testing.T) *runtime.RawExtension { func TestRotateSecret_CredentialSecretDeletion(t *testing.T) { origInterval := SyncPollInterval origRetries := SyncMaxRetries + origCRInterval := CRSecretPollInterval + origCRTimeout := CRSecretPollTimeout SyncPollInterval = 0 SyncMaxRetries = 1 + CRSecretPollInterval = 1 * time.Millisecond + CRSecretPollTimeout = 0 defer func() { SyncPollInterval = origInterval SyncMaxRetries = origRetries + CRSecretPollInterval = origCRInterval + CRSecretPollTimeout = origCRTimeout }() account := testAccount(false, false) @@ -856,6 +906,7 @@ func TestRotateSecret_CredentialSecretDeletion(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) mockSimulateAllAllowed(mockClient) mockCreateAccessKey(mockClient) @@ -899,7 +950,7 @@ func TestRotateSecret_CredentialSecretDeletion(t *testing.T) { }, } crCustom := &ccov1.CredentialsRequest{ - ObjectMeta: metav1.ObjectMeta{Name: "custom-cr", Namespace: credentialRequestNamespace}, + ObjectMeta: metav1.ObjectMeta{Name: "custom-cr", Namespace: "customer-namespace"}, Spec: ccov1.CredentialsRequestSpec{ SecretRef: corev1.ObjectReference{Name: "custom-creds", Namespace: "custom-ns"}, ProviderSpec: awsProviderSpec(t), @@ -914,29 +965,225 @@ func TestRotateSecret_CredentialSecretDeletion(t *testing.T) { out := &bytes.Buffer{} err := RotateSecret(context.Background(), &RotateSecretInput{ - AccountCRName: "test-account", - Account: account, - AwsClient: mockClient, - HiveKubeClient: kubeCli, - ManagedClusterClient: managedClient, - Out: out, + AccountCRName: "test-account", + Account: account, + UpdateManagedAdminCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: managedClient, + In: strings.NewReader("n\n"), + Out: out, }) assert.NoError(t, err) output := out.String() + + // With no CCO in tests, the first secret deletion triggers a timeout + // and the user declines to continue, pausing the operation. assert.Contains(t, output, "Deleted secret openshift-ingress-operator/ingress-creds") - assert.Contains(t, output, "Deleted secret openshift-machine-api/machine-api-creds") + assert.Contains(t, output, "[TIMEOUT]") + assert.Contains(t, output, "Operation paused") + assert.Contains(t, output, "oc delete secret machine-api-creds") assert.NotContains(t, output, "gcp-creds") assert.NotContains(t, output, "custom-creds") - assert.Contains(t, output, "Deleted 2 credential secret(s)") // Verify that the GCP and custom secrets still exist s := &corev1.Secret{} assert.NoError(t, managedClient.Get(context.Background(), types.NamespacedName{Name: "gcp-creds", Namespace: "openshift-gcp"}, s)) assert.NoError(t, managedClient.Get(context.Background(), types.NamespacedName{Name: "custom-creds", Namespace: "custom-ns"}, s)) + // Second AWS secret should NOT have been deleted (operation was paused) + assert.NoError(t, managedClient.Get(context.Background(), types.NamespacedName{Name: "machine-api-creds", Namespace: "openshift-machine-api"}, s)) + // Verify all CredentialRequests still exist (they should not be deleted) remaining := &ccov1.CredentialsRequestList{} - assert.NoError(t, managedClient.List(context.Background(), remaining, client.InNamespace(credentialRequestNamespace))) + assert.NoError(t, managedClient.List(context.Background(), remaining)) assert.Len(t, remaining.Items, 4) } + +func TestRotateSecret_CCSOnlyRotation(t *testing.T) { + origInterval := SyncPollInterval + origRetries := SyncMaxRetries + SyncPollInterval = 0 + SyncMaxRetries = 1 + defer func() { + SyncPollInterval = origInterval + SyncMaxRetries = origRetries + }() + + account := testAccount(true, false) + // Remove iamUserId label — CCS-only should not require it + delete(account.Labels, "iamUserId") + + byocSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "byoc", Namespace: "uhc-production-test"}, + Data: map[string][]byte{ + "aws_user_name": []byte("old-ccs-user"), + "aws_access_key_id": []byte("old-ccs-key"), + "aws_secret_access_key": []byte("old-ccs-secret"), + }, + } + + objs := []runtime.Object{account, byocSecret} + scheme := testScheme(t) + kubeCli := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + + // CCS-only: should NOT call SimulatePrincipalPolicy for managed-admin + // Only CreateAccessKey + ListAccessKeys for osdCcsAdmin + mockClient.EXPECT().CreateAccessKey(&iam.CreateAccessKeyInput{ + UserName: awsSdk.String("osdCcsAdmin"), + }).Return(&iam.CreateAccessKeyOutput{ + AccessKey: &iamTypes.AccessKey{ + UserName: awsSdk.String("osdCcsAdmin"), + AccessKeyId: awsSdk.String("NEWCCSKEY123"), + SecretAccessKey: awsSdk.String("NEWCCSSECRET456"), + }, + }, nil) + mockClient.EXPECT().ListAccessKeys(gomock.Any()).AnyTimes().Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdCcsAdmin"), AccessKeyId: awsSdk.String("NEWCCSKEY123")}, + }, + }, nil) + + out := &bytes.Buffer{} + + err := RotateSecret(context.Background(), &RotateSecretInput{ + AccountCRName: "test-account", + Account: account, + UpdateCcsCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + Out: out, + }) + + assert.NoError(t, err) + output := out.String() + assert.Contains(t, output, "Successfully rotated credentials for osdCcsAdmin") + assert.NotContains(t, output, "Phase 1") + assert.NotContains(t, output, "Phase 2") + assert.NotContains(t, output, "osdManagedAdmin") +} + +func TestRotateSecret_CCSOnlyDryRun(t *testing.T) { + account := testAccount(true, false) + // Remove iamUserId label — CCS-only dry-run should not require it + delete(account.Labels, "iamUserId") + + byocSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "byoc", Namespace: "uhc-production-test"}, + Data: map[string][]byte{"aws_access_key_id": []byte("AKIACCS001")}, + } + + objs := []runtime.Object{account, byocSecret} + scheme := testScheme(t) + kubeCli := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + + // CCS-only dry-run: should only ListAccessKeys for osdCcsAdmin + mockClient.EXPECT().ListAccessKeys(&iam.ListAccessKeysInput{ + UserName: awsSdk.String("osdCcsAdmin"), + }).Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdCcsAdmin"), AccessKeyId: awsSdk.String("AKIACCS001")}, + }, + }, nil) + + out := &bytes.Buffer{} + + err := RotateSecret(context.Background(), &RotateSecretInput{ + AccountCRName: "test-account", + Account: account, + DryRun: true, + UpdateCcsCreds: true, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + Out: out, + }) + + assert.NoError(t, err) + output := out.String() + assert.Contains(t, output, "Would create a new IAM access key for user: osdCcsAdmin") + assert.Contains(t, output, "All pre-flight checks passed") + // Should NOT contain managed-admin preflight + assert.NotContains(t, output, "osdManagedAdmin") +} + +func TestRotateSecret_CreateAccessKeyNoFallbackForCCS(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + + nse := &iamTypes.NoSuchEntityException{Message: awsSdk.String("user not found")} + mockClient.EXPECT().CreateAccessKey(&iam.CreateAccessKeyInput{ + UserName: awsSdk.String("osdCcsAdmin"), + }).Return(nil, nse) + + out := &bytes.Buffer{} + _, err := createAccessKeyWithRetry(mockClient, "osdCcsAdmin", "123456789012", nil, nil, out) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "IAM user osdCcsAdmin not found") + assert.NotContains(t, out.String(), "osdManagedAdmin") +} + +func TestRotateSecret_PostRotationValidationFailure(t *testing.T) { + origInterval := SyncPollInterval + origRetries := SyncMaxRetries + SyncPollInterval = 0 + SyncMaxRetries = 1 + defer func() { + SyncPollInterval = origInterval + SyncMaxRetries = origRetries + }() + + account := testAccount(false, false) + secrets := testSecrets() + cd := testClusterDeployment() + cs := testClusterSync(true) + + objs := append(secrets, account, cd, cs) + scheme := testScheme(t) + kubeCli := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockClient := mock_aws.NewMockClient(ctrl) + mockGetCallerIdentity(mockClient) + mockSimulateAllAllowed(mockClient) + mockCreateAccessKey(mockClient) + + // Post-rotation ListAccessKeys returns a key that doesn't match the new key + now := time.Now() + mockClient.EXPECT().ListAccessKeys(gomock.Any()).AnyTimes().Return(&iam.ListAccessKeysOutput{ + AccessKeyMetadata: []iamTypes.AccessKeyMetadata{ + {UserName: awsSdk.String("osdManagedAdmin-abcd"), AccessKeyId: awsSdk.String("WRONGKEY999"), CreateDate: &now, Status: iamTypes.StatusTypeActive}, + }, + }, nil) + + out := &bytes.Buffer{} + + err := RotateSecret(context.Background(), &RotateSecretInput{ + UpdateManagedAdminCreds: true, + AccountCRName: "test-account", + Account: account, + AwsClient: mockClient, + HiveKubeClient: kubeCli, + ManagedClusterClient: testManagedClient(t), + Out: out, + }) + + // Rotation itself succeeds; post-rotation check finds key mismatch and logs a warning + assert.NoError(t, err) + output := out.String() + assert.Contains(t, output, "key does not match newest key") + assert.Contains(t, output, "Post-rotation check encountered issues") +} diff --git a/pkg/provider/aws/client.go b/pkg/provider/aws/client.go index 32d08a5a0..a2d8f38e9 100644 --- a/pkg/provider/aws/client.go +++ b/pkg/provider/aws/client.go @@ -63,6 +63,7 @@ type Client interface { CreateAccessKey(*iam.CreateAccessKeyInput) (*iam.CreateAccessKeyOutput, error) DeleteAccessKey(*iam.DeleteAccessKeyInput) (*iam.DeleteAccessKeyOutput, error) ListAccessKeys(*iam.ListAccessKeysInput) (*iam.ListAccessKeysOutput, error) + GetAccessKeyLastUsed(ctx context.Context, input *iam.GetAccessKeyLastUsedInput) (*iam.GetAccessKeyLastUsedOutput, error) GetUser(*iam.GetUserInput) (*iam.GetUserOutput, error) CreateUser(*iam.CreateUserInput) (*iam.CreateUserOutput, error) ListUsers(*iam.ListUsersInput) (*iam.ListUsersOutput, error) @@ -314,6 +315,10 @@ func (c *AwsClient) ListAccessKeys(input *iam.ListAccessKeysInput) (*iam.ListAcc return c.iamClient.ListAccessKeys(context.TODO(), input) } +func (c *AwsClient) GetAccessKeyLastUsed(ctx context.Context, input *iam.GetAccessKeyLastUsedInput) (*iam.GetAccessKeyLastUsedOutput, error) { + return c.iamClient.GetAccessKeyLastUsed(ctx, input) +} + func (c *AwsClient) GetUser(input *iam.GetUserInput) (*iam.GetUserOutput, error) { return c.iamClient.GetUser(context.TODO(), input) } diff --git a/pkg/provider/aws/mock/client.go b/pkg/provider/aws/mock/client.go index 8fe36df15..c5dbb04a4 100644 --- a/pkg/provider/aws/mock/client.go +++ b/pkg/provider/aws/mock/client.go @@ -10,6 +10,7 @@ package mock import ( + context "context" reflect "reflect" cloudtrail "github.com/aws/aws-sdk-go-v2/service/cloudtrail" @@ -546,6 +547,21 @@ func (mr *MockClientMockRecorder) DetachUserPolicy(arg0 any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachUserPolicy", reflect.TypeOf((*MockClient)(nil).DetachUserPolicy), arg0) } +// GetAccessKeyLastUsed mocks base method. +func (m *MockClient) GetAccessKeyLastUsed(ctx context.Context, input *iam.GetAccessKeyLastUsedInput) (*iam.GetAccessKeyLastUsedOutput, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetAccessKeyLastUsed", ctx, input) + ret0, _ := ret[0].(*iam.GetAccessKeyLastUsedOutput) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetAccessKeyLastUsed indicates an expected call of GetAccessKeyLastUsed. +func (mr *MockClientMockRecorder) GetAccessKeyLastUsed(ctx, input any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAccessKeyLastUsed", reflect.TypeOf((*MockClient)(nil).GetAccessKeyLastUsed), ctx, input) +} + // GetCallerIdentity mocks base method. func (m *MockClient) GetCallerIdentity(arg0 *sts.GetCallerIdentityInput) (*sts.GetCallerIdentityOutput, error) { m.ctrl.T.Helper()