From bc8d695026c46b0d90ea883bcc7fafb742cf503c Mon Sep 17 00:00:00 2001
From: Tom Keller <kellertk@amazon.com>
Date: Fri, 8 May 2026 11:01:28 -0700
Subject: [PATCH] feat: add more retry logic and better logging

Wraps exportAccountId and validateCredentials calls in retryAndBackoff.
Closes #1681. Adds a label parameter to retryAndBackoff for better
info-level log messages.
---
 src/helpers.ts       |  10 +++--
 src/index.ts         |  91 ++++++++++++++++++++------------------
 test/helpers.test.ts |  23 ++++++++++
 test/index.test.ts   | 101 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 178 insertions(+), 47 deletions(-)
diff --git a/src/helpers.ts b/src/helpers.ts
index 82e25d5..cf32029 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -189,6 +189,7 @@ export async function retryAndBackoff<T>(
   maxRetries = 12,
   retries = 0,
   base = 50,
+  label?: string,
 ): Promise<T> {
   try {
     return await fn();
@@ -200,20 +201,21 @@ export async function retryAndBackoff<T>(
     // It's retryable, so sleep and retry.
     const delay = Math.random() * (2 ** retries * base);
     const nextRetry = retries + 1;
+    const opName = label ? ` ${label}` : '';
 
-    core.debug(
-      `retryAndBackoff: attempt ${nextRetry} of ${maxRetries} failed: ${errorMessage(err)}. ` +
+    core.info(
+      `Retry${opName}: attempt ${nextRetry} of ${maxRetries} failed: ${errorMessage(err)}. ` +
         `Retrying after ${Math.floor(delay)}ms.`,
     );
 
     await sleep(delay);
 
     if (nextRetry >= maxRetries) {
-      core.debug('retryAndBackoff: reached max retries; giving up.');
+      core.info(`Retry${opName}: reached max retries (${maxRetries}); giving up.`);
       throw err;
     }
 
-    return await retryAndBackoff(fn, isRetryable, maxRetries, nextRetry, base);
+    return await retryAndBackoff(fn, isRetryable, maxRetries, nextRetry, base, label);
   }
 }
 
diff --git a/src/index.ts b/src/index.ts
index b87ef84..a53de0a 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -90,6 +90,9 @@ export async function run() {
       maxRetries = 1;
     }
 
+    const withRetry = <T>(fn: () => Promise<T>, label: string): Promise<T> =>
+      retryAndBackoff(fn, !disableRetry, maxRetries, 0, 50, label);
+
     // Logic to decide whether to attempt to use OIDC or not
     const useGitHubOIDCProvider = () => {
       if (forceSkipOidc) return false;
@@ -161,13 +164,9 @@ export async function run() {
     // Else, export credentials provided as input
     if (useGitHubOIDCProvider()) {
       try {
-        webIdentityToken = await retryAndBackoff(
-          async () => {
-            return core.getIDToken(audience);
-          },
-          !disableRetry,
-          maxRetries,
-        );
+        webIdentityToken = await withRetry(async () => {
+          return core.getIDToken(audience);
+        }, 'getIDToken');
       } catch (error) {
         throw new Error(`getIDToken call failed: ${errorMessage(error)}`);
       }
@@ -188,16 +187,22 @@ export async function run() {
       }
     } else if (!webIdentityTokenFile && !roleChaining) {
       // Proceed only if credentials can be picked up
-      await credentialsClient.validateCredentials(undefined, roleChaining, expectedAccountIds);
-      sourceAccountId = await exportAccountId(credentialsClient, maskAccountId);
+      await withRetry(
+        () => credentialsClient.validateCredentials(undefined, roleChaining, expectedAccountIds),
+        'validateCredentials',
+      );
+      sourceAccountId = await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId');
     }
 
     if (AccessKeyId || roleChaining) {
       // Validate that the SDK can actually pick up credentials.
       // This validates cases where this action is using existing environment credentials,
       // and cases where the user intended to provide input credentials but the secrets inputs resolved to empty strings.
-      await credentialsClient.validateCredentials(AccessKeyId, roleChaining, expectedAccountIds);
-      sourceAccountId = await exportAccountId(credentialsClient, maskAccountId);
+      await withRetry(
+        () => credentialsClient.validateCredentials(AccessKeyId, roleChaining, expectedAccountIds),
+        'validateCredentials',
+      );
+      sourceAccountId = await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId');
     }
     if (customTags && (useGitHubOIDCProvider() || webIdentityTokenFile)) {
       core.warning(
@@ -210,27 +215,23 @@ export async function run() {
     if (roleToAssume) {
       let roleCredentials: AssumeRoleCommandOutput;
       do {
-        roleCredentials = await retryAndBackoff(
-          async () => {
-            return assumeRole({
-              credentialsClient,
-              sourceAccountId,
-              roleToAssume,
-              roleExternalId,
-              roleDuration,
-              roleSessionName,
-              roleSkipSessionTagging,
-              transitiveTagKeys,
-              webIdentityTokenFile,
-              webIdentityToken,
-              inlineSessionPolicy,
-              managedSessionPolicies,
-              customTags,
-            });
-          },
-          !disableRetry,
-          maxRetries,
-        );
+        roleCredentials = await withRetry(async () => {
+          return assumeRole({
+            credentialsClient,
+            sourceAccountId,
+            roleToAssume,
+            roleExternalId,
+            roleDuration,
+            roleSessionName,
+            roleSkipSessionTagging,
+            transitiveTagKeys,
+            webIdentityTokenFile,
+            webIdentityToken,
+            inlineSessionPolicy,
+            managedSessionPolicies,
+            customTags,
+          });
+        }, 'AssumeRole');
       } while (specialCharacterWorkaround && !verifyKeys(roleCredentials.Credentials));
       core.info(`Authenticated as assumedRoleId ${roleCredentials.AssumedRoleUser?.AssumedRoleId}`);
       exportCredentials(roleCredentials.Credentials, outputCredentials, outputEnvCredentials);
@@ -241,14 +242,18 @@ export async function run() {
       // If we are using a profile, don't validate credentials yet (since they most likely won't be in the environment).
       // Wait until after creds are written to the profile file to try validation.
       if ((!process.env.GITHUB_ACTIONS || AccessKeyId) && !awsProfile) {
-        await credentialsClient.validateCredentials(
-          roleCredentials.Credentials?.AccessKeyId,
-          roleChaining,
-          expectedAccountIds,
+        await withRetry(
+          () =>
+            credentialsClient.validateCredentials(
+              roleCredentials.Credentials?.AccessKeyId,
+              roleChaining,
+              expectedAccountIds,
+            ),
+          'validateCredentials',
         );
       }
       if (outputEnvCredentials) {
-        await exportAccountId(credentialsClient, maskAccountId);
+        await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId');
       }
 
       // Write profile files if profile mode is enabled
@@ -261,10 +266,14 @@ export async function run() {
         // We then validate the credentials to make sure they work.
         if (AccessKeyId || !process.env.GITHUB_ACTIONS) {
           writeProfileFiles(awsProfile, roleCredentials.Credentials, region, true);
-          await credentialsClient.validateCredentials(
-            roleCredentials.Credentials.AccessKeyId,
-            roleChaining,
-            expectedAccountIds,
+          await withRetry(
+            () =>
+              credentialsClient.validateCredentials(
+                roleCredentials.Credentials?.AccessKeyId,
+                roleChaining,
+                expectedAccountIds,
+              ),
+            'validateCredentials',
           );
         } else {
           writeProfileFiles(awsProfile, roleCredentials.Credentials, region, overwriteAwsProfile);
diff --git a/test/helpers.test.ts b/test/helpers.test.ts
index 09e768a..cd4c29e 100644
--- a/test/helpers.test.ts
+++ b/test/helpers.test.ts
@@ -24,6 +24,29 @@ describe('Configure AWS Credentials helpers', {}, () => {
     await expect(helpers.retryAndBackoff(fn, false)).rejects.toMatch('i am not retryable');
     expect(fn).toHaveBeenCalledTimes(1);
   });
+  it('retries and logs with label at info level', {}, async () => {
+    helpers.withsleep(() => Promise.resolve());
+    const fn = vi.fn().mockRejectedValueOnce(new Error('transient')).mockResolvedValueOnce('success');
+    const result = await helpers.retryAndBackoff(fn, true, 3, 0, 50, 'TestOp');
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(2);
+    expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry TestOp: attempt 1 of 3 failed'));
+    helpers.reset();
+  });
+  it('logs max retries reached with label', {}, async () => {
+    helpers.withsleep(() => Promise.resolve());
+    const fn = vi.fn().mockRejectedValue(new Error('persistent'));
+    await expect(helpers.retryAndBackoff(fn, true, 2, 0, 50, 'TestOp')).rejects.toThrow('persistent');
+    expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry TestOp: reached max retries (2)'));
+    helpers.reset();
+  });
+  it('retries without a label (backward compat)', {}, async () => {
+    helpers.withsleep(() => Promise.resolve());
+    const fn = vi.fn().mockRejectedValueOnce(new Error('transient')).mockResolvedValueOnce('ok');
+    await helpers.retryAndBackoff(fn, true, 3);
+    expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry: attempt 1 of 3 failed'));
+    helpers.reset();
+  });
   it('can output creds when told to', {}, () => {
     helpers.exportCredentials(
       { AccessKeyId: 'test', SecretAccessKey: 'test', SessionToken: 'test', Expiration: new Date(8640000000000000) },
diff --git a/test/index.test.ts b/test/index.test.ts
index 0ae9193..bdccc1f 100644
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -7,8 +7,9 @@ import {
 } from '@aws-sdk/client-sts';
 import { mockClient } from 'aws-sdk-client-mock';
 import { fs, vol } from 'memfs';
-import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { CredentialsClient } from '../src/CredentialsClient';
+import * as helpers from '../src/helpers';
 import { run } from '../src/index';
 import * as profileManager from '../src/profileManager';
 import mocks from './mockinputs.test';
@@ -24,11 +25,17 @@ describe('Configure AWS Credentials', {}, () => {
     mockedSTSClient.reset();
     vi.mocked(core.getInput).mockReturnValue('');
     vi.mocked(core.getMultilineInput).mockReturnValue([]);
+    // Inject no-op sleep to avoid real delays during retries in tests
+    helpers.withsleep(() => Promise.resolve());
     // Remove any existing environment variables before each test to prevent the
     // SDK from picking them up
     process.env = { ...mocks.envs };
   });
 
+  afterEach(() => {
+    helpers.reset();
+  });
+
   describe('GitHub OIDC Authentication', {}, () => {
     beforeEach(() => {
       vi.clearAllMocks();
@@ -317,7 +324,12 @@ describe('Configure AWS Credentials', {}, () => {
 
   describe('Odd inputs', {}, () => {
     it('fails when github env vars are missing', {}, async () => {
-      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS));
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_ASSUMEROLE_INPUTS));
+      mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockResolvedValue({
+        accessKeyId: 'MYAWSACCESSKEYID',
+      });
       delete process.env.GITHUB_REPOSITORY;
       delete process.env.GITHUB_SHA;
       await run();
@@ -369,6 +381,10 @@ describe('Configure AWS Credentials', {}, () => {
     });
     it('fails if doing OIDC without the ACTIONS_ID_TOKEN_REQUEST_TOKEN', {}, async () => {
       vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS));
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue(
+        new Error('No credentials available'),
+      );
       delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
       await run();
       expect(core.setFailed).toHaveBeenCalled();
@@ -376,6 +392,10 @@ describe('Configure AWS Credentials', {}, () => {
     it("gets new creds if told to reuse existing but they're invalid", {}, async () => {
       vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.USE_EXISTING_CREDENTIALS_INPUTS));
       mockedSTSClient.on(GetCallerIdentityCommand).rejects();
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue(
+        new Error('No credentials available'),
+      );
       await run();
       expect(core.notice).toHaveBeenCalledWith('No valid credentials exist. Running as normal.');
     });
@@ -1140,4 +1160,81 @@ describe('Configure AWS Credentials', {}, () => {
       expect(core.setFailed).toHaveBeenCalledWith(expect.stringContaining('whitespace'));
     });
   });
+
+  describe('Retry Behavior', {}, () => {
+    it('retries exportAccountId on transient GetCallerIdentity failure', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS));
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockResolvedValue({
+        accessKeyId: 'MYAWSACCESSKEYID',
+      });
+      mockedSTSClient
+        .on(GetCallerIdentityCommand)
+        .rejectsOnce(new Error('throttled'))
+        .resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry exportAccountId'));
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+
+    it('retries validateCredentials on transient loadCredentials failure', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS));
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials')
+        .mockRejectedValueOnce(new Error('network glitch'))
+        .mockResolvedValue({ accessKeyId: 'MYAWSACCESSKEYID' });
+      mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry validateCredentials'));
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+
+    it('respects disable-retry for validateCredentials', async () => {
+      vi.mocked(core.getInput).mockImplementation(
+        mocks.getInput({
+          ...mocks.IAM_USER_INPUTS,
+          'disable-retry': 'true',
+        }),
+      );
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue(
+        new Error('network glitch'),
+      );
+      await run();
+      expect(core.setFailed).toHaveBeenCalled();
+      expect(core.info).not.toHaveBeenCalledWith(expect.stringContaining('Retry'));
+    });
+
+    it('retries exportAccountId after role assumption (issue #1681)', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS));
+      vi.mocked(core.getIDToken).mockResolvedValue('testoidctoken');
+      mockedSTSClient.on(AssumeRoleWithWebIdentityCommand).resolves(mocks.outputs.STS_CREDENTIALS);
+      mockedSTSClient
+        .on(GetCallerIdentityCommand)
+        .rejectsOnce(new Error('The security token included in the request is invalid'))
+        .resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = 'fake-token';
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry exportAccountId'));
+      expect(core.info).toHaveBeenCalledWith(
+        expect.stringContaining('The security token included in the request is invalid'),
+      );
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+
+    it('retries AssumeRole and shows info-level retry messages', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS));
+      vi.mocked(core.getIDToken).mockResolvedValue('testoidctoken');
+      mockedSTSClient
+        .on(AssumeRoleWithWebIdentityCommand)
+        .rejectsOnce(new Error('Rate exceeded'))
+        .resolves(mocks.outputs.STS_CREDENTIALS);
+      mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = 'fake-token';
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry AssumeRole'));
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Rate exceeded'));
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+  });
 });