feat: add more retry logic and better logging (#1764)

Wraps exportAccountId and validateCredentials calls in retryAndBackoff. Closes #1681. Adds a label parameter to retryAndBackoff for better info-level log messages.
2026-06-05 14:18:19 +00:00 · 2026-05-11 10:00:14 -07:00 · 2026-05-11 10:00:14 -07:00 · 540d0c13ae
commit 540d0c13ae
parent 07ada0fe07
4 changed files with 178 additions and 47 deletions
--- a/src/helpers.ts
+++ b/src/helpers.ts
@ -189,6 +189,7 @@ export async function retryAndBackoff<T>(
  maxRetries = 12,
  retries = 0,
  base = 50,
+  label?: string,
 ): Promise<T> {
  try {
    return await fn();
@ -200,20 +201,21 @@ export async function retryAndBackoff<T>(
    // It's retryable, so sleep and retry.
    const delay = Math.random() * (2 ** retries * base);
    const nextRetry = retries + 1;
+    const opName = label ? ` ${label}` : '';

-    core.debug(
-      `retryAndBackoff: attempt ${nextRetry} of ${maxRetries} failed: ${errorMessage(err)}. ` +
+    core.info(
+      `Retry${opName}: attempt ${nextRetry} of ${maxRetries} failed: ${errorMessage(err)}. ` +
        `Retrying after ${Math.floor(delay)}ms.`,
    );

    await sleep(delay);

    if (nextRetry >= maxRetries) {
-      core.debug('retryAndBackoff: reached max retries; giving up.');
+      core.info(`Retry${opName}: reached max retries (${maxRetries}); giving up.`);
      throw err;
    }

-    return await retryAndBackoff(fn, isRetryable, maxRetries, nextRetry, base);
+    return await retryAndBackoff(fn, isRetryable, maxRetries, nextRetry, base, label);
  }
 }

--- a/src/index.ts
+++ b/src/index.ts
@ -90,6 +90,9 @@ export async function run() {
      maxRetries = 1;
    }

+    const withRetry = <T>(fn: () => Promise<T>, label: string): Promise<T> =>
+      retryAndBackoff(fn, !disableRetry, maxRetries, 0, 50, label);
+
    // Logic to decide whether to attempt to use OIDC or not
    const useGitHubOIDCProvider = () => {
      if (forceSkipOidc) return false;
@ -161,13 +164,9 @@ export async function run() {
    // Else, export credentials provided as input
    if (useGitHubOIDCProvider()) {
      try {
-        webIdentityToken = await retryAndBackoff(
-          async () => {
-            return core.getIDToken(audience);
-          },
-          !disableRetry,
-          maxRetries,
-        );
+        webIdentityToken = await withRetry(async () => {
+          return core.getIDToken(audience);
+        }, 'getIDToken');
      } catch (error) {
        throw new Error(`getIDToken call failed: ${errorMessage(error)}`);
      }
@ -188,16 +187,22 @@ export async function run() {
      }
    } else if (!webIdentityTokenFile && !roleChaining) {
      // Proceed only if credentials can be picked up
-      await credentialsClient.validateCredentials(undefined, roleChaining, expectedAccountIds);
-      sourceAccountId = await exportAccountId(credentialsClient, maskAccountId);
+      await withRetry(
+        () => credentialsClient.validateCredentials(undefined, roleChaining, expectedAccountIds),
+        'validateCredentials',
+      );
+      sourceAccountId = await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId');
    }

    if (AccessKeyId || roleChaining) {
      // Validate that the SDK can actually pick up credentials.
      // This validates cases where this action is using existing environment credentials,
      // and cases where the user intended to provide input credentials but the secrets inputs resolved to empty strings.
-      await credentialsClient.validateCredentials(AccessKeyId, roleChaining, expectedAccountIds);
-      sourceAccountId = await exportAccountId(credentialsClient, maskAccountId);
+      await withRetry(
+        () => credentialsClient.validateCredentials(AccessKeyId, roleChaining, expectedAccountIds),
+        'validateCredentials',
+      );
+      sourceAccountId = await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId');
    }
    if (customTags && (useGitHubOIDCProvider() || webIdentityTokenFile)) {
      core.warning(
@ -210,27 +215,23 @@ export async function run() {
    if (roleToAssume) {
      let roleCredentials: AssumeRoleCommandOutput;
      do {
-        roleCredentials = await retryAndBackoff(
-          async () => {
-            return assumeRole({
-              credentialsClient,
-              sourceAccountId,
-              roleToAssume,
-              roleExternalId,
-              roleDuration,
-              roleSessionName,
-              roleSkipSessionTagging,
-              transitiveTagKeys,
-              webIdentityTokenFile,
-              webIdentityToken,
-              inlineSessionPolicy,
-              managedSessionPolicies,
-              customTags,
-            });
-          },
-          !disableRetry,
-          maxRetries,
-        );
+        roleCredentials = await withRetry(async () => {
+          return assumeRole({
+            credentialsClient,
+            sourceAccountId,
+            roleToAssume,
+            roleExternalId,
+            roleDuration,
+            roleSessionName,
+            roleSkipSessionTagging,
+            transitiveTagKeys,
+            webIdentityTokenFile,
+            webIdentityToken,
+            inlineSessionPolicy,
+            managedSessionPolicies,
+            customTags,
+          });
+        }, 'AssumeRole');
      } while (specialCharacterWorkaround && !verifyKeys(roleCredentials.Credentials));
      core.info(`Authenticated as assumedRoleId ${roleCredentials.AssumedRoleUser?.AssumedRoleId}`);
      exportCredentials(roleCredentials.Credentials, outputCredentials, outputEnvCredentials);
@ -241,14 +242,18 @@ export async function run() {
      // If we are using a profile, don't validate credentials yet (since they most likely won't be in the environment).
      // Wait until after creds are written to the profile file to try validation.
      if ((!process.env.GITHUB_ACTIONS || AccessKeyId) && !awsProfile) {
-        await credentialsClient.validateCredentials(
-          roleCredentials.Credentials?.AccessKeyId,
-          roleChaining,
-          expectedAccountIds,
+        await withRetry(
+          () =>
+            credentialsClient.validateCredentials(
+              roleCredentials.Credentials?.AccessKeyId,
+              roleChaining,
+              expectedAccountIds,
+            ),
+          'validateCredentials',
        );
      }
      if (outputEnvCredentials) {
-        await exportAccountId(credentialsClient, maskAccountId);
+        await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId');
      }

      // Write profile files if profile mode is enabled
@ -261,10 +266,14 @@ export async function run() {
        // We then validate the credentials to make sure they work.
        if (AccessKeyId || !process.env.GITHUB_ACTIONS) {
          writeProfileFiles(awsProfile, roleCredentials.Credentials, region, true);
-          await credentialsClient.validateCredentials(
-            roleCredentials.Credentials.AccessKeyId,
-            roleChaining,
-            expectedAccountIds,
+          await withRetry(
+            () =>
+              credentialsClient.validateCredentials(
+                roleCredentials.Credentials?.AccessKeyId,
+                roleChaining,
+                expectedAccountIds,
+              ),
+            'validateCredentials',
          );
        } else {
          writeProfileFiles(awsProfile, roleCredentials.Credentials, region, overwriteAwsProfile);
--- a/test/helpers.test.ts
+++ b/test/helpers.test.ts
@ -24,6 +24,29 @@ describe('Configure AWS Credentials helpers', {}, () => {
    await expect(helpers.retryAndBackoff(fn, false)).rejects.toMatch('i am not retryable');
    expect(fn).toHaveBeenCalledTimes(1);
  });
+  it('retries and logs with label at info level', {}, async () => {
+    helpers.withsleep(() => Promise.resolve());
+    const fn = vi.fn().mockRejectedValueOnce(new Error('transient')).mockResolvedValueOnce('success');
+    const result = await helpers.retryAndBackoff(fn, true, 3, 0, 50, 'TestOp');
+    expect(result).toBe('success');
+    expect(fn).toHaveBeenCalledTimes(2);
+    expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry TestOp: attempt 1 of 3 failed'));
+    helpers.reset();
+  });
+  it('logs max retries reached with label', {}, async () => {
+    helpers.withsleep(() => Promise.resolve());
+    const fn = vi.fn().mockRejectedValue(new Error('persistent'));
+    await expect(helpers.retryAndBackoff(fn, true, 2, 0, 50, 'TestOp')).rejects.toThrow('persistent');
+    expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry TestOp: reached max retries (2)'));
+    helpers.reset();
+  });
+  it('retries without a label (backward compat)', {}, async () => {
+    helpers.withsleep(() => Promise.resolve());
+    const fn = vi.fn().mockRejectedValueOnce(new Error('transient')).mockResolvedValueOnce('ok');
+    await helpers.retryAndBackoff(fn, true, 3);
+    expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry: attempt 1 of 3 failed'));
+    helpers.reset();
+  });
  it('can output creds when told to', {}, () => {
    helpers.exportCredentials(
      { AccessKeyId: 'test', SecretAccessKey: 'test', SessionToken: 'test', Expiration: new Date(8640000000000000) },
--- a/test/index.test.ts
+++ b/test/index.test.ts
@ -7,8 +7,9 @@ import {
 } from '@aws-sdk/client-sts';
 import { mockClient } from 'aws-sdk-client-mock';
 import { fs, vol } from 'memfs';
-import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { CredentialsClient } from '../src/CredentialsClient';
+import * as helpers from '../src/helpers';
 import { run } from '../src/index';
 import * as profileManager from '../src/profileManager';
 import mocks from './mockinputs.test';
@ -24,11 +25,17 @@ describe('Configure AWS Credentials', {}, () => {
    mockedSTSClient.reset();
    vi.mocked(core.getInput).mockReturnValue('');
    vi.mocked(core.getMultilineInput).mockReturnValue([]);
+    // Inject no-op sleep to avoid real delays during retries in tests
+    helpers.withsleep(() => Promise.resolve());
    // Remove any existing environment variables before each test to prevent the
    // SDK from picking them up
    process.env = { ...mocks.envs };
  });

+  afterEach(() => {
+    helpers.reset();
+  });
+
  describe('GitHub OIDC Authentication', {}, () => {
    beforeEach(() => {
      vi.clearAllMocks();
@ -317,7 +324,12 @@ describe('Configure AWS Credentials', {}, () => {

  describe('Odd inputs', {}, () => {
    it('fails when github env vars are missing', {}, async () => {
-      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS));
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_ASSUMEROLE_INPUTS));
+      mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockResolvedValue({
+        accessKeyId: 'MYAWSACCESSKEYID',
+      });
      delete process.env.GITHUB_REPOSITORY;
      delete process.env.GITHUB_SHA;
      await run();
@ -369,6 +381,10 @@ describe('Configure AWS Credentials', {}, () => {
    });
    it('fails if doing OIDC without the ACTIONS_ID_TOKEN_REQUEST_TOKEN', {}, async () => {
      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS));
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue(
+        new Error('No credentials available'),
+      );
      delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN;
      await run();
      expect(core.setFailed).toHaveBeenCalled();
@ -376,6 +392,10 @@ describe('Configure AWS Credentials', {}, () => {
    it("gets new creds if told to reuse existing but they're invalid", {}, async () => {
      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.USE_EXISTING_CREDENTIALS_INPUTS));
      mockedSTSClient.on(GetCallerIdentityCommand).rejects();
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue(
+        new Error('No credentials available'),
+      );
      await run();
      expect(core.notice).toHaveBeenCalledWith('No valid credentials exist. Running as normal.');
    });
@ -1140,4 +1160,81 @@ describe('Configure AWS Credentials', {}, () => {
      expect(core.setFailed).toHaveBeenCalledWith(expect.stringContaining('whitespace'));
    });
  });
+
+  describe('Retry Behavior', {}, () => {
+    it('retries exportAccountId on transient GetCallerIdentity failure', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS));
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockResolvedValue({
+        accessKeyId: 'MYAWSACCESSKEYID',
+      });
+      mockedSTSClient
+        .on(GetCallerIdentityCommand)
+        .rejectsOnce(new Error('throttled'))
+        .resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry exportAccountId'));
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+
+    it('retries validateCredentials on transient loadCredentials failure', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS));
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials')
+        .mockRejectedValueOnce(new Error('network glitch'))
+        .mockResolvedValue({ accessKeyId: 'MYAWSACCESSKEYID' });
+      mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry validateCredentials'));
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+
+    it('respects disable-retry for validateCredentials', async () => {
+      vi.mocked(core.getInput).mockImplementation(
+        mocks.getInput({
+          ...mocks.IAM_USER_INPUTS,
+          'disable-retry': 'true',
+        }),
+      );
+      // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method
+      vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue(
+        new Error('network glitch'),
+      );
+      await run();
+      expect(core.setFailed).toHaveBeenCalled();
+      expect(core.info).not.toHaveBeenCalledWith(expect.stringContaining('Retry'));
+    });
+
+    it('retries exportAccountId after role assumption (issue #1681)', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS));
+      vi.mocked(core.getIDToken).mockResolvedValue('testoidctoken');
+      mockedSTSClient.on(AssumeRoleWithWebIdentityCommand).resolves(mocks.outputs.STS_CREDENTIALS);
+      mockedSTSClient
+        .on(GetCallerIdentityCommand)
+        .rejectsOnce(new Error('The security token included in the request is invalid'))
+        .resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = 'fake-token';
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry exportAccountId'));
+      expect(core.info).toHaveBeenCalledWith(
+        expect.stringContaining('The security token included in the request is invalid'),
+      );
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+
+    it('retries AssumeRole and shows info-level retry messages', async () => {
+      vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS));
+      vi.mocked(core.getIDToken).mockResolvedValue('testoidctoken');
+      mockedSTSClient
+        .on(AssumeRoleWithWebIdentityCommand)
+        .rejectsOnce(new Error('Rate exceeded'))
+        .resolves(mocks.outputs.STS_CREDENTIALS);
+      mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY });
+      process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = 'fake-token';
+      await run();
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry AssumeRole'));
+      expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Rate exceeded'));
+      expect(core.setFailed).not.toHaveBeenCalled();
+    });
+  });
 });