From bc8d695026c46b0d90ea883bcc7fafb742cf503c Mon Sep 17 00:00:00 2001 From: Tom Keller Date: Fri, 8 May 2026 11:01:28 -0700 Subject: [PATCH] feat: add more retry logic and better logging Wraps exportAccountId and validateCredentials calls in retryAndBackoff. Closes #1681. Adds a label parameter to retryAndBackoff for better info-level log messages. --- src/helpers.ts | 10 +++-- src/index.ts | 91 ++++++++++++++++++++------------------ test/helpers.test.ts | 23 ++++++++++ test/index.test.ts | 101 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 178 insertions(+), 47 deletions(-) diff --git a/src/helpers.ts b/src/helpers.ts index 82e25d5..cf32029 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -189,6 +189,7 @@ export async function retryAndBackoff( maxRetries = 12, retries = 0, base = 50, + label?: string, ): Promise { try { return await fn(); @@ -200,20 +201,21 @@ export async function retryAndBackoff( // It's retryable, so sleep and retry. const delay = Math.random() * (2 ** retries * base); const nextRetry = retries + 1; + const opName = label ? ` ${label}` : ''; - core.debug( - `retryAndBackoff: attempt ${nextRetry} of ${maxRetries} failed: ${errorMessage(err)}. ` + + core.info( + `Retry${opName}: attempt ${nextRetry} of ${maxRetries} failed: ${errorMessage(err)}. ` + `Retrying after ${Math.floor(delay)}ms.`, ); await sleep(delay); if (nextRetry >= maxRetries) { - core.debug('retryAndBackoff: reached max retries; giving up.'); + core.info(`Retry${opName}: reached max retries (${maxRetries}); giving up.`); throw err; } - return await retryAndBackoff(fn, isRetryable, maxRetries, nextRetry, base); + return await retryAndBackoff(fn, isRetryable, maxRetries, nextRetry, base, label); } } diff --git a/src/index.ts b/src/index.ts index b87ef84..a53de0a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -90,6 +90,9 @@ export async function run() { maxRetries = 1; } + const withRetry = (fn: () => Promise, label: string): Promise => + retryAndBackoff(fn, !disableRetry, maxRetries, 0, 50, label); + // Logic to decide whether to attempt to use OIDC or not const useGitHubOIDCProvider = () => { if (forceSkipOidc) return false; @@ -161,13 +164,9 @@ export async function run() { // Else, export credentials provided as input if (useGitHubOIDCProvider()) { try { - webIdentityToken = await retryAndBackoff( - async () => { - return core.getIDToken(audience); - }, - !disableRetry, - maxRetries, - ); + webIdentityToken = await withRetry(async () => { + return core.getIDToken(audience); + }, 'getIDToken'); } catch (error) { throw new Error(`getIDToken call failed: ${errorMessage(error)}`); } @@ -188,16 +187,22 @@ export async function run() { } } else if (!webIdentityTokenFile && !roleChaining) { // Proceed only if credentials can be picked up - await credentialsClient.validateCredentials(undefined, roleChaining, expectedAccountIds); - sourceAccountId = await exportAccountId(credentialsClient, maskAccountId); + await withRetry( + () => credentialsClient.validateCredentials(undefined, roleChaining, expectedAccountIds), + 'validateCredentials', + ); + sourceAccountId = await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId'); } if (AccessKeyId || roleChaining) { // Validate that the SDK can actually pick up credentials. // This validates cases where this action is using existing environment credentials, // and cases where the user intended to provide input credentials but the secrets inputs resolved to empty strings. - await credentialsClient.validateCredentials(AccessKeyId, roleChaining, expectedAccountIds); - sourceAccountId = await exportAccountId(credentialsClient, maskAccountId); + await withRetry( + () => credentialsClient.validateCredentials(AccessKeyId, roleChaining, expectedAccountIds), + 'validateCredentials', + ); + sourceAccountId = await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId'); } if (customTags && (useGitHubOIDCProvider() || webIdentityTokenFile)) { core.warning( @@ -210,27 +215,23 @@ export async function run() { if (roleToAssume) { let roleCredentials: AssumeRoleCommandOutput; do { - roleCredentials = await retryAndBackoff( - async () => { - return assumeRole({ - credentialsClient, - sourceAccountId, - roleToAssume, - roleExternalId, - roleDuration, - roleSessionName, - roleSkipSessionTagging, - transitiveTagKeys, - webIdentityTokenFile, - webIdentityToken, - inlineSessionPolicy, - managedSessionPolicies, - customTags, - }); - }, - !disableRetry, - maxRetries, - ); + roleCredentials = await withRetry(async () => { + return assumeRole({ + credentialsClient, + sourceAccountId, + roleToAssume, + roleExternalId, + roleDuration, + roleSessionName, + roleSkipSessionTagging, + transitiveTagKeys, + webIdentityTokenFile, + webIdentityToken, + inlineSessionPolicy, + managedSessionPolicies, + customTags, + }); + }, 'AssumeRole'); } while (specialCharacterWorkaround && !verifyKeys(roleCredentials.Credentials)); core.info(`Authenticated as assumedRoleId ${roleCredentials.AssumedRoleUser?.AssumedRoleId}`); exportCredentials(roleCredentials.Credentials, outputCredentials, outputEnvCredentials); @@ -241,14 +242,18 @@ export async function run() { // If we are using a profile, don't validate credentials yet (since they most likely won't be in the environment). // Wait until after creds are written to the profile file to try validation. if ((!process.env.GITHUB_ACTIONS || AccessKeyId) && !awsProfile) { - await credentialsClient.validateCredentials( - roleCredentials.Credentials?.AccessKeyId, - roleChaining, - expectedAccountIds, + await withRetry( + () => + credentialsClient.validateCredentials( + roleCredentials.Credentials?.AccessKeyId, + roleChaining, + expectedAccountIds, + ), + 'validateCredentials', ); } if (outputEnvCredentials) { - await exportAccountId(credentialsClient, maskAccountId); + await withRetry(() => exportAccountId(credentialsClient, maskAccountId), 'exportAccountId'); } // Write profile files if profile mode is enabled @@ -261,10 +266,14 @@ export async function run() { // We then validate the credentials to make sure they work. if (AccessKeyId || !process.env.GITHUB_ACTIONS) { writeProfileFiles(awsProfile, roleCredentials.Credentials, region, true); - await credentialsClient.validateCredentials( - roleCredentials.Credentials.AccessKeyId, - roleChaining, - expectedAccountIds, + await withRetry( + () => + credentialsClient.validateCredentials( + roleCredentials.Credentials?.AccessKeyId, + roleChaining, + expectedAccountIds, + ), + 'validateCredentials', ); } else { writeProfileFiles(awsProfile, roleCredentials.Credentials, region, overwriteAwsProfile); diff --git a/test/helpers.test.ts b/test/helpers.test.ts index 09e768a..cd4c29e 100644 --- a/test/helpers.test.ts +++ b/test/helpers.test.ts @@ -24,6 +24,29 @@ describe('Configure AWS Credentials helpers', {}, () => { await expect(helpers.retryAndBackoff(fn, false)).rejects.toMatch('i am not retryable'); expect(fn).toHaveBeenCalledTimes(1); }); + it('retries and logs with label at info level', {}, async () => { + helpers.withsleep(() => Promise.resolve()); + const fn = vi.fn().mockRejectedValueOnce(new Error('transient')).mockResolvedValueOnce('success'); + const result = await helpers.retryAndBackoff(fn, true, 3, 0, 50, 'TestOp'); + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry TestOp: attempt 1 of 3 failed')); + helpers.reset(); + }); + it('logs max retries reached with label', {}, async () => { + helpers.withsleep(() => Promise.resolve()); + const fn = vi.fn().mockRejectedValue(new Error('persistent')); + await expect(helpers.retryAndBackoff(fn, true, 2, 0, 50, 'TestOp')).rejects.toThrow('persistent'); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry TestOp: reached max retries (2)')); + helpers.reset(); + }); + it('retries without a label (backward compat)', {}, async () => { + helpers.withsleep(() => Promise.resolve()); + const fn = vi.fn().mockRejectedValueOnce(new Error('transient')).mockResolvedValueOnce('ok'); + await helpers.retryAndBackoff(fn, true, 3); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry: attempt 1 of 3 failed')); + helpers.reset(); + }); it('can output creds when told to', {}, () => { helpers.exportCredentials( { AccessKeyId: 'test', SecretAccessKey: 'test', SessionToken: 'test', Expiration: new Date(8640000000000000) }, diff --git a/test/index.test.ts b/test/index.test.ts index 0ae9193..bdccc1f 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -7,8 +7,9 @@ import { } from '@aws-sdk/client-sts'; import { mockClient } from 'aws-sdk-client-mock'; import { fs, vol } from 'memfs'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { CredentialsClient } from '../src/CredentialsClient'; +import * as helpers from '../src/helpers'; import { run } from '../src/index'; import * as profileManager from '../src/profileManager'; import mocks from './mockinputs.test'; @@ -24,11 +25,17 @@ describe('Configure AWS Credentials', {}, () => { mockedSTSClient.reset(); vi.mocked(core.getInput).mockReturnValue(''); vi.mocked(core.getMultilineInput).mockReturnValue([]); + // Inject no-op sleep to avoid real delays during retries in tests + helpers.withsleep(() => Promise.resolve()); // Remove any existing environment variables before each test to prevent the // SDK from picking them up process.env = { ...mocks.envs }; }); + afterEach(() => { + helpers.reset(); + }); + describe('GitHub OIDC Authentication', {}, () => { beforeEach(() => { vi.clearAllMocks(); @@ -317,7 +324,12 @@ describe('Configure AWS Credentials', {}, () => { describe('Odd inputs', {}, () => { it('fails when github env vars are missing', {}, async () => { - vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS)); + vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_ASSUMEROLE_INPUTS)); + mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY }); + // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method + vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockResolvedValue({ + accessKeyId: 'MYAWSACCESSKEYID', + }); delete process.env.GITHUB_REPOSITORY; delete process.env.GITHUB_SHA; await run(); @@ -369,6 +381,10 @@ describe('Configure AWS Credentials', {}, () => { }); it('fails if doing OIDC without the ACTIONS_ID_TOKEN_REQUEST_TOKEN', {}, async () => { vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS)); + // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method + vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue( + new Error('No credentials available'), + ); delete process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN; await run(); expect(core.setFailed).toHaveBeenCalled(); @@ -376,6 +392,10 @@ describe('Configure AWS Credentials', {}, () => { it("gets new creds if told to reuse existing but they're invalid", {}, async () => { vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.USE_EXISTING_CREDENTIALS_INPUTS)); mockedSTSClient.on(GetCallerIdentityCommand).rejects(); + // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method + vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue( + new Error('No credentials available'), + ); await run(); expect(core.notice).toHaveBeenCalledWith('No valid credentials exist. Running as normal.'); }); @@ -1140,4 +1160,81 @@ describe('Configure AWS Credentials', {}, () => { expect(core.setFailed).toHaveBeenCalledWith(expect.stringContaining('whitespace')); }); }); + + describe('Retry Behavior', {}, () => { + it('retries exportAccountId on transient GetCallerIdentity failure', async () => { + vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS)); + // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method + vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockResolvedValue({ + accessKeyId: 'MYAWSACCESSKEYID', + }); + mockedSTSClient + .on(GetCallerIdentityCommand) + .rejectsOnce(new Error('throttled')) + .resolves({ ...mocks.outputs.GET_CALLER_IDENTITY }); + await run(); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry exportAccountId')); + expect(core.setFailed).not.toHaveBeenCalled(); + }); + + it('retries validateCredentials on transient loadCredentials failure', async () => { + vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.IAM_USER_INPUTS)); + // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method + vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials') + .mockRejectedValueOnce(new Error('network glitch')) + .mockResolvedValue({ accessKeyId: 'MYAWSACCESSKEYID' }); + mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY }); + await run(); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry validateCredentials')); + expect(core.setFailed).not.toHaveBeenCalled(); + }); + + it('respects disable-retry for validateCredentials', async () => { + vi.mocked(core.getInput).mockImplementation( + mocks.getInput({ + ...mocks.IAM_USER_INPUTS, + 'disable-retry': 'true', + }), + ); + // biome-ignore lint/suspicious/noExplicitAny: any required to mock private method + vi.spyOn(CredentialsClient.prototype as any, 'loadCredentials').mockRejectedValue( + new Error('network glitch'), + ); + await run(); + expect(core.setFailed).toHaveBeenCalled(); + expect(core.info).not.toHaveBeenCalledWith(expect.stringContaining('Retry')); + }); + + it('retries exportAccountId after role assumption (issue #1681)', async () => { + vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS)); + vi.mocked(core.getIDToken).mockResolvedValue('testoidctoken'); + mockedSTSClient.on(AssumeRoleWithWebIdentityCommand).resolves(mocks.outputs.STS_CREDENTIALS); + mockedSTSClient + .on(GetCallerIdentityCommand) + .rejectsOnce(new Error('The security token included in the request is invalid')) + .resolves({ ...mocks.outputs.GET_CALLER_IDENTITY }); + process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = 'fake-token'; + await run(); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry exportAccountId')); + expect(core.info).toHaveBeenCalledWith( + expect.stringContaining('The security token included in the request is invalid'), + ); + expect(core.setFailed).not.toHaveBeenCalled(); + }); + + it('retries AssumeRole and shows info-level retry messages', async () => { + vi.mocked(core.getInput).mockImplementation(mocks.getInput(mocks.GH_OIDC_INPUTS)); + vi.mocked(core.getIDToken).mockResolvedValue('testoidctoken'); + mockedSTSClient + .on(AssumeRoleWithWebIdentityCommand) + .rejectsOnce(new Error('Rate exceeded')) + .resolves(mocks.outputs.STS_CREDENTIALS); + mockedSTSClient.on(GetCallerIdentityCommand).resolves({ ...mocks.outputs.GET_CALLER_IDENTITY }); + process.env.ACTIONS_ID_TOKEN_REQUEST_TOKEN = 'fake-token'; + await run(); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Retry AssumeRole')); + expect(core.info).toHaveBeenCalledWith(expect.stringContaining('Rate exceeded')); + expect(core.setFailed).not.toHaveBeenCalled(); + }); + }); });