From 2ab324f87405add4538bb55dc5ddf7199580f8c3 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Fri, 3 May 2024 21:08:46 +0400 Subject: [PATCH 01/11] Update retry helper --- src/Agent.Worker/RetryHelper.cs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Agent.Worker/RetryHelper.cs b/src/Agent.Worker/RetryHelper.cs index 9e3aabe1b0..bfc2707098 100644 --- a/src/Agent.Worker/RetryHelper.cs +++ b/src/Agent.Worker/RetryHelper.cs @@ -1,4 +1,5 @@ using System; +using System.Reflection; using System.Threading.Tasks; using Microsoft.TeamFoundation.DistributedTask.WebApi; @@ -17,7 +18,6 @@ public static int ExponentialDelay(int retryNumber) return (int)(Math.Pow(retryNumber + 1, 2) * 1000); } - public RetryHelper(IExecutionContext executionContext, int maxRetries = 3) { Debug = (str) => executionContext.Debug(str); @@ -47,19 +47,18 @@ public async Task Retry(Func> action, Func timeDelayInte } catch (Exception ex) { - if (!shouldRetryOnException(ex) || ExhaustedRetryCount(retryCounter)) + if (!shouldRetryOnException(ex) || ExhaustedRetryCount(retryCounter, action.Method)) { throw; } - Warning($"Intermittent failure attempting to call the restapis {action.Method}. Retry attempt {retryCounter}. Exception: {ex.Message} "); + Warning($"Intermittent failure attempting to call the method {action.Method}. Retry attempt {retryCounter}. Exception: {ex.Message} "); var delay = timeDelayInterval(retryCounter); await Task.Delay(delay); } retryCounter++; } } while (true); - } /// @@ -87,7 +86,7 @@ public async Task RetryStep(Func action, Func timeDelayInterval) Debug($"Invoking Method: {action.Method}. 
Attempt count: {retryCounter}"); await action(); - if (ExecutionContext.Result != TaskResult.Failed || ExhaustedRetryCount(retryCounter)) + if (ExecutionContext.Result != TaskResult.Failed || ExhaustedRetryCount(retryCounter, action.Method)) { return; } @@ -100,7 +99,7 @@ public async Task RetryStep(Func action, Func timeDelayInterval) } catch (Exception ex) { - if (!ShouldRetryStepOnException(ex) || ExhaustedRetryCount(retryCounter)) + if (!ShouldRetryStepOnException(ex) || ExhaustedRetryCount(retryCounter, action.Method)) { throw; } @@ -115,11 +114,11 @@ public async Task RetryStep(Func action, Func timeDelayInterval) } while (true); } - private bool ExhaustedRetryCount(int retryCount) + private bool ExhaustedRetryCount(int retryCount, MethodInfo methodInfo) { if (retryCount >= MaxRetries) { - Debug($"Failure attempting to call the restapi and retry counter is exhausted"); + Debug($"Failure attempting to call the {methodInfo} and retry counter is exhausted"); return true; } return false; From 3d6a973c6e1a0453c958cadc650a55403fca9b1d Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 15:16:42 +0400 Subject: [PATCH 02/11] Add retry for cloud-init logs --- src/Agent.Worker/DiagnosticLogManager.cs | 57 +++++++++++++++--------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index 055c237bca..5a5e9d9088 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -145,7 +145,7 @@ public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, executionContext.Debug("Dumping cloud-init logs."); string logsFilePath = $"{HostContext.GetDiagDirectory()}/cloudinit-{jobStartTimeUtc.ToString("yyyyMMdd-HHmmss")}-logs.tar.gz"; - string resultLogs = await DumpCloudInitLogs(logsFilePath); + string resultLogs = await DumpCloudInitLogs(executionContext, logsFilePath); executionContext.Debug(resultLogs); 
if (File.Exists(logsFilePath)) @@ -336,7 +336,7 @@ private bool DumpAgentExtensionLogs(IExecutionContext executionContext, string s /// /// Path to collect cloud-init logs /// Returns the method execution logs - private async Task DumpCloudInitLogs(string logsFile) + private async Task DumpCloudInitLogs(IExecutionContext executionContext, string logsFile) { var builder = new StringBuilder(); string cloudInit = WhichUtil.Which("cloud-init", trace: Trace); @@ -349,28 +349,43 @@ private async Task DumpCloudInitLogs(string logsFile) try { - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => - { - builder.AppendLine(args.Data); - }; + builder.AppendLine(args.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + { + builder.AppendLine(args.Data); + }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - builder.AppendLine(args.Data); - }; + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: cloudInit, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("Getting of cloud-init logs process failed by timeout. 
Retrying..."); + } - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: cloudInit, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); - } + return true; + }); } catch (Exception ex) { @@ -486,7 +501,7 @@ private async Task GetEnvironmentContent(int agentId, string agentName, } return await GetEnvironmentContentNonWindows(agentId, agentName, steps); } - + [SupportedOSPlatform("windows")] private async Task GetEnvironmentContentWindows(int agentId, string agentName, IList steps) { From 7c3257ed0653b69f57df882934983050f602bd26 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 15:37:12 +0400 Subject: [PATCH 03/11] Pass exec context everywhere + retry GetPsVersionInfo --- src/Agent.Worker/DiagnosticLogManager.cs | 61 +++++++++++++++--------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index 5a5e9d9088..a04d46b97a 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -72,7 +72,7 @@ public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, executionContext.Debug("Creating diagnostic log environment file."); string environmentFile = Path.Combine(supportFilesFolder, "environment.txt"); - string content = await GetEnvironmentContent(agentId, agentName, message.Steps); + string content = await GetEnvironmentContent(executionContext, agentId, agentName, message.Steps); File.WriteAllText(environmentFile, content); // Create the capabilities file @@ -493,17 +493,17 @@ private List GetAgentDiagLogFiles(string diagFolder, DateTime jobStartTi return agentLogFiles; } - private async Task GetEnvironmentContent(int agentId, string agentName, IList steps) + private async Task 
GetEnvironmentContent(IExecutionContext executionContext, int agentId, string agentName, IList steps) { if (PlatformUtil.RunningOnWindows) { - return await GetEnvironmentContentWindows(agentId, agentName, steps); + return await GetEnvironmentContentWindows(executionContext, agentId, agentName, steps); } - return await GetEnvironmentContentNonWindows(agentId, agentName, steps); + return await GetEnvironmentContentNonWindows(executionContext, agentId, agentName, steps); } [SupportedOSPlatform("windows")] - private async Task GetEnvironmentContentWindows(int agentId, string agentName, IList steps) + private async Task GetEnvironmentContentWindows(IExecutionContext executionContext, int agentId, string agentName, IList steps) { var builder = new StringBuilder(); @@ -527,9 +527,9 @@ private async Task GetEnvironmentContentWindows(int agentId, string agen // $psversiontable builder.AppendLine("Powershell Version Info:"); - builder.AppendLine(await GetPsVersionInfo()); + builder.AppendLine(await GetPsVersionInfo(executionContext)); - builder.AppendLine(await GetLocalGroupMembership()); + builder.AppendLine(await GetLocalGroupMembership(executionContext)); return builder.ToString(); } @@ -565,7 +565,7 @@ private bool IsFirewallEnabled() } [SupportedOSPlatform("windows")] - private async Task GetPsVersionInfo() + private async Task GetPsVersionInfo(IExecutionContext executionContext) { var builder = new StringBuilder(); @@ -577,21 +577,38 @@ private async Task GetPsVersionInfo() { builder.AppendLine(args.Data); }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => { builder.AppendLine(args.Data); }; - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: powerShellExe, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); + var retryHelper = new 
RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => + { + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: powerShellExe, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("Getting of powershell version info process failed by timeout. Retrying..."); + } + + return true; + }); } return builder.ToString(); @@ -601,7 +618,7 @@ await processInvoker.ExecuteAsync( /// Gathers a list of local group memberships for the current user. /// [SupportedOSPlatform("windows")] - private async Task GetLocalGroupMembership() + private async Task GetLocalGroupMembership(IExecutionContext executionContext) { var builder = new StringBuilder(); @@ -644,7 +661,7 @@ await processInvoker.ExecuteAsync( return builder.ToString(); } - private async Task GetEnvironmentContentNonWindows(int agentId, string agentName, IList steps) + private async Task GetEnvironmentContentNonWindows(IExecutionContext executionContext, int agentId, string agentName, IList steps) { var builder = new StringBuilder(); @@ -653,7 +670,7 @@ private async Task GetEnvironmentContentNonWindows(int agentId, string a builder.AppendLine($"Agent Id: {agentId}"); builder.AppendLine($"Agent Name: {agentName}"); builder.AppendLine($"OS: {System.Runtime.InteropServices.RuntimeInformation.OSDescription}"); - builder.AppendLine($"User groups: {await GetUserGroupsOnNonWindows()}"); + builder.AppendLine($"User groups: {await GetUserGroupsOnNonWindows(executionContext)}"); builder.AppendLine("Steps:"); foreach (Pipelines.TaskStep task in steps.OfType()) @@ 
-668,7 +685,7 @@ private async Task GetEnvironmentContentNonWindows(int agentId, string a /// Get user groups on a non-windows platform using core utility "id". /// /// Returns the string with user groups - private async Task GetUserGroupsOnNonWindows() + private async Task GetUserGroupsOnNonWindows(IExecutionContext executionContext) { var idUtil = WhichUtil.Which("id"); var stringBuilder = new StringBuilder(); From f0123f3415c2f9edf5a5cff4f5055f53da0f3022 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 15:43:10 +0400 Subject: [PATCH 04/11] Cover GetLocalGroupMembership by retry logic --- src/Agent.Worker/DiagnosticLogManager.cs | 49 ++++++++++++++++-------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index a04d46b97a..9bd8613bf1 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -630,28 +630,43 @@ private async Task GetLocalGroupMembership(IExecutionContext executionCo try { - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + builder.AppendLine(args.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + { + builder.AppendLine(args.Data); + }; + + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - builder.AppendLine(args.Data); - }; + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + return await processInvoker.ExecuteAsync( + workingDirectory: 
HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: powerShellExe, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (exception) => { - builder.AppendLine(args.Data); - }; + if (exception is OperationCanceledException) + { + executionContext.Debug("Getting of local group membership process failed by timeout. Retrying..."); + } - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: powerShellExe, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); - } + return true; + }); } catch (Exception ex) { From 078cfe8d85d62d9d8cbbf6ce32d0c7a495522937 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 16:02:35 +0400 Subject: [PATCH 05/11] Cover rest of executions in service --- src/Agent.Worker/DiagnosticLogManager.cs | 139 +++++++++++++++-------- 1 file changed, 94 insertions(+), 45 deletions(-) diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index 9bd8613bf1..24cba02047 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -197,7 +197,7 @@ public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, { try { - string packageVerificationResults = await GetPackageVerificationResult(debsums); + string packageVerificationResults = await GetPackageVerificationResult(executionContext, debsums); IEnumerable brokenPackagesInfo = packageVerificationResults .Split("\n") .Where((line) => !String.IsNullOrEmpty(line) && !line.EndsWith("OK")); @@ -381,7 +381,7 @@ await retryHelper.Retry( { if (exception is OperationCanceledException) { - executionContext.Debug("Getting of 
cloud-init logs process failed by timeout. Retrying..."); + executionContext.Debug("DumpCloudInitLogs process failed by timeout. Retrying..."); } return true; @@ -604,7 +604,7 @@ await retryHelper.Retry( { if (exception is OperationCanceledException) { - executionContext.Debug("Getting of powershell version info process failed by timeout. Retrying..."); + executionContext.Debug("GetPsVersionInfo process failed by timeout. Retrying..."); } return true; @@ -662,7 +662,7 @@ await retryHelper.Retry( { if (exception is OperationCanceledException) { - executionContext.Debug("Getting of local group membership process failed by timeout. Retrying..."); + executionContext.Debug("GetLocalGroupMembership process failed by timeout. Retrying..."); } return true; @@ -717,16 +717,33 @@ private async Task GetUserGroupsOnNonWindows(IExecutionContext execution stringBuilder.AppendLine(mes.Data); }; - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: idUtil, - arguments: "-nG", - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken) - ); + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => + { + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: idUtil, + arguments: "-nG", + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("GetUserGroupsOnNonWindows process failed by timeout. 
Retrying..."); + } + + return true; + }); } } catch (Exception ex) @@ -750,49 +767,81 @@ private async Task DumpCurrentJobEventLogs(IExecutionContext executionContext, s Get-WinEvent -ListLog * | where {{ $_.RecordCount -gt 0 }} ` | ForEach-Object {{ Get-WinEvent -ErrorAction SilentlyContinue -FilterHashtable @{{ LogName=$_.LogName; StartTime='{startDate}'; EndTime='{endDate}'; }} }} ` | Export-CSV {logFile}"; - using (var processInvoker = HostContext.CreateService()) - { - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: powerShellExe, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); - } + using var processInvoker = HostContext.CreateService(); + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => + { + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: powerShellExe, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("DumpCurrentJobEventLogs process failed by timeout. Retrying..."); + } + + return true; + }); } /// /// Git package verification result using the "debsums" utility. 
/// /// String with the "debsums" output - private async Task GetPackageVerificationResult(string debsumsPath) + private async Task GetPackageVerificationResult(IExecutionContext executionContext, string debsumsPath) { var stringBuilder = new StringBuilder(); - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + { + stringBuilder.AppendLine(mes.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + stringBuilder.AppendLine(mes.Data); + }; + + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - stringBuilder.AppendLine(mes.Data); - }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: debsumsPath, + arguments: string.Empty, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (exception) => { - stringBuilder.AppendLine(mes.Data); - }; + if (exception is OperationCanceledException) + { + executionContext.Debug("GetPackageVerificationResult process failed by timeout. 
Retrying..."); + } + + return true; + }); - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: debsumsPath, - arguments: string.Empty, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken) - ); - } return stringBuilder.ToString(); } From d6c955fd8f2d421eae13467610dcefd49d0424a7 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 20:16:09 +0400 Subject: [PATCH 06/11] Move resource manager init under debug condition --- src/Agent.Worker/JobRunner.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Agent.Worker/JobRunner.cs b/src/Agent.Worker/JobRunner.cs index c24e441dc2..1c3aa89431 100644 --- a/src/Agent.Worker/JobRunner.cs +++ b/src/Agent.Worker/JobRunner.cs @@ -111,12 +111,12 @@ public async Task RunAsync(Pipelines.AgentJobRequestMessage message, jobContext.Start(); jobContext.Section(StringUtil.Loc("StepStarting", message.JobDisplayName)); - //Start Resource Diagnostics if enabled in the job message + //Start Resource Diagnostics if enabled in the job message jobContext.Variables.TryGetValue("system.debug", out var systemDebug); - resourceDiagnosticManager = HostContext.GetService(); if (string.Equals(systemDebug, "true", StringComparison.OrdinalIgnoreCase)) { + resourceDiagnosticManager = HostContext.GetService(); resourceDiagnosticManager.Setup(jobContext); _ = resourceDiagnosticManager.RunDebugResourceMonitor(); } From 71af91f0cd74481ade6a794cf73e23fbfa72689e Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 20:44:46 +0400 Subject: [PATCH 07/11] Increase retry delay + update timeouts --- src/Agent.Worker/DiagnosticLogManager.cs | 83 ++++++++++++------------ 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index 24cba02047..e519ef5f57 
100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -39,6 +39,8 @@ Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, // support.zip public sealed class DiagnosticLogManager : AgentService, IDiagnosticLogManager { + private const int FiveSecondsInMs = 5000; + public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, Pipelines.AgentJobRequestMessage message, DateTime jobStartTimeUtc) @@ -376,7 +378,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, (exception) => { if (exception is OperationCanceledException) @@ -587,7 +589,7 @@ await retryHelper.Retry( async () => { using var cts = new CancellationTokenSource(); - cts.CancelAfter(TimeSpan.FromSeconds(20)); + cts.CancelAfter(TimeSpan.FromSeconds(10)); return await processInvoker.ExecuteAsync( workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), @@ -599,7 +601,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, (exception) => { if (exception is OperationCanceledException) @@ -645,7 +647,7 @@ await retryHelper.Retry( async () => { using var cts = new CancellationTokenSource(); - cts.CancelAfter(TimeSpan.FromSeconds(20)); + cts.CancelAfter(TimeSpan.FromSeconds(45)); return await processInvoker.ExecuteAsync( workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), @@ -657,7 +659,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, (exception) => { if 
(exception is OperationCanceledException) @@ -706,45 +708,43 @@ private async Task GetUserGroupsOnNonWindows(IExecutionContext execution var stringBuilder = new StringBuilder(); try { - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => - { - stringBuilder.AppendLine(mes.Data); - }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + stringBuilder.AppendLine(mes.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + { + stringBuilder.AppendLine(mes.Data); + }; + + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - stringBuilder.AppendLine(mes.Data); - }; + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(45)); - var retryHelper = new RetryHelper(executionContext, maxRetries: 3); - await retryHelper.Retry( - async () => - { - using var cts = new CancellationTokenSource(); - cts.CancelAfter(TimeSpan.FromSeconds(20)); - - return await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: idUtil, - arguments: "-nG", - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: cts.Token); - }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), - (exception) => + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: idUtil, + arguments: "-nG", + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => 
RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (exception) => + { + if (exception is OperationCanceledException) { - if (exception is OperationCanceledException) - { - executionContext.Debug("GetUserGroupsOnNonWindows process failed by timeout. Retrying..."); - } + executionContext.Debug("GetUserGroupsOnNonWindows process failed by timeout. Retrying..."); + } - return true; - }); - } + return true; + }); } catch (Exception ex) { @@ -785,7 +785,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, (exception) => { if (exception is OperationCanceledException) @@ -831,7 +831,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter), + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, (exception) => { if (exception is OperationCanceledException) @@ -842,7 +842,6 @@ await retryHelper.Retry( return true; }); - return stringBuilder.ToString(); } } From f790289c9cd0bbe286fd42943afac26e79f9ee1c Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Mon, 6 May 2024 21:43:59 +0400 Subject: [PATCH 08/11] Increase timeout for GetPackageVerificationResult --- src/Agent.Worker/DiagnosticLogManager.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index e519ef5f57..16407ffcc6 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -773,7 +773,7 @@ await retryHelper.Retry( async () => { using var cts = new CancellationTokenSource(); - cts.CancelAfter(TimeSpan.FromSeconds(20)); + cts.CancelAfter(TimeSpan.FromSeconds(45)); return await processInvoker.ExecuteAsync( workingDirectory: 
HostContext.GetDirectory(WellKnownDirectory.Bin), From aa7c31f073c8d0da0a253f79e15b2396e37b3d16 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Tue, 14 May 2024 18:05:42 +0400 Subject: [PATCH 09/11] Prettify debug check --- src/Agent.Worker/JobRunner.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Agent.Worker/JobRunner.cs b/src/Agent.Worker/JobRunner.cs index 1c3aa89431..9a8b8b9c0d 100644 --- a/src/Agent.Worker/JobRunner.cs +++ b/src/Agent.Worker/JobRunner.cs @@ -112,9 +112,7 @@ public async Task RunAsync(Pipelines.AgentJobRequestMessage message, jobContext.Section(StringUtil.Loc("StepStarting", message.JobDisplayName)); //Start Resource Diagnostics if enabled in the job message - jobContext.Variables.TryGetValue("system.debug", out var systemDebug); - - if (string.Equals(systemDebug, "true", StringComparison.OrdinalIgnoreCase)) + if (jobContext.WriteDebug) { resourceDiagnosticManager = HostContext.GetService(); resourceDiagnosticManager.Setup(jobContext); From ee290525d10dd45b7662ac572e335da74e514053 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Tue, 14 May 2024 18:06:27 +0400 Subject: [PATCH 10/11] remove extra using --- src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs b/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs index 7a1a56cd77..3fb9a0050f 100644 --- a/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs +++ b/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Text; namespace Microsoft.VisualStudio.Services.Agent.Util { From b34203a8c4910f4a0199a3b8bad06536182ceec6 Mon Sep 17 00:00:00 2001 From: Konstantin Tyukalov Date: Tue, 14 May 2024 18:11:29 +0400 Subject: [PATCH 11/11] Increase default timeout --- src/Agent.Worker/DiagnosticLogManager.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index 16407ffcc6..feb88bdafe 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -39,7 +39,7 @@ Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, // support.zip public sealed class DiagnosticLogManager : AgentService, IDiagnosticLogManager { - private const int FiveSecondsInMs = 5000; + private const int DefaultTimeoutIncreaseInMilliseconds = 10000; public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, Pipelines.AgentJobRequestMessage message, @@ -378,7 +378,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, (exception) => { if (exception is OperationCanceledException) @@ -601,7 +601,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, (exception) => { if (exception is OperationCanceledException) @@ -659,7 +659,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, (exception) => { if (exception is OperationCanceledException) @@ -735,7 +735,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, (exception) => { 
if (exception is OperationCanceledException) @@ -785,7 +785,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, (exception) => { if (exception is OperationCanceledException) @@ -831,7 +831,7 @@ await retryHelper.Retry( killProcessOnCancel: false, cancellationToken: cts.Token); }, - (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + FiveSecondsInMs, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, (exception) => { if (exception is OperationCanceledException)