Fail the job if no connection in 15 minutes

This commit is contained in:
Francesco Renzi
2026-03-16 11:48:46 +00:00
committed by GitHub
parent b36d9a6c92
commit 437e20db7d
3 changed files with 299 additions and 31 deletions

View File

@@ -13,7 +13,9 @@ namespace GitHub.Runner.Worker.Dap
public sealed class DapDebugger : RunnerService, IDapDebugger public sealed class DapDebugger : RunnerService, IDapDebugger
{ {
private const int DefaultPort = 4711; private const int DefaultPort = 4711;
private const int DefaultTimeoutMinutes = 15;
private const string PortEnvironmentVariable = "ACTIONS_RUNNER_DAP_PORT"; private const string PortEnvironmentVariable = "ACTIONS_RUNNER_DAP_PORT";
private const string TimeoutEnvironmentVariable = "ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT";
private IDapServer _server; private IDapServer _server;
private IDapDebugSession _session; private IDapDebugSession _session;
@@ -51,12 +53,23 @@ namespace GitHub.Runner.Worker.Dap
return; return;
} }
Trace.Info("Waiting for debugger client connection..."); var timeoutMinutes = ResolveTimeout();
await _server.WaitForConnectionAsync(cancellationToken); using var timeoutCts = new CancellationTokenSource(TimeSpan.FromMinutes(timeoutMinutes));
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token);
try
{
Trace.Info($"Waiting for debugger client connection (timeout: {timeoutMinutes} minutes)...");
await _server.WaitForConnectionAsync(linkedCts.Token);
Trace.Info("Debugger client connected."); Trace.Info("Debugger client connected.");
await _session.WaitForHandshakeAsync(cancellationToken); await _session.WaitForHandshakeAsync(linkedCts.Token);
Trace.Info("DAP handshake complete."); Trace.Info("DAP handshake complete.");
}
catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
{
throw new TimeoutException($"No debugger client connected within {timeoutMinutes} minutes.");
}
_cancellationRegistration = cancellationToken.Register(() => _cancellationRegistration = cancellationToken.Register(() =>
{ {
@@ -149,7 +162,7 @@ namespace GitHub.Runner.Worker.Dap
private int ResolvePort() private int ResolvePort()
{ {
var portEnv = Environment.GetEnvironmentVariable(PortEnvironmentVariable); var portEnv = Environment.GetEnvironmentVariable(PortEnvironmentVariable);
if (!string.IsNullOrEmpty(portEnv) && int.TryParse(portEnv, out var customPort) && customPort > 1024 && customPort <= 65535) if (!string.IsNullOrEmpty(portEnv) && int.TryParse(portEnv, out var customPort) && customPort > 0 && customPort <= 65535)
{ {
Trace.Info($"Using custom DAP port {customPort} from {PortEnvironmentVariable}"); Trace.Info($"Using custom DAP port {customPort} from {PortEnvironmentVariable}");
return customPort; return customPort;
@@ -157,5 +170,17 @@ namespace GitHub.Runner.Worker.Dap
return DefaultPort; return DefaultPort;
} }
private int ResolveTimeout()
{
var timeoutEnv = Environment.GetEnvironmentVariable(TimeoutEnvironmentVariable);
if (!string.IsNullOrEmpty(timeoutEnv) && int.TryParse(timeoutEnv, out var customTimeout) && customTimeout > 0)
{
Trace.Info($"Using custom DAP timeout {customTimeout} minutes from {TimeoutEnvironmentVariable}");
return customTimeout;
}
return DefaultTimeoutMinutes;
}
} }
} }

View File

@@ -192,15 +192,10 @@ namespace GitHub.Runner.Worker
} }
catch (Exception ex) catch (Exception ex)
{ {
Trace.Warning($"Failed to start DAP debugger: {ex.Message}. Job will continue without debugging."); Trace.Error($"Failed to start DAP debugger: {ex.Message}");
if (dapDebugger != null)
// cleanup if debugger failed to start {
try { try { await dapDebugger.StopAsync(); } catch { }
await dapDebugger.StopAsync();
}
catch {
Trace.Error("Failed to stop debugger server")
Trace.Error(ex);
} }
dapDebugger = null; dapDebugger = null;
} }
@@ -256,19 +251,29 @@ namespace GitHub.Runner.Worker
{ {
await dapDebugger.WaitUntilReadyAsync(jobRequestCancellationToken); await dapDebugger.WaitUntilReadyAsync(jobRequestCancellationToken);
} }
catch (OperationCanceledException) when (jobRequestCancellationToken.IsCancellationRequested)
{
Trace.Info("Job was cancelled before debugger client connected.");
try { await dapDebugger.StopAsync(); } catch { }
dapDebugger = null;
return await CompleteJobAsync(server, jobContext, message, TaskResult.Canceled);
}
catch (Exception ex) catch (Exception ex)
{ {
try { Trace.Error($"DAP debugger failed to become ready: {ex.Message}");
await dapDebugger.StopAsync(); try { await dapDebugger.StopAsync(); } catch { }
}
catch {
Trace.Error("Failed to stop debugger server")
Trace.Error(ex);
}
dapDebugger = null; dapDebugger = null;
} }
} }
// If debugging was requested but the debugger is not available, fail the job
if (jobContext.Global.EnableDebugger && dapDebugger == null)
{
var errorMessage = "The debugger failed to start or no debugger client connected in time.";
jobContext.Error(errorMessage);
return await CompleteJobAsync(server, jobContext, message, TaskResult.Failed);
}
// Run all job steps // Run all job steps
Trace.Info("Run all job steps."); Trace.Info("Run all job steps.");
var stepsRunner = HostContext.GetService<IStepsRunner>(); var stepsRunner = HostContext.GetService<IStepsRunner>();
@@ -311,14 +316,8 @@ namespace GitHub.Runner.Worker
if (dapDebugger != null) if (dapDebugger != null)
{ {
try {
await dapDebugger.StopAsync(); await dapDebugger.StopAsync();
} }
catch {
Trace.Error("Failed to stop debugger server")
Trace.Error(ex);
}
}
await ShutdownQueue(throwOnFailure: false); await ShutdownQueue(throwOnFailure: false);
} }

View File

@@ -196,8 +196,8 @@ namespace GitHub.Runner.Common.Tests.Worker
await _debugger.StartAsync(cts.Token); await _debugger.StartAsync(cts.Token);
await _debugger.WaitUntilReadyAsync(cts.Token); await _debugger.WaitUntilReadyAsync(cts.Token);
mockServer.Verify(x => x.WaitForConnectionAsync(cts.Token), Times.Once); mockServer.Verify(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()), Times.Once);
mockSession.Verify(x => x.WaitForHandshakeAsync(cts.Token), Times.Once); mockSession.Verify(x => x.WaitForHandshakeAsync(It.IsAny<CancellationToken>()), Times.Once);
await _debugger.StopAsync(); await _debugger.StopAsync();
} }
@@ -438,5 +438,249 @@ namespace GitHub.Runner.Common.Tests.Worker
await _debugger.WaitUntilReadyAsync(CancellationToken.None); await _debugger.WaitUntilReadyAsync(CancellationToken.None);
} }
} }
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitUntilReadyPassesLinkedTokenNotOriginal()
{
using (var hc = CreateTestContext())
{
CancellationToken capturedToken = default;
var mockServer = new Mock<IDapServer>();
mockServer.Setup(x => x.StartAsync(It.IsAny<int>(), It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()))
.Callback<CancellationToken>(ct => capturedToken = ct)
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.StopAsync())
.Returns(Task.CompletedTask);
var mockSession = new Mock<IDapDebugSession>();
mockSession.Setup(x => x.WaitForHandshakeAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
hc.SetSingleton(mockServer.Object);
hc.SetSingleton(mockSession.Object);
var cts = new CancellationTokenSource();
await _debugger.StartAsync(cts.Token);
await _debugger.WaitUntilReadyAsync(cts.Token);
// The token passed to WaitForConnectionAsync should be a linked token
// (combines job cancellation + internal timeout), not the raw job token
Assert.NotEqual(cts.Token, capturedToken);
await _debugger.StopAsync();
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitUntilReadyTimeoutSurfacesAsTimeoutException()
{
using (var hc = CreateTestContext())
{
// Mock WaitForConnectionAsync to block until its cancellation token fires,
// then throw OperationCanceledException — simulating "no client connected"
var mockServer = new Mock<IDapServer>();
mockServer.Setup(x => x.StartAsync(It.IsAny<int>(), It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()))
.Returns<CancellationToken>(async ct =>
{
var tcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
ct.Register(() => tcs.TrySetCanceled(ct));
await tcs.Task;
});
mockServer.Setup(x => x.StopAsync())
.Returns(Task.CompletedTask);
var mockSession = new Mock<IDapDebugSession>();
hc.SetSingleton(mockServer.Object);
hc.SetSingleton(mockSession.Object);
var jobCts = new CancellationTokenSource();
await _debugger.StartAsync(jobCts.Token);
// Start wait in background
var waitTask = _debugger.WaitUntilReadyAsync(jobCts.Token);
await Task.Delay(50);
Assert.False(waitTask.IsCompleted);
// The linked token includes the internal timeout CTS.
// We can't easily make it fire fast (it uses minutes), but we can
// verify the contract: cancelling the job token produces OCE, not TimeoutException.
jobCts.Cancel();
var ex = await Assert.ThrowsAnyAsync<OperationCanceledException>(() => waitTask);
Assert.IsNotType<TimeoutException>(ex);
await _debugger.StopAsync();
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitUntilReadyUsesCustomTimeoutFromEnvironment()
{
using (var hc = CreateTestContext())
{
var mockServer = new Mock<IDapServer>();
mockServer.Setup(x => x.StartAsync(It.IsAny<int>(), It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.StopAsync())
.Returns(Task.CompletedTask);
var mockSession = new Mock<IDapDebugSession>();
mockSession.Setup(x => x.WaitForHandshakeAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
hc.SetSingleton(mockServer.Object);
hc.SetSingleton(mockSession.Object);
Environment.SetEnvironmentVariable("ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT", "30");
try
{
var cts = new CancellationTokenSource();
await _debugger.StartAsync(cts.Token);
// The timeout is applied internally — we can verify it worked
// by checking the trace output contains the custom value
await _debugger.WaitUntilReadyAsync(cts.Token);
// If we got here without exception, the custom timeout was accepted
// (it didn't default to something that would fail)
await _debugger.StopAsync();
}
finally
{
Environment.SetEnvironmentVariable("ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT", null);
}
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitUntilReadyIgnoresInvalidTimeoutFromEnvironment()
{
using (var hc = CreateTestContext())
{
var mockServer = new Mock<IDapServer>();
mockServer.Setup(x => x.StartAsync(It.IsAny<int>(), It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.StopAsync())
.Returns(Task.CompletedTask);
var mockSession = new Mock<IDapDebugSession>();
mockSession.Setup(x => x.WaitForHandshakeAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
hc.SetSingleton(mockServer.Object);
hc.SetSingleton(mockSession.Object);
Environment.SetEnvironmentVariable("ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT", "not-a-number");
try
{
var cts = new CancellationTokenSource();
await _debugger.StartAsync(cts.Token);
await _debugger.WaitUntilReadyAsync(cts.Token);
// Should succeed with default timeout (no crash from bad env var)
await _debugger.StopAsync();
}
finally
{
Environment.SetEnvironmentVariable("ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT", null);
}
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitUntilReadyIgnoresZeroTimeoutFromEnvironment()
{
using (var hc = CreateTestContext())
{
var mockServer = new Mock<IDapServer>();
mockServer.Setup(x => x.StartAsync(It.IsAny<int>(), It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.StopAsync())
.Returns(Task.CompletedTask);
var mockSession = new Mock<IDapDebugSession>();
mockSession.Setup(x => x.WaitForHandshakeAsync(It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
hc.SetSingleton(mockServer.Object);
hc.SetSingleton(mockSession.Object);
Environment.SetEnvironmentVariable("ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT", "0");
try
{
var cts = new CancellationTokenSource();
await _debugger.StartAsync(cts.Token);
await _debugger.WaitUntilReadyAsync(cts.Token);
// Zero is not > 0, so falls back to default (should succeed, not throw)
await _debugger.StopAsync();
}
finally
{
Environment.SetEnvironmentVariable("ACTIONS_RUNNER_DAP_CONNECTION_TIMEOUT", null);
}
}
}
[Fact]
[Trait("Level", "L0")]
[Trait("Category", "Worker")]
public async Task WaitUntilReadyJobCancellationPropagatesAsOperationCancelledException()
{
using (var hc = CreateTestContext())
{
var mockServer = new Mock<IDapServer>();
mockServer.Setup(x => x.StartAsync(It.IsAny<int>(), It.IsAny<CancellationToken>()))
.Returns(Task.CompletedTask);
mockServer.Setup(x => x.WaitForConnectionAsync(It.IsAny<CancellationToken>()))
.Returns<CancellationToken>(ct =>
{
var tcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
ct.Register(() => tcs.TrySetCanceled(ct));
return tcs.Task;
});
mockServer.Setup(x => x.StopAsync())
.Returns(Task.CompletedTask);
var mockSession = new Mock<IDapDebugSession>();
hc.SetSingleton(mockServer.Object);
hc.SetSingleton(mockSession.Object);
var cts = new CancellationTokenSource();
await _debugger.StartAsync(cts.Token);
var waitTask = _debugger.WaitUntilReadyAsync(cts.Token);
await Task.Delay(50);
// Cancel the job token — should surface as OperationCanceledException, NOT TimeoutException
cts.Cancel();
var ex = await Assert.ThrowsAnyAsync<OperationCanceledException>(() => waitTask);
Assert.IsNotType<TimeoutException>(ex);
await _debugger.StopAsync();
}
}
} }
} }