mirror of
https://github.com/wwiinnddyy/LanMountainDesktop.git
synced 2026-06-20 23:54:26 +08:00
feat: implement launcher orchestrator and startup monitoring infrastructure for host lifecycle management
This commit is contained in:
@@ -208,13 +208,15 @@ internal sealed class HostLaunchService
|
||||
|
||||
/// <summary>
/// Best-effort pre-start of the AirApp Runtime.
/// </summary>
/// <param name="appRoot">Application root handed to <c>AirAppRuntimeBridge</c>.</param>
/// <param name="dataRoot">Optional data root handed to <c>AirAppRuntimeBridge</c>.</param>
/// <returns>
/// A task that completes when the pre-start attempt has finished, whether it succeeded or failed.
/// </returns>
/// <remarks>
/// Exceptions thrown by the pre-start are caught and logged as a warning; they never reach the caller.
/// </remarks>
private static async Task EnsureAirAppRuntimeStartedAsync(string appRoot, string? dataRoot)
{
    Logger.Info("HOST LAUNCH: Attempting to pre-start AirApp Runtime...");
    try
    {
        await new AirAppRuntimeBridge(appRoot, dataRoot).EnsureStartedAsync().ConfigureAwait(false);
        Logger.Info("HOST LAUNCH: AirApp Runtime pre-start completed.");
    }
    catch (Exception ex)
    {
        // Deliberately swallowed: a failed pre-start must not abort the launch.
        // The failure is reported exactly once, using the same "HOST LAUNCH:" prefix
        // as the other messages in this method.
        Logger.Warn($"HOST LAUNCH: AirApp Runtime pre-start failed; Host fallback remains available. Error='{ex.Message}'");
    }
}
|
||||
|
||||
@@ -249,6 +251,11 @@ internal sealed class HostLaunchService
|
||||
|
||||
try
|
||||
{
|
||||
Logger.Info($"ATTEMPTING HOST START: Path='{plan.HostPath}'; WorkingDir='{plan.WorkingDirectory}'; Mode='{startMode}'");
|
||||
Logger.Info($" Arguments: {HostLaunchPlanBuilder.FormatArgumentsForLog(plan.Arguments)}");
|
||||
Logger.Info($" File exists: {File.Exists(plan.HostPath)}");
|
||||
Logger.Info($" Working dir exists: {Directory.Exists(plan.WorkingDirectory)}");
|
||||
|
||||
var process = Process.Start(startInfo);
|
||||
Logger.Info(
|
||||
$"Host launch requested. Mode='{startMode}'; RetryTag='{retryTag ?? "<none>"}'; Path='{plan.HostPath}'; " +
|
||||
@@ -257,15 +264,30 @@ internal sealed class HostLaunchService
|
||||
|
||||
if (process is null)
|
||||
{
|
||||
Logger.Error($"CRITICAL: Process.Start returned null! Path='{plan.HostPath}'; Mode='{startMode}'");
|
||||
Console.Error.WriteLine($"[CRITICAL] Process.Start returned null for path: {plan.HostPath}");
|
||||
return HostStartAttempt.StartFailed(startMode, "process_start_returned_null", plan);
|
||||
}
|
||||
|
||||
await Task.Yield();
|
||||
// 等待一小段时间,检查进程是否立即退出
|
||||
await Task.Delay(500).ConfigureAwait(false);
|
||||
|
||||
if (process.HasExited)
|
||||
{
|
||||
Logger.Error($"CRITICAL: Host process exited immediately! ExitCode={process.ExitCode}; Path='{plan.HostPath}'");
|
||||
Console.Error.WriteLine($"[CRITICAL] Host process exited immediately with code {process.ExitCode}");
|
||||
return HostStartAttempt.StartFailed(startMode, $"process_exited_immediately_code_{process.ExitCode}", plan);
|
||||
}
|
||||
|
||||
Logger.Info($"Host process started successfully and is running. PID={process.Id}");
|
||||
return HostStartAttempt.Started(startMode, process, plan);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"Host start failed. Mode='{startMode}'.", ex);
|
||||
Logger.Error($"CRITICAL: Host start exception! Path='{plan.HostPath}'; Mode='{startMode}'; Exception={ex.GetType().Name}; Message='{ex.Message}'", ex);
|
||||
Console.Error.WriteLine($"[CRITICAL] Host start failed: {ex.Message}");
|
||||
Console.Error.WriteLine($"[CRITICAL] Path: {plan.HostPath}");
|
||||
Console.Error.WriteLine($"[CRITICAL] Exception: {ex}");
|
||||
return HostStartAttempt.StartFailed(startMode, ex.GetType().Name, plan);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ internal sealed class HostStartupMonitor
|
||||
]).ConfigureAwait(false);
|
||||
if (!connected)
|
||||
{
|
||||
Logger.Info("Host public IPC is not ready yet. Launcher will keep monitoring the host process and retry.");
|
||||
Logger.Info("Host public IPC is not ready yet after initial connection attempts. Launcher will keep monitoring the host process and retry periodically.");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -106,6 +106,8 @@ internal sealed class HostStartupMonitor
|
||||
var nextShellStatusPollAt = DateTimeOffset.UtcNow + StartupTimeoutPolicy.ShellStatusPollInterval;
|
||||
var ipcReconnectAttemptIndex = 0;
|
||||
var activationRetryAttempted = false;
|
||||
var lastIpcConnectionFailureReported = DateTimeOffset.MinValue;
|
||||
var ipcConnectionFailureCount = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
@@ -224,6 +226,7 @@ internal sealed class HostStartupMonitor
|
||||
if (connected)
|
||||
{
|
||||
ipcConnected = true;
|
||||
Logger.Info($"Host public IPC reconnected successfully after {ipcConnectionFailureCount} failed attempts.");
|
||||
var shellSuccess = await RefreshShellStatusAsync("Host public IPC reconnected; waiting for desktop shell.")
|
||||
.ConfigureAwait(false);
|
||||
if (shellSuccess is not null)
|
||||
@@ -232,6 +235,18 @@ internal sealed class HostStartupMonitor
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ipcConnectionFailureCount++;
|
||||
// 每 30 秒报告一次 IPC 连接失败
|
||||
if ((now - lastIpcConnectionFailureReported).TotalSeconds >= 30)
|
||||
{
|
||||
lastIpcConnectionFailureReported = now;
|
||||
var elapsed = (now - startedAt).TotalSeconds;
|
||||
Logger.Warn($"Host public IPC connection still unavailable after {elapsed:0}s and {ipcConnectionFailureCount} reconnect attempts. Host process is alive (PID={request.HostProcess.Id}).");
|
||||
request.Reporter.Report("diagnostic", $"正在等待主应用响应... (已尝试 {ipcConnectionFailureCount} 次)");
|
||||
}
|
||||
}
|
||||
|
||||
nextReconnectAttemptAt = DateTimeOffset.UtcNow + StartupTimeoutPolicy.IpcReconnectInterval;
|
||||
}
|
||||
@@ -263,6 +278,16 @@ internal sealed class HostStartupMonitor
|
||||
nextCheckpointAt = softTimeoutAt;
|
||||
}
|
||||
|
||||
if (!ipcConnected && nextReconnectAttemptAt < nextCheckpointAt)
|
||||
{
|
||||
nextCheckpointAt = nextReconnectAttemptAt;
|
||||
}
|
||||
|
||||
if (ipcConnected && nextShellStatusPollAt < nextCheckpointAt)
|
||||
{
|
||||
nextCheckpointAt = nextShellStatusPollAt;
|
||||
}
|
||||
|
||||
var delay = nextCheckpointAt - now;
|
||||
if (delay > TimeSpan.FromSeconds(1))
|
||||
{
|
||||
@@ -351,11 +376,11 @@ internal sealed class HostStartupMonitor
|
||||
if (!connected && !request.HostProcess.HasExited)
|
||||
{
|
||||
request.AttemptRegistry.MarkOwnedWaitingForShell("Host process is still running, but public IPC is not ready yet.");
|
||||
request.PublishCoordinatorStatus(true, false, true);
|
||||
request.PublishCoordinatorStatus(true, true, false);
|
||||
return new Outcome(
|
||||
true,
|
||||
"startup_pending",
|
||||
"Host process is still running; Launcher will not start another process while public IPC finishes startup.",
|
||||
false,
|
||||
"ipc_connection_failed",
|
||||
$"Host process is still running after {StartupTimeoutPolicy.HardTimeout.TotalSeconds:0} seconds, but public IPC connection could not be established. This may indicate the host is stuck during initialization.",
|
||||
recoveryActivationAttempted,
|
||||
request.ComposeLaunchDetails(true, recoveryActivationAttempted));
|
||||
}
|
||||
|
||||
@@ -89,6 +89,14 @@ internal sealed class StartupAttemptRegistry
|
||||
ExecuteWithLock(() =>
|
||||
{
|
||||
var existing = LoadUnsafe();
|
||||
|
||||
// 清理过期的记录
|
||||
// Discard a stale record so the checks that follow treat it as absent.
if (existing is not null && IsStaleAttempt(existing))
{
    // "0.0" prints the age with one decimal place. The previous "0.1" was a custom
    // numeric format in which '1' is a literal character, so the logged age was wrong.
    var ageMinutes = (DateTimeOffset.UtcNow - existing.UpdatedAtUtc).TotalMinutes;
    Logger.Info($"Cleaning up stale startup attempt record. AttemptId='{existing.AttemptId}'; State='{existing.State}'; Age={ageMinutes:0.0}min.");
    existing = null;
}
|
||||
|
||||
if (existing is not null && IsCoordinatorLive(existing))
|
||||
{
|
||||
active = Clone(existing);
|
||||
@@ -145,6 +153,34 @@ internal sealed class StartupAttemptRegistry
|
||||
return reserved is not null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether a startup attempt record should be discarded as stale.
/// </summary>
/// <param name="record">The record to evaluate.</param>
/// <returns>
/// <c>true</c> when any of these holds: the record was last updated more than 10 minutes ago;
/// the coordinator PID is known, its process is not live, and the heartbeat is more than
/// 2 minutes old; or both the host PID and the coordinator PID are known and neither process
/// is live. Otherwise <c>false</c>.
/// </returns>
private static bool IsStaleAttempt(StartupAttemptRecord record)
{
    // Sample the clock once so every age comparison uses the same instant.
    var now = DateTimeOffset.UtcNow;

    // NOTE(review): age alone decides here. The original comment also mentioned the record
    // being in a terminal or inactive state, but no state check exists - confirm intent.
    if (now - record.UpdatedAtUtc > TimeSpan.FromMinutes(10))
    {
        return true;
    }

    // Probe the coordinator once and reuse the answer, so the two remaining rules cannot
    // disagree if the process state changes between probes.
    var coordinatorGone = record.CoordinatorPid > 0 && !TryGetLiveProcess(record.CoordinatorPid, out _);
    if (!coordinatorGone)
    {
        // Both remaining rules require a known coordinator PID whose process is not live.
        return false;
    }

    // Coordinator is gone and its heartbeat has timed out.
    if (now - record.HeartbeatAtUtc > TimeSpan.FromMinutes(2))
    {
        return true;
    }

    // Coordinator is gone; stale only if the host process is known and also gone.
    return record.HostPid > 0 && !TryGetLiveProcess(record.HostPid, out _);
}
|
||||
|
||||
public StartupAttemptRecord? GetOwnedAttempt()
|
||||
{
|
||||
StartupAttemptRecord? result = null;
|
||||
|
||||
@@ -2,22 +2,26 @@ namespace LanMountainDesktop.Launcher.Startup;
|
||||
|
||||
/// <summary>
/// Central timeout and polling-interval values for Launcher startup.
/// </summary>
/// <remarks>
/// Each field is declared exactly once. Where two values were present for the same field,
/// the later (larger) value is kept.
/// </remarks>
internal static class StartupTimeoutPolicy
{
    /// <summary>Soft startup timeout (45 seconds).</summary>
    public static readonly TimeSpan SoftTimeout = TimeSpan.FromSeconds(45);

    /// <summary>Hard startup timeout (180 seconds).</summary>
    public static readonly TimeSpan HardTimeout = TimeSpan.FromSeconds(180);

    /// <summary>Initial Public IPC connect attempt (AOT cold start is significantly slower).</summary>
    public static readonly TimeSpan InitialIpcConnectTimeout = TimeSpan.FromMilliseconds(3000);

    /// <summary>Subsequent reconnect attempts use increasing per-try timeouts.</summary>
    public static readonly TimeSpan[] IpcReconnectAttemptTimeouts =
    [
        TimeSpan.FromMilliseconds(1500),
        TimeSpan.FromMilliseconds(3000),
        TimeSpan.FromMilliseconds(5000),
        TimeSpan.FromMilliseconds(8000),
        TimeSpan.FromMilliseconds(10000)
    ];

    /// <summary>Timeout for probing an existing host (1.5 seconds).</summary>
    public static readonly TimeSpan ExistingHostProbeTimeout = TimeSpan.FromMilliseconds(1500);

    /// <summary>Interval between shell status polls (1 second).</summary>
    public static readonly TimeSpan ShellStatusPollInterval = TimeSpan.FromSeconds(1);

    /// <summary>Interval between IPC reconnect attempts (3 seconds).</summary>
    public static readonly TimeSpan IpcReconnectInterval = TimeSpan.FromSeconds(3);

    /// <summary>Maximum time to wait for host process exit after it starts (for early-exit detection).</summary>
    public static readonly TimeSpan HostEarlyExitWindow = TimeSpan.FromSeconds(5);
}
|
||||
|
||||
Reference in New Issue
Block a user