From 2793be68d4e6dcb7d3b1ed4d12458f0641121b44 Mon Sep 17 00:00:00 2001 From: lincube Date: Sun, 14 Jun 2026 12:59:36 +0800 Subject: [PATCH] feat: implement launcher orchestrator and startup monitoring infrastructure for host lifecycle management --- .../Deployment/DeploymentLocator.cs | 37 ++++++++++++++ .../Shell/AirAppRuntimeBridge.cs | 50 ++++++++++++++---- .../Shell/LauncherOrchestrator.cs | 12 ++++- .../Startup/HostLaunchService.cs | 28 ++++++++-- .../Startup/HostStartupMonitor.cs | 35 +++++++++++-- .../Startup/StartupAttemptRegistry.cs | 36 +++++++++++++ .../Startup/StartupTimeoutPolicy.cs | 20 +++++--- LanMountainDesktop/App.axaml.cs | 51 ++++++++++++++++++- 8 files changed, 239 insertions(+), 30 deletions(-) diff --git a/LanMountainDesktop.Launcher/Deployment/DeploymentLocator.cs b/LanMountainDesktop.Launcher/Deployment/DeploymentLocator.cs index 935e381..6dc0835 100644 --- a/LanMountainDesktop.Launcher/Deployment/DeploymentLocator.cs +++ b/LanMountainDesktop.Launcher/Deployment/DeploymentLocator.cs @@ -87,31 +87,68 @@ internal sealed class DeploymentLocator var explicitAppRoot = context.ExplicitAppRoot; var devModeConfigIgnored = !context.IsDebugMode && Views.ErrorWindow.CheckDevModeEnabled(); + Logger.Info($"=== HOST RESOLUTION START ==="); + Logger.Info($" AppRoot: {_appRoot}"); + Logger.Info($" Executable: {executable}"); + Logger.Info($" IsDebugMode: {context.IsDebugMode}"); + Logger.Info($" ExplicitAppRoot: {explicitAppRoot ?? ""}"); + Logger.Info($" LauncherBaseDirectory: {AppContext.BaseDirectory}"); + string? resolvedPath; string? 
source; if (!string.IsNullOrWhiteSpace(explicitAppRoot)) { + Logger.Info($"Trying explicit app root: {explicitAppRoot}"); var explicitRoot = Path.GetFullPath(explicitAppRoot); resolvedPath = TryResolveExplicitAppRoot(explicitRoot, executable, searchedPaths, out source); } else { + Logger.Info("Trying published or portable host..."); resolvedPath = TryResolvePublishedOrPortableHost(executable, searchedPaths, out source); } if (resolvedPath is null && context.IsDebugMode) { + Logger.Info("Debug mode: trying debug host paths..."); resolvedPath = TryResolveDebugHost(executable, searchedPaths, out source); } if (resolvedPath is null) { + Logger.Warn("Standard resolution failed, trying legacy fallback..."); resolvedPath = ResolveHostExecutablePathLegacy(); if (!string.IsNullOrWhiteSpace(resolvedPath)) { searchedPaths.Add(Path.GetFullPath(resolvedPath)); source = "legacy_fallback"; + Logger.Info($"Legacy fallback found: {resolvedPath}"); + } + } + + Logger.Info($"=== HOST RESOLUTION RESULT ==="); + Logger.Info($" Success: {!string.IsNullOrWhiteSpace(resolvedPath)}"); + Logger.Info($" ResolvedPath: {resolvedPath ?? ""}"); + Logger.Info($" Source: {source ?? ""}"); + Logger.Info($" SearchedPaths ({searchedPaths.Count}):"); + foreach (var path in searchedPaths.Take(10)) + { + Logger.Info($" - {path}"); + } + if (searchedPaths.Count > 10) + { + Logger.Info($" ... 
and {searchedPaths.Count - 10} more"); + } + + if (string.IsNullOrWhiteSpace(resolvedPath)) + { + Logger.Error("CRITICAL: Could not resolve host executable path!"); + Console.Error.WriteLine("[CRITICAL] Could not find main application executable!"); + Console.Error.WriteLine($"[CRITICAL] Searched {searchedPaths.Count} locations:"); + foreach (var path in searchedPaths.Take(5)) + { + Console.Error.WriteLine($"[CRITICAL] - {path}"); } } diff --git a/LanMountainDesktop.Launcher/Shell/AirAppRuntimeBridge.cs b/LanMountainDesktop.Launcher/Shell/AirAppRuntimeBridge.cs index 9a1dddb..5141c00 100644 --- a/LanMountainDesktop.Launcher/Shell/AirAppRuntimeBridge.cs +++ b/LanMountainDesktop.Launcher/Shell/AirAppRuntimeBridge.cs @@ -19,12 +19,15 @@ internal sealed class AirAppRuntimeBridge public async Task EnsureStartedAsync() { + Logger.Info($"AIRAPP: Checking if AirApp Runtime is available. AppRoot='{_appRoot}'"); + if (await TryGetStatusAsync().ConfigureAwait(false) is not null) { - Logger.Info("AirApp Runtime is already available."); + Logger.Info("AIRAPP: AirApp Runtime is already available."); return; } + Logger.Info("AIRAPP: Starting AirApp Runtime..."); Process? process; try { @@ -36,24 +39,28 @@ internal sealed class AirAppRuntimeBridge } catch (Exception ex) { - Logger.Warn($"AirApp Runtime start request failed. AppRoot='{_appRoot}'; Error='{ex.Message}'."); + Logger.Warn($"AIRAPP: AirApp Runtime start request failed. AppRoot='{_appRoot}'; Error='{ex.Message}'"); return; } - Logger.Info($"AirApp Runtime start requested. Pid={(process is null ? -1 : process.Id)}; AppRoot='{_appRoot}'."); + Logger.Info($"AIRAPP: AirApp Runtime start requested. Pid={(process is null ? 
-1 : process.Id)}; AppRoot='{_appRoot}'."); for (var attempt = 1; attempt <= ConnectAttempts; attempt++) { + Logger.Info($"AIRAPP: Attempt {attempt}/{ConnectAttempts} - Checking IPC connection..."); + if (await TryGetStatusAsync().ConfigureAwait(false) is not null) { - Logger.Info("AirApp Runtime IPC is ready."); + Logger.Info("AIRAPP: AirApp Runtime IPC is ready."); return; } - await Task.Delay(TimeSpan.FromMilliseconds(250 * attempt)).ConfigureAwait(false); + var delayMs = 250 * attempt; + Logger.Info($"AIRAPP: IPC not ready, waiting {delayMs}ms before retry..."); + await Task.Delay(TimeSpan.FromMilliseconds(delayMs)).ConfigureAwait(false); } - Logger.Warn("AirApp Runtime did not become ready after pre-start; Host fallback remains available."); + Logger.Warn("AIRAPP: AirApp Runtime did not become ready after pre-start; Host fallback remains available."); } public async Task AttachHostAsync(int hostProcessId) @@ -65,10 +72,15 @@ internal sealed class AirAppRuntimeBridge try { + using var cts = new CancellationTokenSource(); using var client = new LanMountainDesktopIpcClient(); - await client.ConnectAsync(IpcConstants.AirAppRuntimePipeName).ConfigureAwait(false); + + var connectTask = client.ConnectAsync(IpcConstants.AirAppRuntimePipeName); + await connectTask.WaitAsync(TimeSpan.FromSeconds(3), cts.Token).ConfigureAwait(false); + var proxy = client.CreateProxy(); - var result = await proxy.AttachHostAsync(hostProcessId).ConfigureAwait(false); + var attachTask = proxy.AttachHostAsync(hostProcessId); + var result = await attachTask.WaitAsync(TimeSpan.FromSeconds(3), cts.Token).ConfigureAwait(false); Logger.Info($"AirApp Runtime host attach completed. 
Accepted={result.Accepted}; Code='{result.Code}'; HostPid={hostProcessId}."); } catch (Exception ex) @@ -81,13 +93,29 @@ internal sealed class AirAppRuntimeBridge { try { + using var cts = new CancellationTokenSource(); using var client = new LanMountainDesktopIpcClient(); - await client.ConnectAsync(IpcConstants.AirAppRuntimePipeName).ConfigureAwait(false); + + var connectTask = client.ConnectAsync(IpcConstants.AirAppRuntimePipeName); + await connectTask.WaitAsync(TimeSpan.FromSeconds(2), cts.Token).ConfigureAwait(false); + var proxy = client.CreateProxy(); - return await proxy.GetStatusAsync().ConfigureAwait(false); + var statusTask = proxy.GetStatusAsync(); + return await statusTask.WaitAsync(TimeSpan.FromSeconds(2), cts.Token).ConfigureAwait(false); } - catch + catch (TimeoutException) { + Logger.Info("AIRAPP: TryGetStatusAsync timed out (2s)."); + return null; + } + catch (OperationCanceledException) + { + Logger.Info("AIRAPP: TryGetStatusAsync cancelled."); + return null; + } + catch (Exception ex) + { + Logger.Info($"AIRAPP: TryGetStatusAsync failed: {ex.GetType().Name} - {ex.Message}"); return null; } } diff --git a/LanMountainDesktop.Launcher/Shell/LauncherOrchestrator.cs b/LanMountainDesktop.Launcher/Shell/LauncherOrchestrator.cs index 889da79..993e24a 100644 --- a/LanMountainDesktop.Launcher/Shell/LauncherOrchestrator.cs +++ b/LanMountainDesktop.Launcher/Shell/LauncherOrchestrator.cs @@ -144,8 +144,18 @@ internal sealed class LauncherOrchestrator return; } + if (!softTimeoutShown) + { + // The splash window was closed before the soft timeout was shown: treat it as a user cancellation and record the attempt as failed (no confirmation prompt is shown here). + Logger.Info("Splash window was closed manually before soft timeout. Cancelling startup attempt."); + _startupAttemptRegistry.MarkOwnedFailed(lastStage, "User cancelled startup before soft timeout."); + // Complete successTcs as cancelled; presumably whatever awaits it stops monitoring this attempt (TODO confirm against the awaiting code). + successTcs.TrySetCanceled(); + return; + } + _startupAttemptRegistry.MarkOwnedDetachedWaiting(); - Logger.Warn("Splash window was closed manually. 
Launcher will continue monitoring the current startup attempt."); + Logger.Warn("Splash window was closed manually after soft timeout. Launcher will continue monitoring the current startup attempt in detached mode."); }; splashWindow.Closed += splashClosedHandler; diff --git a/LanMountainDesktop.Launcher/Startup/HostLaunchService.cs b/LanMountainDesktop.Launcher/Startup/HostLaunchService.cs index c302790..9fdc9d0 100644 --- a/LanMountainDesktop.Launcher/Startup/HostLaunchService.cs +++ b/LanMountainDesktop.Launcher/Startup/HostLaunchService.cs @@ -208,13 +208,15 @@ internal sealed class HostLaunchService private static async Task EnsureAirAppRuntimeStartedAsync(string appRoot, string? dataRoot) { + Logger.Info("HOST LAUNCH: Attempting to pre-start AirApp Runtime..."); try { await new AirAppRuntimeBridge(appRoot, dataRoot).EnsureStartedAsync().ConfigureAwait(false); + Logger.Info("HOST LAUNCH: AirApp Runtime pre-start completed."); } catch (Exception ex) { - Logger.Warn($"AirApp Runtime pre-start failed; Host fallback remains available. Error='{ex.Message}'."); + Logger.Warn($"HOST LAUNCH: AirApp Runtime pre-start failed; Host fallback remains available. Error='{ex.Message}'"); } } @@ -249,6 +251,11 @@ internal sealed class HostLaunchService try { + Logger.Info($"ATTEMPTING HOST START: Path='{plan.HostPath}'; WorkingDir='{plan.WorkingDirectory}'; Mode='{startMode}'"); + Logger.Info($" Arguments: {HostLaunchPlanBuilder.FormatArgumentsForLog(plan.Arguments)}"); + Logger.Info($" File exists: {File.Exists(plan.HostPath)}"); + Logger.Info($" Working dir exists: {Directory.Exists(plan.WorkingDirectory)}"); + var process = Process.Start(startInfo); Logger.Info( $"Host launch requested. Mode='{startMode}'; RetryTag='{retryTag ?? ""}'; Path='{plan.HostPath}'; " + @@ -257,15 +264,30 @@ internal sealed class HostLaunchService if (process is null) { + Logger.Error($"CRITICAL: Process.Start returned null! 
Path='{plan.HostPath}'; Mode='{startMode}'"); + Console.Error.WriteLine($"[CRITICAL] Process.Start returned null for path: {plan.HostPath}"); return HostStartAttempt.StartFailed(startMode, "process_start_returned_null", plan); } - await Task.Yield(); + // Wait briefly (fixed 500 ms), then check whether the host process has already exited. NOTE(review): StartupTimeoutPolicy.HostEarlyExitWindow (5 s) is introduced by this same patch for early-exit detection but is not referenced in any visible hunk; confirm which value is intended. + await Task.Delay(500).ConfigureAwait(false); + + if (process.HasExited) + { + Logger.Error($"CRITICAL: Host process exited immediately! ExitCode={process.ExitCode}; Path='{plan.HostPath}'"); + Console.Error.WriteLine($"[CRITICAL] Host process exited immediately with code {process.ExitCode}"); + return HostStartAttempt.StartFailed(startMode, $"process_exited_immediately_code_{process.ExitCode}", plan); + } + + Logger.Info($"Host process started successfully and is running. PID={process.Id}"); return HostStartAttempt.Started(startMode, process, plan); } catch (Exception ex) { - Logger.Error($"Host start failed. Mode='{startMode}'.", ex); + Logger.Error($"CRITICAL: Host start exception! Path='{plan.HostPath}'; Mode='{startMode}'; Exception={ex.GetType().Name}; Message='{ex.Message}'", ex); + Console.Error.WriteLine($"[CRITICAL] Host start failed: {ex.Message}"); + Console.Error.WriteLine($"[CRITICAL] Path: {plan.HostPath}"); + Console.Error.WriteLine($"[CRITICAL] Exception: {ex}"); return HostStartAttempt.StartFailed(startMode, ex.GetType().Name, plan); } } diff --git a/LanMountainDesktop.Launcher/Startup/HostStartupMonitor.cs b/LanMountainDesktop.Launcher/Startup/HostStartupMonitor.cs index 5413ea4..81fa4dc 100644 --- a/LanMountainDesktop.Launcher/Startup/HostStartupMonitor.cs +++ b/LanMountainDesktop.Launcher/Startup/HostStartupMonitor.cs @@ -86,7 +86,7 @@ internal sealed class HostStartupMonitor ]).ConfigureAwait(false); if (!connected) { - Logger.Info("Host public IPC is not ready yet. Launcher will keep monitoring the host process and retry."); + Logger.Info("Host public IPC is not ready yet after initial connection attempts. 
Launcher will keep monitoring the host process and retry periodically."); } else { @@ -106,6 +106,8 @@ internal sealed class HostStartupMonitor var nextShellStatusPollAt = DateTimeOffset.UtcNow + StartupTimeoutPolicy.ShellStatusPollInterval; var ipcReconnectAttemptIndex = 0; var activationRetryAttempted = false; + var lastIpcConnectionFailureReported = DateTimeOffset.MinValue; + var ipcConnectionFailureCount = 0; while (true) { @@ -224,6 +226,7 @@ internal sealed class HostStartupMonitor if (connected) { ipcConnected = true; + Logger.Info($"Host public IPC reconnected successfully after {ipcConnectionFailureCount} failed attempts."); var shellSuccess = await RefreshShellStatusAsync("Host public IPC reconnected; waiting for desktop shell.") .ConfigureAwait(false); if (shellSuccess is not null) @@ -232,6 +235,18 @@ internal sealed class HostStartupMonitor continue; } } + else + { + ipcConnectionFailureCount++; + // 每 30 秒报告一次 IPC 连接失败 + if ((now - lastIpcConnectionFailureReported).TotalSeconds >= 30) + { + lastIpcConnectionFailureReported = now; + var elapsed = (now - startedAt).TotalSeconds; + Logger.Warn($"Host public IPC connection still unavailable after {elapsed:0}s and {ipcConnectionFailureCount} reconnect attempts. Host process is alive (PID={request.HostProcess.Id})."); + request.Reporter.Report("diagnostic", $"正在等待主应用响应... 
(已尝试 {ipcConnectionFailureCount} 次)"); + } + } nextReconnectAttemptAt = DateTimeOffset.UtcNow + StartupTimeoutPolicy.IpcReconnectInterval; } @@ -263,6 +278,16 @@ internal sealed class HostStartupMonitor nextCheckpointAt = softTimeoutAt; } + if (!ipcConnected && nextReconnectAttemptAt < nextCheckpointAt) + { + nextCheckpointAt = nextReconnectAttemptAt; + } + + if (ipcConnected && nextShellStatusPollAt < nextCheckpointAt) + { + nextCheckpointAt = nextShellStatusPollAt; + } + var delay = nextCheckpointAt - now; if (delay > TimeSpan.FromSeconds(1)) { @@ -351,11 +376,11 @@ internal sealed class HostStartupMonitor if (!connected && !request.HostProcess.HasExited) { request.AttemptRegistry.MarkOwnedWaitingForShell("Host process is still running, but public IPC is not ready yet."); - request.PublishCoordinatorStatus(true, false, true); + request.PublishCoordinatorStatus(true, true, false); return new Outcome( - true, - "startup_pending", - "Host process is still running; Launcher will not start another process while public IPC finishes startup.", + false, + "ipc_connection_failed", + $"Host process is still running after {StartupTimeoutPolicy.HardTimeout.TotalSeconds:0} seconds, but public IPC connection could not be established. This may indicate the host is stuck during initialization.", recoveryActivationAttempted, request.ComposeLaunchDetails(true, recoveryActivationAttempted)); } diff --git a/LanMountainDesktop.Launcher/Startup/StartupAttemptRegistry.cs b/LanMountainDesktop.Launcher/Startup/StartupAttemptRegistry.cs index ecea348..beeb1e9 100644 --- a/LanMountainDesktop.Launcher/Startup/StartupAttemptRegistry.cs +++ b/LanMountainDesktop.Launcher/Startup/StartupAttemptRegistry.cs @@ -89,6 +89,14 @@ internal sealed class StartupAttemptRegistry ExecuteWithLock(() => { var existing = LoadUnsafe(); + + // 清理过期的记录 + if (existing is not null && IsStaleAttempt(existing)) + { + Logger.Info($"Cleaning up stale startup attempt record. 
AttemptId='{existing.AttemptId}'; State='{existing.State}'; Age={(DateTimeOffset.UtcNow - existing.UpdatedAtUtc).TotalMinutes:0.0}min."); + existing = null; + } + if (existing is not null && IsCoordinatorLive(existing)) { active = Clone(existing); @@ -145,6 +153,34 @@ internal sealed class StartupAttemptRegistry return reserved is not null; } + private static bool IsStaleAttempt(StartupAttemptRecord record) + { + // Rule 1: any record whose UpdatedAtUtc is more than 10 minutes old is stale; State is not inspected. NOTE(review): the caller evaluates this before its IsCoordinatorLive guard, so an old record is dropped even if its coordinator is still live; confirm that is intended. + if (DateTimeOffset.UtcNow - record.UpdatedAtUtc > TimeSpan.FromMinutes(10)) + { + return true; + } + + // Rule 2: the coordinator PID no longer resolves to a live process (per TryGetLiveProcess) and its heartbeat is more than 2 minutes old. + if (record.CoordinatorPid > 0 && + !TryGetLiveProcess(record.CoordinatorPid, out _) && + DateTimeOffset.UtcNow - record.HeartbeatAtUtc > TimeSpan.FromMinutes(2)) + { + return true; + } + + // Rule 3: neither the host PID nor the coordinator PID resolves to a live process (no heartbeat age requirement). + if (record.HostPid > 0 && + !TryGetLiveProcess(record.HostPid, out _) && + record.CoordinatorPid > 0 && + !TryGetLiveProcess(record.CoordinatorPid, out _)) + { + return true; + } + + return false; + } + public StartupAttemptRecord? GetOwnedAttempt() { StartupAttemptRecord? result = null; diff --git a/LanMountainDesktop.Launcher/Startup/StartupTimeoutPolicy.cs b/LanMountainDesktop.Launcher/Startup/StartupTimeoutPolicy.cs index cad7844..09c376d 100644 --- a/LanMountainDesktop.Launcher/Startup/StartupTimeoutPolicy.cs +++ b/LanMountainDesktop.Launcher/Startup/StartupTimeoutPolicy.cs @@ -2,22 +2,26 @@ namespace LanMountainDesktop.Launcher.Startup; internal static class StartupTimeoutPolicy { - public static readonly TimeSpan SoftTimeout = TimeSpan.FromSeconds(30); - public static readonly TimeSpan HardTimeout = TimeSpan.FromSeconds(120); + public static readonly TimeSpan SoftTimeout = TimeSpan.FromSeconds(45); + public static readonly TimeSpan HardTimeout = TimeSpan.FromSeconds(180); - /// Initial Public IPC connect attempt (AOT cold start may be slower). 
- public static readonly TimeSpan InitialIpcConnectTimeout = TimeSpan.FromMilliseconds(1200); + /// Initial Public IPC connect attempt (AOT cold start is significantly slower). + public static readonly TimeSpan InitialIpcConnectTimeout = TimeSpan.FromMilliseconds(3000); /// Subsequent reconnect attempts use increasing per-try timeouts. public static readonly TimeSpan[] IpcReconnectAttemptTimeouts = [ - TimeSpan.FromMilliseconds(800), TimeSpan.FromMilliseconds(1500), TimeSpan.FromMilliseconds(3000), - TimeSpan.FromMilliseconds(5000) + TimeSpan.FromMilliseconds(5000), + TimeSpan.FromMilliseconds(8000), + TimeSpan.FromMilliseconds(10000) ]; - public static readonly TimeSpan ExistingHostProbeTimeout = TimeSpan.FromMilliseconds(900); + public static readonly TimeSpan ExistingHostProbeTimeout = TimeSpan.FromMilliseconds(1500); public static readonly TimeSpan ShellStatusPollInterval = TimeSpan.FromSeconds(1); - public static readonly TimeSpan IpcReconnectInterval = TimeSpan.FromSeconds(2); + public static readonly TimeSpan IpcReconnectInterval = TimeSpan.FromSeconds(3); + + /// Maximum time to wait for host process exit after it starts (for early-exit detection). 
+ public static readonly TimeSpan HostEarlyExitWindow = TimeSpan.FromSeconds(5); } diff --git a/LanMountainDesktop/App.axaml.cs b/LanMountainDesktop/App.axaml.cs index b65e87d..595b0ce 100644 --- a/LanMountainDesktop/App.axaml.cs +++ b/LanMountainDesktop/App.axaml.cs @@ -246,6 +246,9 @@ public partial class App : Application ReportStartupProgress(StartupStage.Initializing, 10, "Initializing application..."); ReportStartupProgress(StartupStage.LoadingSettings, 20, "Loading settings..."); } + + // Start the heartbeat loop as a fire-and-forget task (not a dedicated thread); it is intended to let the launcher see that the host is still alive during startup. + _ = StartLauncherHeartbeatAsync(); } catch (Exception ex) { @@ -253,6 +256,39 @@ public partial class App : Application } } + private async Task StartLauncherHeartbeatAsync() + { + try + { + // Loop with a 5-second delay per iteration while no shutdown is in progress and the public IPC host service exists; intended to keep the launcher from treating the host as hung. + while (!IsShutdownInProgress && _publicIpcHostService is not null) + { + await Task.Delay(TimeSpan.FromSeconds(5)); + + // Main window not opened yet and no shutdown in progress: send a heartbeat progress message. + if (!_mainWindowOpened && !IsShutdownInProgress) + { + // Silent heartbeat (logSuccess: false). NOTE(review): it always reports Initializing at 15 percent, which is below the LoadingSettings 20 percent reported just before this loop is started; confirm the launcher ignores backward progress. + QueueOrSendLauncherProgress(new StartupProgressMessage + { + Stage = StartupStage.Initializing, + ProgressPercent = 15, + Message = "Application is initializing...", + Timestamp = DateTimeOffset.UtcNow + }, logSuccess: false); + } + else + { + break; // Main window opened or shutdown started: stop the heartbeat. + } + } + } + catch (Exception ex) + { + AppLogger.Warn("LauncherIpc", $"Heartbeat thread failed: {ex.Message}"); + } + } + private void ReportStartupProgress(StartupStage stage, int percent, string message) { QueueOrSendLauncherProgress(new StartupProgressMessage @@ -1824,11 +1860,22 @@ public partial class App : Application _publicIpcHostService.Start(); AppLogger.Info( "PublicIpc", - $"Public IPC host started. PipeName='{IpcConstants.DefaultPipeName}'; Version='{versionInfo.Version}'; Codename='{versionInfo.Codename}'."); + $"Public IPC host started successfully. 
PipeName='{IpcConstants.DefaultPipeName}'; Version='{versionInfo.Version}'; Codename='{versionInfo.Codename}'."); } catch (Exception ex) { - AppLogger.Warn("PublicIpc", "Failed to initialize public IPC host.", ex); + AppLogger.Error("PublicIpc", "CRITICAL: Failed to initialize public IPC host. Launcher will not be able to connect to this process.", ex); + + // 尝试通过标准错误输出告知启动器 + try + { + Console.Error.WriteLine($"[CRITICAL] Public IPC host initialization failed: {ex.Message}"); + Console.Error.WriteLine("[CRITICAL] The launcher will not be able to connect to this process."); + } + catch + { + // 忽略控制台写入失败 + } } }