mirror of
https://github.com/wwiinnddyy/LanMountainDesktop.git
synced 2026-06-20 23:54:26 +08:00
feat: implement launcher orchestrator and startup monitoring infrastructure for host lifecycle management
This commit is contained in:
@@ -87,31 +87,68 @@ internal sealed class DeploymentLocator
|
||||
var explicitAppRoot = context.ExplicitAppRoot;
|
||||
var devModeConfigIgnored = !context.IsDebugMode && Views.ErrorWindow.CheckDevModeEnabled();
|
||||
|
||||
Logger.Info($"=== HOST RESOLUTION START ===");
|
||||
Logger.Info($" AppRoot: {_appRoot}");
|
||||
Logger.Info($" Executable: {executable}");
|
||||
Logger.Info($" IsDebugMode: {context.IsDebugMode}");
|
||||
Logger.Info($" ExplicitAppRoot: {explicitAppRoot ?? "<none>"}");
|
||||
Logger.Info($" LauncherBaseDirectory: {AppContext.BaseDirectory}");
|
||||
|
||||
string? resolvedPath;
|
||||
string? source;
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(explicitAppRoot))
|
||||
{
|
||||
Logger.Info($"Trying explicit app root: {explicitAppRoot}");
|
||||
var explicitRoot = Path.GetFullPath(explicitAppRoot);
|
||||
resolvedPath = TryResolveExplicitAppRoot(explicitRoot, executable, searchedPaths, out source);
|
||||
}
|
||||
else
|
||||
{
|
||||
Logger.Info("Trying published or portable host...");
|
||||
resolvedPath = TryResolvePublishedOrPortableHost(executable, searchedPaths, out source);
|
||||
}
|
||||
|
||||
if (resolvedPath is null && context.IsDebugMode)
|
||||
{
|
||||
Logger.Info("Debug mode: trying debug host paths...");
|
||||
resolvedPath = TryResolveDebugHost(executable, searchedPaths, out source);
|
||||
}
|
||||
|
||||
if (resolvedPath is null)
|
||||
{
|
||||
Logger.Warn("Standard resolution failed, trying legacy fallback...");
|
||||
resolvedPath = ResolveHostExecutablePathLegacy();
|
||||
if (!string.IsNullOrWhiteSpace(resolvedPath))
|
||||
{
|
||||
searchedPaths.Add(Path.GetFullPath(resolvedPath));
|
||||
source = "legacy_fallback";
|
||||
Logger.Info($"Legacy fallback found: {resolvedPath}");
|
||||
}
|
||||
}
|
||||
|
||||
Logger.Info($"=== HOST RESOLUTION RESULT ===");
|
||||
Logger.Info($" Success: {!string.IsNullOrWhiteSpace(resolvedPath)}");
|
||||
Logger.Info($" ResolvedPath: {resolvedPath ?? "<NOT FOUND>"}");
|
||||
Logger.Info($" Source: {source ?? "<none>"}");
|
||||
Logger.Info($" SearchedPaths ({searchedPaths.Count}):");
|
||||
foreach (var path in searchedPaths.Take(10))
|
||||
{
|
||||
Logger.Info($" - {path}");
|
||||
}
|
||||
if (searchedPaths.Count > 10)
|
||||
{
|
||||
Logger.Info($" ... and {searchedPaths.Count - 10} more");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(resolvedPath))
|
||||
{
|
||||
Logger.Error("CRITICAL: Could not resolve host executable path!");
|
||||
Console.Error.WriteLine("[CRITICAL] Could not find main application executable!");
|
||||
Console.Error.WriteLine($"[CRITICAL] Searched {searchedPaths.Count} locations:");
|
||||
foreach (var path in searchedPaths.Take(5))
|
||||
{
|
||||
Console.Error.WriteLine($"[CRITICAL] - {path}");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,12 +19,15 @@ internal sealed class AirAppRuntimeBridge
|
||||
|
||||
public async Task EnsureStartedAsync()
|
||||
{
|
||||
Logger.Info($"AIRAPP: Checking if AirApp Runtime is available. AppRoot='{_appRoot}'");
|
||||
|
||||
if (await TryGetStatusAsync().ConfigureAwait(false) is not null)
|
||||
{
|
||||
Logger.Info("AirApp Runtime is already available.");
|
||||
Logger.Info("AIRAPP: AirApp Runtime is already available.");
|
||||
return;
|
||||
}
|
||||
|
||||
Logger.Info("AIRAPP: Starting AirApp Runtime...");
|
||||
Process? process;
|
||||
try
|
||||
{
|
||||
@@ -36,24 +39,28 @@ internal sealed class AirAppRuntimeBridge
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Warn($"AirApp Runtime start request failed. AppRoot='{_appRoot}'; Error='{ex.Message}'.");
|
||||
Logger.Warn($"AIRAPP: AirApp Runtime start request failed. AppRoot='{_appRoot}'; Error='{ex.Message}'");
|
||||
return;
|
||||
}
|
||||
|
||||
Logger.Info($"AirApp Runtime start requested. Pid={(process is null ? -1 : process.Id)}; AppRoot='{_appRoot}'.");
|
||||
Logger.Info($"AIRAPP: AirApp Runtime start requested. Pid={(process is null ? -1 : process.Id)}; AppRoot='{_appRoot}'.");
|
||||
|
||||
for (var attempt = 1; attempt <= ConnectAttempts; attempt++)
|
||||
{
|
||||
Logger.Info($"AIRAPP: Attempt {attempt}/{ConnectAttempts} - Checking IPC connection...");
|
||||
|
||||
if (await TryGetStatusAsync().ConfigureAwait(false) is not null)
|
||||
{
|
||||
Logger.Info("AirApp Runtime IPC is ready.");
|
||||
Logger.Info("AIRAPP: AirApp Runtime IPC is ready.");
|
||||
return;
|
||||
}
|
||||
|
||||
await Task.Delay(TimeSpan.FromMilliseconds(250 * attempt)).ConfigureAwait(false);
|
||||
var delayMs = 250 * attempt;
|
||||
Logger.Info($"AIRAPP: IPC not ready, waiting {delayMs}ms before retry...");
|
||||
await Task.Delay(TimeSpan.FromMilliseconds(delayMs)).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
Logger.Warn("AirApp Runtime did not become ready after pre-start; Host fallback remains available.");
|
||||
Logger.Warn("AIRAPP: AirApp Runtime did not become ready after pre-start; Host fallback remains available.");
|
||||
}
|
||||
|
||||
public async Task AttachHostAsync(int hostProcessId)
|
||||
@@ -65,10 +72,15 @@ internal sealed class AirAppRuntimeBridge
|
||||
|
||||
try
|
||||
{
|
||||
using var cts = new CancellationTokenSource();
|
||||
using var client = new LanMountainDesktopIpcClient();
|
||||
await client.ConnectAsync(IpcConstants.AirAppRuntimePipeName).ConfigureAwait(false);
|
||||
|
||||
var connectTask = client.ConnectAsync(IpcConstants.AirAppRuntimePipeName);
|
||||
await connectTask.WaitAsync(TimeSpan.FromSeconds(3), cts.Token).ConfigureAwait(false);
|
||||
|
||||
var proxy = client.CreateProxy<IAirAppRuntimeControlService>();
|
||||
var result = await proxy.AttachHostAsync(hostProcessId).ConfigureAwait(false);
|
||||
var attachTask = proxy.AttachHostAsync(hostProcessId);
|
||||
var result = await attachTask.WaitAsync(TimeSpan.FromSeconds(3), cts.Token).ConfigureAwait(false);
|
||||
Logger.Info($"AirApp Runtime host attach completed. Accepted={result.Accepted}; Code='{result.Code}'; HostPid={hostProcessId}.");
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -81,13 +93,29 @@ internal sealed class AirAppRuntimeBridge
|
||||
{
|
||||
try
|
||||
{
|
||||
using var cts = new CancellationTokenSource();
|
||||
using var client = new LanMountainDesktopIpcClient();
|
||||
await client.ConnectAsync(IpcConstants.AirAppRuntimePipeName).ConfigureAwait(false);
|
||||
|
||||
var connectTask = client.ConnectAsync(IpcConstants.AirAppRuntimePipeName);
|
||||
await connectTask.WaitAsync(TimeSpan.FromSeconds(2), cts.Token).ConfigureAwait(false);
|
||||
|
||||
var proxy = client.CreateProxy<IAirAppRuntimeControlService>();
|
||||
return await proxy.GetStatusAsync().ConfigureAwait(false);
|
||||
var statusTask = proxy.GetStatusAsync();
|
||||
return await statusTask.WaitAsync(TimeSpan.FromSeconds(2), cts.Token).ConfigureAwait(false);
|
||||
}
|
||||
catch
|
||||
catch (TimeoutException)
|
||||
{
|
||||
Logger.Info("AIRAPP: TryGetStatusAsync timed out (2s).");
|
||||
return null;
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
Logger.Info("AIRAPP: TryGetStatusAsync cancelled.");
|
||||
return null;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Info($"AIRAPP: TryGetStatusAsync failed: {ex.GetType().Name} - {ex.Message}");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,8 +144,18 @@ internal sealed class LauncherOrchestrator
|
||||
return;
|
||||
}
|
||||
|
||||
if (!softTimeoutShown)
|
||||
{
|
||||
// 用户在软超时前关闭窗口,提示确认
|
||||
Logger.Info("Splash window was closed manually before soft timeout. Cancelling startup attempt.");
|
||||
_startupAttemptRegistry.MarkOwnedFailed(lastStage, "User cancelled startup before soft timeout.");
|
||||
// 取消后续监控
|
||||
successTcs.TrySetCanceled();
|
||||
return;
|
||||
}
|
||||
|
||||
_startupAttemptRegistry.MarkOwnedDetachedWaiting();
|
||||
Logger.Warn("Splash window was closed manually. Launcher will continue monitoring the current startup attempt.");
|
||||
Logger.Warn("Splash window was closed manually after soft timeout. Launcher will continue monitoring the current startup attempt in detached mode.");
|
||||
};
|
||||
splashWindow.Closed += splashClosedHandler;
|
||||
|
||||
|
||||
@@ -208,13 +208,15 @@ internal sealed class HostLaunchService
|
||||
|
||||
/// <summary>
/// Best-effort pre-start of the AirApp Runtime before the host is launched.
/// </summary>
/// <param name="appRoot">Application root passed to <c>AirAppRuntimeBridge</c>.</param>
/// <param name="dataRoot">Optional data root passed to <c>AirAppRuntimeBridge</c>.</param>
/// <remarks>
/// Any exception raised while pre-starting is caught and logged as a warning; it is never
/// propagated to the caller, so a failed pre-start does not abort the host launch.
/// </remarks>
private static async Task EnsureAirAppRuntimeStartedAsync(string appRoot, string? dataRoot)
{
    Logger.Info("HOST LAUNCH: Attempting to pre-start AirApp Runtime...");

    try
    {
        var bridge = new AirAppRuntimeBridge(appRoot, dataRoot);
        await bridge.EnsureStartedAsync().ConfigureAwait(false);
        Logger.Info("HOST LAUNCH: AirApp Runtime pre-start completed.");
    }
    catch (Exception ex)
    {
        // Report the failure exactly once, using the same "HOST LAUNCH:" prefix as the other
        // messages emitted by this method.
        Logger.Warn($"HOST LAUNCH: AirApp Runtime pre-start failed; Host fallback remains available. Error='{ex.Message}'");
    }
}
|
||||
|
||||
@@ -249,6 +251,11 @@ internal sealed class HostLaunchService
|
||||
|
||||
try
|
||||
{
|
||||
Logger.Info($"ATTEMPTING HOST START: Path='{plan.HostPath}'; WorkingDir='{plan.WorkingDirectory}'; Mode='{startMode}'");
|
||||
Logger.Info($" Arguments: {HostLaunchPlanBuilder.FormatArgumentsForLog(plan.Arguments)}");
|
||||
Logger.Info($" File exists: {File.Exists(plan.HostPath)}");
|
||||
Logger.Info($" Working dir exists: {Directory.Exists(plan.WorkingDirectory)}");
|
||||
|
||||
var process = Process.Start(startInfo);
|
||||
Logger.Info(
|
||||
$"Host launch requested. Mode='{startMode}'; RetryTag='{retryTag ?? "<none>"}'; Path='{plan.HostPath}'; " +
|
||||
@@ -257,15 +264,30 @@ internal sealed class HostLaunchService
|
||||
|
||||
if (process is null)
|
||||
{
|
||||
Logger.Error($"CRITICAL: Process.Start returned null! Path='{plan.HostPath}'; Mode='{startMode}'");
|
||||
Console.Error.WriteLine($"[CRITICAL] Process.Start returned null for path: {plan.HostPath}");
|
||||
return HostStartAttempt.StartFailed(startMode, "process_start_returned_null", plan);
|
||||
}
|
||||
|
||||
await Task.Yield();
|
||||
// 等待一小段时间,检查进程是否立即退出
|
||||
await Task.Delay(500).ConfigureAwait(false);
|
||||
|
||||
if (process.HasExited)
|
||||
{
|
||||
Logger.Error($"CRITICAL: Host process exited immediately! ExitCode={process.ExitCode}; Path='{plan.HostPath}'");
|
||||
Console.Error.WriteLine($"[CRITICAL] Host process exited immediately with code {process.ExitCode}");
|
||||
return HostStartAttempt.StartFailed(startMode, $"process_exited_immediately_code_{process.ExitCode}", plan);
|
||||
}
|
||||
|
||||
Logger.Info($"Host process started successfully and is running. PID={process.Id}");
|
||||
return HostStartAttempt.Started(startMode, process, plan);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"Host start failed. Mode='{startMode}'.", ex);
|
||||
Logger.Error($"CRITICAL: Host start exception! Path='{plan.HostPath}'; Mode='{startMode}'; Exception={ex.GetType().Name}; Message='{ex.Message}'", ex);
|
||||
Console.Error.WriteLine($"[CRITICAL] Host start failed: {ex.Message}");
|
||||
Console.Error.WriteLine($"[CRITICAL] Path: {plan.HostPath}");
|
||||
Console.Error.WriteLine($"[CRITICAL] Exception: {ex}");
|
||||
return HostStartAttempt.StartFailed(startMode, ex.GetType().Name, plan);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ internal sealed class HostStartupMonitor
|
||||
]).ConfigureAwait(false);
|
||||
if (!connected)
|
||||
{
|
||||
Logger.Info("Host public IPC is not ready yet. Launcher will keep monitoring the host process and retry.");
|
||||
Logger.Info("Host public IPC is not ready yet after initial connection attempts. Launcher will keep monitoring the host process and retry periodically.");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -106,6 +106,8 @@ internal sealed class HostStartupMonitor
|
||||
var nextShellStatusPollAt = DateTimeOffset.UtcNow + StartupTimeoutPolicy.ShellStatusPollInterval;
|
||||
var ipcReconnectAttemptIndex = 0;
|
||||
var activationRetryAttempted = false;
|
||||
var lastIpcConnectionFailureReported = DateTimeOffset.MinValue;
|
||||
var ipcConnectionFailureCount = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
@@ -224,6 +226,7 @@ internal sealed class HostStartupMonitor
|
||||
if (connected)
|
||||
{
|
||||
ipcConnected = true;
|
||||
Logger.Info($"Host public IPC reconnected successfully after {ipcConnectionFailureCount} failed attempts.");
|
||||
var shellSuccess = await RefreshShellStatusAsync("Host public IPC reconnected; waiting for desktop shell.")
|
||||
.ConfigureAwait(false);
|
||||
if (shellSuccess is not null)
|
||||
@@ -232,6 +235,18 @@ internal sealed class HostStartupMonitor
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ipcConnectionFailureCount++;
|
||||
// 每 30 秒报告一次 IPC 连接失败
|
||||
if ((now - lastIpcConnectionFailureReported).TotalSeconds >= 30)
|
||||
{
|
||||
lastIpcConnectionFailureReported = now;
|
||||
var elapsed = (now - startedAt).TotalSeconds;
|
||||
Logger.Warn($"Host public IPC connection still unavailable after {elapsed:0}s and {ipcConnectionFailureCount} reconnect attempts. Host process is alive (PID={request.HostProcess.Id}).");
|
||||
request.Reporter.Report("diagnostic", $"正在等待主应用响应... (已尝试 {ipcConnectionFailureCount} 次)");
|
||||
}
|
||||
}
|
||||
|
||||
nextReconnectAttemptAt = DateTimeOffset.UtcNow + StartupTimeoutPolicy.IpcReconnectInterval;
|
||||
}
|
||||
@@ -263,6 +278,16 @@ internal sealed class HostStartupMonitor
|
||||
nextCheckpointAt = softTimeoutAt;
|
||||
}
|
||||
|
||||
if (!ipcConnected && nextReconnectAttemptAt < nextCheckpointAt)
|
||||
{
|
||||
nextCheckpointAt = nextReconnectAttemptAt;
|
||||
}
|
||||
|
||||
if (ipcConnected && nextShellStatusPollAt < nextCheckpointAt)
|
||||
{
|
||||
nextCheckpointAt = nextShellStatusPollAt;
|
||||
}
|
||||
|
||||
var delay = nextCheckpointAt - now;
|
||||
if (delay > TimeSpan.FromSeconds(1))
|
||||
{
|
||||
@@ -351,11 +376,11 @@ internal sealed class HostStartupMonitor
|
||||
if (!connected && !request.HostProcess.HasExited)
|
||||
{
|
||||
request.AttemptRegistry.MarkOwnedWaitingForShell("Host process is still running, but public IPC is not ready yet.");
|
||||
request.PublishCoordinatorStatus(true, false, true);
|
||||
request.PublishCoordinatorStatus(true, true, false);
|
||||
return new Outcome(
|
||||
true,
|
||||
"startup_pending",
|
||||
"Host process is still running; Launcher will not start another process while public IPC finishes startup.",
|
||||
false,
|
||||
"ipc_connection_failed",
|
||||
$"Host process is still running after {StartupTimeoutPolicy.HardTimeout.TotalSeconds:0} seconds, but public IPC connection could not be established. This may indicate the host is stuck during initialization.",
|
||||
recoveryActivationAttempted,
|
||||
request.ComposeLaunchDetails(true, recoveryActivationAttempted));
|
||||
}
|
||||
|
||||
@@ -89,6 +89,14 @@ internal sealed class StartupAttemptRegistry
|
||||
ExecuteWithLock(() =>
|
||||
{
|
||||
var existing = LoadUnsafe();
|
||||
|
||||
// 清理过期的记录
|
||||
if (existing is not null && IsStaleAttempt(existing))
|
||||
{
|
||||
Logger.Info($"Cleaning up stale startup attempt record. AttemptId='{existing.AttemptId}'; State='{existing.State}'; Age={(DateTimeOffset.UtcNow - existing.UpdatedAtUtc).TotalMinutes:0.1}min.");
|
||||
existing = null;
|
||||
}
|
||||
|
||||
if (existing is not null && IsCoordinatorLive(existing))
|
||||
{
|
||||
active = Clone(existing);
|
||||
@@ -145,6 +153,34 @@ internal sealed class StartupAttemptRegistry
|
||||
return reserved is not null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether a persisted startup attempt record is stale and may be discarded.
/// </summary>
/// <param name="record">The startup attempt record to evaluate.</param>
/// <returns>
/// <c>true</c> when the record has not been updated for more than 10 minutes, or when its
/// coordinator process is no longer alive and either the heartbeat is older than 2 minutes
/// or the host process is no longer alive as well; otherwise <c>false</c>.
/// </returns>
private static bool IsStaleAttempt(StartupAttemptRecord record)
{
    // Single clock snapshot so both age checks are measured against the same instant.
    var now = DateTimeOffset.UtcNow;

    // Age alone is sufficient here; the record's State is not consulted.
    if (now - record.UpdatedAtUtc > TimeSpan.FromMinutes(10))
    {
        return true;
    }

    // Every remaining rule requires a known coordinator that is no longer running.
    // Probe it once so the rules below cannot observe two different answers.
    var coordinatorGone = record.CoordinatorPid > 0 &&
                          !TryGetLiveProcess(record.CoordinatorPid, out _);
    if (!coordinatorGone)
    {
        return false;
    }

    // Coordinator is dead and its heartbeat has lapsed.
    if (now - record.HeartbeatAtUtc > TimeSpan.FromMinutes(2))
    {
        return true;
    }

    // Coordinator is dead and the host process it recorded is dead too.
    return record.HostPid > 0 && !TryGetLiveProcess(record.HostPid, out _);
}
|
||||
|
||||
public StartupAttemptRecord? GetOwnedAttempt()
|
||||
{
|
||||
StartupAttemptRecord? result = null;
|
||||
|
||||
@@ -2,22 +2,26 @@ namespace LanMountainDesktop.Launcher.Startup;
|
||||
|
||||
/// <summary>
/// Timing constants used by the launcher while it starts and monitors the host process.
/// </summary>
internal static class StartupTimeoutPolicy
{
    /// <summary>Soft startup timeout.</summary>
    public static readonly TimeSpan SoftTimeout = TimeSpan.FromSeconds(45);

    /// <summary>Hard startup timeout.</summary>
    public static readonly TimeSpan HardTimeout = TimeSpan.FromSeconds(180);

    /// <summary>Initial Public IPC connect attempt (AOT cold start is significantly slower).</summary>
    public static readonly TimeSpan InitialIpcConnectTimeout = TimeSpan.FromMilliseconds(3000);

    /// <summary>Subsequent reconnect attempts use increasing per-try timeouts.</summary>
    public static readonly TimeSpan[] IpcReconnectAttemptTimeouts =
    [
        TimeSpan.FromMilliseconds(800),
        TimeSpan.FromMilliseconds(1500),
        TimeSpan.FromMilliseconds(3000),
        TimeSpan.FromMilliseconds(5000),
        TimeSpan.FromMilliseconds(8000),
        TimeSpan.FromMilliseconds(10000)
    ];

    /// <summary>Timeout for probing an existing host.</summary>
    public static readonly TimeSpan ExistingHostProbeTimeout = TimeSpan.FromMilliseconds(1500);

    /// <summary>Interval between shell status polls.</summary>
    public static readonly TimeSpan ShellStatusPollInterval = TimeSpan.FromSeconds(1);

    /// <summary>Interval between Public IPC reconnect attempts.</summary>
    public static readonly TimeSpan IpcReconnectInterval = TimeSpan.FromSeconds(3);

    /// <summary>Maximum time to wait for host process exit after it starts (for early-exit detection).</summary>
    public static readonly TimeSpan HostEarlyExitWindow = TimeSpan.FromSeconds(5);
}
|
||||
|
||||
Reference in New Issue
Block a user