From 861fcbddfc62cd51863872dfa84ef36df0f63264 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 9 Jul 2025 16:51:15 -0700 Subject: [PATCH] [CAS] Delay CAS initialization on server side after daemon starts For auto CAS validation and recovery, it is important that server does not open the CAS until daemon replies so daemon has the chance to validate and recovery from broken CAS if needed. Otherwise, this is going to be a dead lock on daemon waiting for server to close the CAS before deletion. rdar://155342429 (cherry picked from commit 4095d2d70818e9c08f1a2b2c6c5bf89eb04b440d) --- clang/test/CAS/daemon-cas-recovery.c | 13 +++ clang/test/CAS/depscan-cas-log.c | 6 +- clang/tools/driver/cc1depscan_main.cpp | 129 +++++++++++++------------ 3 files changed, 84 insertions(+), 64 deletions(-) create mode 100644 clang/test/CAS/daemon-cas-recovery.c diff --git a/clang/test/CAS/daemon-cas-recovery.c b/clang/test/CAS/daemon-cas-recovery.c new file mode 100644 index 0000000000000..8198d7b2556f6 --- /dev/null +++ b/clang/test/CAS/daemon-cas-recovery.c @@ -0,0 +1,13 @@ +// REQUIRES: system-darwin, clang-cc1daemon + +// RUN: rm -rf %t && mkdir -p %t + +/// Construct a malformed CAS to recovery from. +// RUN: echo "abc" | llvm-cas --cas %t/cas --make-blob --data - +// RUN: rm %t/cas/v1.1/v9.data +// RUN: not llvm-cas --cas %t/cas --validate --check-hash + +// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_FORCE_VALIDATION=1 %clang-cache \ +// RUN: %clang -fsyntax-only -x c %s + +int func(void); diff --git a/clang/test/CAS/depscan-cas-log.c b/clang/test/CAS/depscan-cas-log.c index f00634343f44a..088ebd1d99c58 100644 --- a/clang/test/CAS/depscan-cas-log.c +++ b/clang/test/CAS/depscan-cas-log.c @@ -13,8 +13,8 @@ // CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index' // CHECK: [[PID1]] {{[0-9]*}}: create subtrie -// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index' // Even a minimal compilation involves at least 9 records for the cache key. -// CHECK-COUNT-9: [[PID2]] {{[0-9]*}}: create record +// CHECK-COUNT-9: [[PID1]] {{[0-9]*}}: create record -// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v9.index' +// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v9.index' +// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v9.index' diff --git a/clang/tools/driver/cc1depscan_main.cpp b/clang/tools/driver/cc1depscan_main.cpp index 697704c54cd4f..82893bb5c007c 100644 --- a/clang/tools/driver/cc1depscan_main.cpp +++ b/clang/tools/driver/cc1depscan_main.cpp @@ -357,13 +357,14 @@ makeDepscanDaemonPath(StringRef Mode, const DepscanSharing &Sharing) { return std::nullopt; } -static Expected scanAndUpdateCC1Inline( - const char *Exec, ArrayRef InputArgs, - StringRef WorkingDirectory, SmallVectorImpl &OutputArgs, - bool ProduceIncludeTree, bool &DiagnosticErrorOccurred, - llvm::function_ref SaveArg, - const CASOptions &CASOpts, std::shared_ptr DB, - std::shared_ptr Cache); +static int +scanAndUpdateCC1Inline(const char *Exec, ArrayRef InputArgs, + StringRef WorkingDirectory, + SmallVectorImpl &OutputArgs, + bool ProduceIncludeTree, + llvm::function_ref SaveArg, + const CASOptions &CASOpts, DiagnosticsEngine &Diag, + std::optional &RootID); static Expected scanAndUpdateCC1InlineWithTool( tooling::dependencies::DependencyScanningTool &Tool, @@ -372,14 +373,17 @@ static Expected scanAndUpdateCC1InlineWithTool( SmallVectorImpl &OutputArgs, llvm::cas::ObjectStore &DB, llvm::function_ref SaveArg); -static llvm::Expected scanAndUpdateCC1UsingDaemon( +static int scanAndUpdateCC1UsingDaemon( const char *Exec, ArrayRef OldArgs, StringRef WorkingDirectory, SmallVectorImpl &NewArgs, - std::string &DiagnosticOutput, StringRef Path, - const DepscanSharing &Sharing, + StringRef Path, const DepscanSharing &Sharing, DiagnosticsEngine &Diag, llvm::function_ref SaveArg, - llvm::cas::ObjectStore &CAS) { + const CASOptions &CASOpts, std::optional &Root) { using namespace clang::cc1depscand; + auto reportScanFailure = [&](Error E) { + Diag.Report(diag::err_cas_depscan_failed) << std::move(E); + return 1; + }; // FIXME: Skip some of this if -fcas-fs has been passed. @@ -389,12 +393,12 @@ static llvm::Expected scanAndUpdateCC1UsingDaemon( ? ScanDaemon::connectToDaemonAndShakeHands(Path) : ScanDaemon::constructAndShakeHands(Path, Exec, Sharing); if (!Daemon) - return Daemon.takeError(); + return reportScanFailure(Daemon.takeError()); CC1DepScanDProtocol Comms(*Daemon); // llvm::dbgs() << "sending request...\n"; if (auto E = Comms.putCommand(WorkingDirectory, OldArgs)) - return std::move(E); + return reportScanFailure(std::move(E)); llvm::BumpPtrAllocator Alloc; llvm::StringSaver Saver(Alloc); @@ -403,23 +407,32 @@ static llvm::Expected scanAndUpdateCC1UsingDaemon( StringRef FailedReason; StringRef RootID; StringRef DiagOut; - if (auto E = Comms.getScanResult(Saver, Result, FailedReason, RootID, - RawNewArgs, DiagOut)) { - DiagnosticOutput = DiagOut; - return std::move(E); - } - DiagnosticOutput = DiagOut; + auto E = Comms.getScanResult(Saver, Result, FailedReason, RootID, RawNewArgs, + DiagOut); + // Send the diagnostics to std::err. + llvm::errs() << DiagOut; + if (E) + return reportScanFailure(std::move(E)); if (Result != CC1DepScanDProtocol::SuccessResult) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "depscan daemon failed: " + FailedReason); + return reportScanFailure( + llvm::createStringError("depscan daemon failed: " + FailedReason)); // FIXME: Avoid this duplication. NewArgs.resize(RawNewArgs.size()); for (int I = 0, E = RawNewArgs.size(); I != E; ++I) NewArgs[I] = SaveArg(RawNewArgs[I]); - return CAS.parseID(RootID); + // Create CAS after daemon returns the result so daemon can perform corrupted + // CAS recovery. + auto [CAS, _] = CASOpts.getOrCreateDatabases(Diag); + if (!CAS) + return 1; + + if (auto E = CAS->parseID(RootID).moveInto(Root)) + return reportScanFailure(std::move(E)); + + return 0; } // FIXME: This is a copy of Command::writeResponseFile. Command is too deeply @@ -446,8 +459,6 @@ static int scanAndUpdateCC1(const char *Exec, ArrayRef OldArgs, DiagnosticsEngine &Diag, const llvm::opt::ArgList &Args, const CASOptions &CASOpts, - std::shared_ptr DB, - std::shared_ptr Cache, std::optional &RootID) { using namespace clang::driver; @@ -513,25 +524,14 @@ static int scanAndUpdateCC1(const char *Exec, ArrayRef OldArgs, if (ProduceIncludeTree) Sharing.CASArgs.push_back("-fdepscan-include-tree"); - std::string DiagnosticOutput; - bool DiagnosticErrorOccurred = false; - auto ScanAndUpdate = [&]() { - if (std::optional DaemonPath = - makeDepscanDaemonPath(Mode, Sharing)) - return scanAndUpdateCC1UsingDaemon(Exec, OldArgs, WorkingDirectory, - NewArgs, DiagnosticOutput, *DaemonPath, - Sharing, SaveArg, *DB); - return scanAndUpdateCC1Inline(Exec, OldArgs, WorkingDirectory, NewArgs, - ProduceIncludeTree, DiagnosticErrorOccurred, - SaveArg, CASOpts, DB, Cache); - }; - if (llvm::Error E = ScanAndUpdate().moveInto(RootID)) { - Diag.Report(diag::err_cas_depscan_failed) << std::move(E); - if (!DiagnosticOutput.empty()) - llvm::errs() << DiagnosticOutput; - return 1; - } - return DiagnosticErrorOccurred; + if (auto DaemonPath = makeDepscanDaemonPath(Mode, Sharing)) + return scanAndUpdateCC1UsingDaemon(Exec, OldArgs, WorkingDirectory, NewArgs, + *DaemonPath, Sharing, Diag, SaveArg, + CASOpts, RootID); + + return scanAndUpdateCC1Inline(Exec, OldArgs, WorkingDirectory, NewArgs, + ProduceIncludeTree, SaveArg, CASOpts, Diag, + RootID); } int cc1depscan_main(ArrayRef Argv, const char *Argv0, @@ -591,12 +591,8 @@ int cc1depscan_main(ArrayRef Argv, const char *Argv0, CompilerInvocation::ParseCASArgs(CASOpts, ParsedCC1Args, Diags); CASOpts.ensurePersistentCAS(); - auto [CAS, Cache] = CASOpts.getOrCreateDatabases(Diags); - if (!CAS || !Cache) - return 1; - if (int Ret = scanAndUpdateCC1(Argv0, CC1Args->getValues(), NewArgs, Diags, - Args, CASOpts, CAS, Cache, RootID)) + Args, CASOpts, RootID)) return Ret; // FIXME: Use OutputBackend to OnDisk only now. @@ -841,7 +837,8 @@ void ScanServer::start(bool Exclusive, ArrayRef CASArgs) { ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded( CASPath, /*CheckHash=*/true, /*AllowRecovery=*/true, - /*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage))); + /*Force=*/getenv("LLVM_CAS_FORCE_VALIDATION"), + findLLVMCasBinary(Argv0, LLVMCasStorage))); }); // Check the pidfile. @@ -1106,13 +1103,18 @@ static Expected scanAndUpdateCC1InlineWithTool( return *Root; } -static Expected scanAndUpdateCC1Inline( - const char *Exec, ArrayRef InputArgs, - StringRef WorkingDirectory, SmallVectorImpl &OutputArgs, - bool ProduceIncludeTree, bool &DiagnosticErrorOccurred, - llvm::function_ref SaveArg, - const CASOptions &CASOpts, std::shared_ptr DB, - std::shared_ptr Cache) { +static int +scanAndUpdateCC1Inline(const char *Exec, ArrayRef InputArgs, + StringRef WorkingDirectory, + SmallVectorImpl &OutputArgs, + bool ProduceIncludeTree, + llvm::function_ref SaveArg, + const CASOptions &CASOpts, DiagnosticsEngine &Diag, + std::optional &RootID) { + auto [DB, Cache] = CASOpts.getOrCreateDatabases(Diag); + if (!DB || !Cache) + return 1; + IntrusiveRefCntPtr FS; if (!ProduceIncludeTree) FS = llvm::cantFail(llvm::cas::createCachingOnDiskFileSystem(*DB)); @@ -1136,10 +1138,15 @@ static Expected scanAndUpdateCC1Inline( auto DiagsConsumer = std::make_unique( llvm::errs(), DiagOpts.get(), false); - auto Result = scanAndUpdateCC1InlineWithTool( - Tool, *DiagsConsumer, /*VerboseOS*/ nullptr, Exec, InputArgs, - WorkingDirectory, OutputArgs, *DB, SaveArg); - DiagnosticErrorOccurred = DiagsConsumer->getNumErrors() != 0; - return Result; + auto E = scanAndUpdateCC1InlineWithTool( + Tool, *DiagsConsumer, /*VerboseOS*/ nullptr, Exec, InputArgs, + WorkingDirectory, OutputArgs, *DB, SaveArg) + .moveInto(RootID); + if (E) { + Diag.Report(diag::err_cas_depscan_failed) << std::move(E); + return 1; + } + + return DiagsConsumer->getNumErrors() != 0; } #endif /* LLVM_ON_UNIX */