From 9c256815e477e70d3acef57c0820e05a6b0e16cc Mon Sep 17 00:00:00 2001 From: Oliver Giles Date: Sat, 24 Feb 2018 18:53:11 +0200 Subject: [PATCH] resolves #29: graceful shutdown on SIGINT/SIGTERM: 1. stop accepting new connections 2. send SIGTERM to all child tasks 3. wait for processes to end 4. drop all websockets --- src/interface.h | 6 ++++ src/laminar.cpp | 32 +++++------------ src/laminar.h | 3 +- src/run.cpp | 25 +++++++++++--- src/run.h | 4 ++- src/server.cpp | 82 ++++++++++++++++++++++++++++++-------------- src/server.h | 9 +++-- test/test-run.cpp | 32 ++++++++++++++--- test/test-server.cpp | 2 ++ 9 files changed, 132 insertions(+), 63 deletions(-) diff --git a/src/interface.h b/src/interface.h index 313894c..4061233 100644 --- a/src/interface.h +++ b/src/interface.h @@ -125,6 +125,12 @@ struct LaminarInterface { // string. This shouldn't be used, because the sysadmin should have // configured a real webserver to serve these things. virtual std::string getCustomCss() = 0; + + // Abort all running jobs + virtual void abortAll() = 0; + + // Callback for laminar to reap child processes. + virtual void reapChildren() = 0; }; #endif // LAMINAR_INTERFACE_H_ diff --git a/src/laminar.cpp b/src/laminar.cpp index bf2c624..67fe822 100644 --- a/src/laminar.cpp +++ b/src/laminar.cpp @@ -22,7 +22,6 @@ #include "log.h" #include -#include #include #include @@ -365,31 +364,10 @@ void Laminar::run() { const char* listen_http = getenv("LAMINAR_BIND_HTTP") ?: INTADDR_HTTP_DEFAULT; srv = new Server(*this, listen_rpc, listen_http); - - // handle SIGCHLD - sigset_t mask; - sigemptyset(&mask); - sigaddset(&mask, SIGCHLD); - sigprocmask(SIG_BLOCK, &mask, nullptr); - int sigchld = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); - srv->addDescriptor(sigchld, [this](const char* buf, size_t){ - const struct signalfd_siginfo* siginfo = reinterpret_cast(buf); - // TODO: re-enable assertion when the cause for its triggering - // is discovered and solved - //KJ_ASSERT(siginfo->ssi_signo == SIGCHLD); - if(siginfo->ssi_signo == SIGCHLD) { - reapAdvance(); - assignNewJobs(); - } else { - LLOG(ERROR, "Unexpected signo", siginfo->ssi_signo); - } - }); - srv->start(); } void Laminar::stop() { - clients.clear(); srv->stop(); } @@ -523,7 +501,7 @@ void Laminar::handleRunLog(std::shared_ptr run, std::string s) { // Reaps a zombie and steps the corresponding Run to its next state. // Should be called on SIGCHLD -void Laminar::reapAdvance() { +void Laminar::reapChildren() { int ret = 0; pid_t pid; constexpr int bufsz = 1024; @@ -548,6 +526,14 @@ void Laminar::reapAdvance() { if(completed) run->complete(); } + + assignNewJobs(); +} + +void Laminar::abortAll() { + for(std::shared_ptr run : activeJobs) { + run->abort(); + } } bool Laminar::nodeCanQueue(const Node& node, const Run& run) const { diff --git a/src/laminar.h b/src/laminar.h index 3a0cbcd..44015df 100644 --- a/src/laminar.h +++ b/src/laminar.h @@ -57,10 +57,11 @@ public: bool setParam(std::string job, uint buildNum, std::string param, std::string value) override; bool getArtefact(std::string path, std::string& result) override; std::string getCustomCss() override; + void reapChildren() override; + void abortAll() override; private: bool loadConfiguration(); - void reapAdvance(); void assignNewJobs(); bool stepRun(std::shared_ptr run); void handleRunLog(std::shared_ptr run, std::string log); diff --git a/src/run.cpp b/src/run.cpp index f660195..ec96151 100644 --- a/src/run.cpp +++ b/src/run.cpp @@ -58,9 +58,6 @@ std::string Run::reason() const { } bool Run::step() { - if(!currentScript.path.empty() && procStatus != 0) - result = RunState::FAILED; - if(scripts.size()) { currentScript = scripts.front(); scripts.pop(); @@ -75,6 +72,9 @@ bool Run::step() { sigaddset(&mask, SIGCHLD); sigprocmask(SIG_UNBLOCK, &mask, nullptr); + // set pgid == pid for easy killing on abort + setpgid(0, 0); + close(pfd[0]); dup2(pfd[1], 1); dup2(pfd[1], 2); @@ -127,14 +127,31 @@ bool Run::step() { return true; } } + void Run::addScript(std::string scriptPath, std::string scriptWorkingDir) { scripts.push({scriptPath, scriptWorkingDir}); } + void Run::addEnv(std::string path) { env.push_back(path); } + +void Run::abort() { + // clear all pending scripts + std::queue