Skip to content

Commit

Permalink
Implement GNU jobserver posix client support
Browse files Browse the repository at this point in the history
The core principle of a jobserver is simple:
before starting a new job (edge in ninja-speak),
a token must be acquired from an external entity as approval.

Once a job is finished, the token is returned to represent a free job slot.
In the case of GNU Make, this external entity is the parent process
which has executed Ninja and is managing the load capacity for
all subprocesses which it has spawned. Introducing client support
for this model allows Ninja to give load capacity management
to it's parent process, allowing it to control the number of
subprocesses that Ninja spawns at any given time.

This functionality is desirable when Ninja is part of a bigger build,
such as Yocto/OpenEmbedded, Openwrt/Linux, Buildroot, and Android.
Here, multiple compile jobs are executed in parallel
in order to maximize cpu utilization, but if each compile job in Ninja
uses all available cores, the system is overloaded.

This implementation instantiates the client in real_main()
and passes pointers to the Jobserver class into other classes.
All tokens are returned whenever the CommandRunner aborts,
and the current number of tokens compared to the current number
of running subprocesses controls the available load capacity,
used to determine how many new tokens to attempt to acquire
in order to try to start another job for each loop to find work.

Jobserver related functions are defined as no-op for Windows
pending Windows-specific support for the jobserver.

Co-authored-by: Martin Hundebøll <martin@geanix.com>
Co-developed-by: Martin Hundebøll <martin@geanix.com>
Signed-off-by: Martin Hundebøll <martin@geanix.com>
Signed-off-by: Michael Pratt <mcpratt@pm.me>
  • Loading branch information
mcprat and hundeboll committed Sep 18, 2024
1 parent 4b7d399 commit 9d67962
Show file tree
Hide file tree
Showing 9 changed files with 376 additions and 44 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ if(WIN32)
# errors by telling windows.h to not define those two.
add_compile_definitions(NOMINMAX)
else()
target_sources(libninja PRIVATE src/subprocess-posix.cc)
target_sources(libninja PRIVATE
src/jobserver-posix.cc
src/subprocess-posix.cc
)
if(CMAKE_SYSTEM_NAME STREQUAL "OS400" OR CMAKE_SYSTEM_NAME STREQUAL "AIX")
target_sources(libninja PRIVATE src/getopt.c)
# Build getopt.c, which can be compiled as either C or C++, as C++
Expand Down
1 change: 1 addition & 0 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ def has_re2c() -> bool:
objs += cxx('minidump-win32', variables=cxxvariables)
objs += cc('getopt')
else:
objs += cxx('jobserver-posix')
objs += cxx('subprocess-posix')
if platform.is_aix():
objs += cc('getopt')
Expand Down
44 changes: 38 additions & 6 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,16 @@ Edge* Plan::FindWork() {
return NULL;

Edge* work = ready_.top();

// Only initiate work if the jobserver client can acquire a token.
if (builder_ && builder_->jobserver_ &&
builder_->jobserver_->Enabled()) {
int job_tokens = builder_->jobserver_->Tokens();
work->job_token_ = builder_->jobserver_->Acquire();
if (job_tokens == builder_->jobserver_->Tokens())
return NULL;
}

ready_.pop();
return work;
}
Expand Down Expand Up @@ -199,6 +209,10 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) {
edge->pool()->EdgeFinished(*edge);
edge->pool()->RetrieveReadyEdges(&ready_);

// If jobserver is used, return the token for this job.
if (builder_ && builder_->jobserver_)
builder_->jobserver_->Release(&edge->job_token_);

// The rest of this function only applies to successful commands.
if (result != kEdgeSucceeded)
return true;
Expand Down Expand Up @@ -592,14 +606,18 @@ void Plan::Dump() const {
}

struct RealCommandRunner : public CommandRunner {
explicit RealCommandRunner(const BuildConfig& config) : config_(config) {}
explicit RealCommandRunner(const BuildConfig& config, Jobserver* jobserver) :
config_(config), jobserver_(jobserver) {}

size_t CanRunMore() const override;
bool StartCommand(Edge* edge) override;
bool WaitForCommand(Result* result) override;
vector<Edge*> GetActiveEdges() override;
void ClearJobTokens(const std::vector<Edge*>&) override;
void Abort() override;

const BuildConfig& config_;
Jobserver* jobserver_;
SubprocessSet subprocs_;
map<const Subprocess*, Edge*> subproc_to_edge_;
};
Expand All @@ -612,7 +630,13 @@ vector<Edge*> RealCommandRunner::GetActiveEdges() {
return edges;
}

void RealCommandRunner::ClearJobTokens(const std::vector<Edge*> &edges) {
for (Edge* edge : edges)
jobserver_->Release(&edge->job_token_);
}

void RealCommandRunner::Abort() {
ClearJobTokens(GetActiveEdges());
subprocs_.Clear();
}

Expand All @@ -628,6 +652,14 @@ size_t RealCommandRunner::CanRunMore() const {
capacity = load_capacity;
}

// When initialized, behave as if the implicit token is acquired already.
// Otherwise, this happens after a token is released but before it is replaced,
// so the base capacity is represented by job_tokens + 1 when positive.
// Add an extra loop on capacity for each job in order to get an extra token.
int job_tokens = jobserver_->Tokens();
if (job_tokens)
capacity = abs(job_tokens) - subproc_number + 2;

if (capacity < 0)
capacity = 0;

Expand Down Expand Up @@ -667,10 +699,10 @@ bool RealCommandRunner::WaitForCommand(Result* result) {
return true;
}

Builder::Builder(State* state, const BuildConfig& config, BuildLog* build_log,
DepsLog* deps_log, DiskInterface* disk_interface,
Status* status, int64_t start_time_millis)
: state_(state), config_(config), plan_(this), status_(status),
Builder::Builder(State* state, const BuildConfig& config, Jobserver* jobserver,
BuildLog* build_log, DepsLog* deps_log, DiskInterface* disk_interface,
Status* status, int64_t start_time_millis) : state_(state),
config_(config), jobserver_(jobserver), plan_(this), status_(status),
start_time_millis_(start_time_millis), disk_interface_(disk_interface),
explanations_(g_explaining ? new Explanations() : nullptr),
scan_(state, build_log, deps_log, disk_interface,
Expand Down Expand Up @@ -775,7 +807,7 @@ bool Builder::Build(string* err) {
if (config_.dry_run)
command_runner_.reset(new DryRunCommandRunner);
else
command_runner_.reset(new RealCommandRunner(config_));
command_runner_.reset(new RealCommandRunner(config_, jobserver_));
}

// We are about to start the build process.
Expand Down
9 changes: 6 additions & 3 deletions src/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "depfile_parser.h"
#include "exit_status.h"
#include "graph.h"
#include "jobserver.h"
#include "util.h" // int64_t

struct BuildLog;
Expand Down Expand Up @@ -161,6 +162,7 @@ struct CommandRunner {
virtual bool WaitForCommand(Result* result) = 0;

virtual std::vector<Edge*> GetActiveEdges() { return std::vector<Edge*>(); }
virtual void ClearJobTokens(const std::vector<Edge*>&) {}
virtual void Abort() {}
};

Expand All @@ -187,9 +189,9 @@ struct BuildConfig {

/// Builder wraps the build process: starting commands, updating status.
struct Builder {
Builder(State* state, const BuildConfig& config, BuildLog* build_log,
DepsLog* deps_log, DiskInterface* disk_interface, Status* status,
int64_t start_time_millis);
Builder(State* state, const BuildConfig& config, Jobserver* jobserver,
BuildLog* build_log, DepsLog* deps_log, DiskInterface* disk_interface,
Status* status, int64_t start_time_millis);
~Builder();

/// Clean up after interrupted commands by deleting output files.
Expand Down Expand Up @@ -224,6 +226,7 @@ struct Builder {

State* state_;
const BuildConfig& config_;
Jobserver* jobserver_;
Plan plan_;
std::unique_ptr<CommandRunner> command_runner_;
Status* status_;
Expand Down
Loading

0 comments on commit 9d67962

Please sign in to comment.