From 91257454169cfc19c503b34ed98cb7599b4124b4 Mon Sep 17 00:00:00 2001
From: Harald Sitter
Date: Mon, 26 Feb 2018 16:19:15 +0100
Subject: [PATCH] batch updates to the temporary db when publishing

updates with contents generation were super syscall-heavy. for each
path in a package (so at least 2-4, but ordinarily >4) we'd do a db.Put
in ContentsIndex which results in one syscall.Write. so, for every
package in a published repo we'd have to do *at least* 2 but ordinarily
>4 syscalls. this gets abysmally slow very quickly depending on the
available system specs.

instead, start a batch inside each package and finish it when we are
done with the package. this should keep the memory footprint
negligible, but reduce the write() calls from N to 1.

on one of KDE's servers I have seen update publishing of 7600 packages
go from ~28s to ~9s when using batch putting on an HDD. on my local
system the same set of packages goes from ~14s to ~6s on an SSD. (all
inodes in cache in both cases)
---
 deb/publish.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/deb/publish.go b/deb/publish.go
index d86409b12..d298abe2a 100644
--- a/deb/publish.go
+++ b/deb/publish.go
@@ -599,6 +599,13 @@ func (p *PublishedRepo) Publish(packagePool aptly.PackagePool, publishedStorageP
 			}
 		}
 
+		// Start a db batch. If we fill contents data we'll need
+		// to push each path of the package into the database.
+		// We'll want this batched so as to avoid an excessive
+		// amount of write() calls.
+		tempDB.StartBatch()
+		defer tempDB.FinishBatch()
+
 		for _, arch := range p.Architectures {
 			if pkg.MatchesArchitecture(arch) {
 				var bufWriter *bufio.Writer
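
For illustration, a minimal standalone sketch of the pattern the patch
applies: wrap the per-path Put calls for one package in a batch so the
backing store flushes them with a single write() instead of one per path.
The Storage interface and indexPackagePaths helper below are hypothetical,
modelled only on the calls visible in the diff and commit message
(StartBatch, FinishBatch, Put); they are not aptly's actual API.

// Package contents sketches batched writes into a contents index.
package contents

// Storage is an assumed stand-in for the temporary db used in the patch.
type Storage interface {
	StartBatch()
	FinishBatch() error
	Put(key, value []byte) error
}

// indexPackagePaths records every installed path of one package.
// Without the batch, each Put would translate into its own write();
// with it, the paths are buffered and flushed once in FinishBatch,
// keeping memory bounded to a single package's worth of paths.
func indexPackagePaths(db Storage, pkg string, paths []string) error {
	db.StartBatch()
	defer db.FinishBatch()

	for _, path := range paths {
		if err := db.Put([]byte(path), []byte(pkg)); err != nil {
			return err
		}
	}
	return nil
}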