Skip to content

Commit

Permalink
LUCENE-10599: Improve LogMergePolicy's handling of maxMergeSize. (#935)
Browse files Browse the repository at this point in the history
With this change, segments are more likely to be considered for merging until
they reach the max merge size. Before this change, LogMergePolicy would exclude
an entire window of `mergeFactor` segments from merging if this window had a
too large segment and other segments were on the same tier.
  • Loading branch information
jpountz committed Jun 9, 2022
1 parent 54c67db commit b1eddec
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 17 deletions.
4 changes: 3 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ Optimizations

Bug Fixes
---------------------
(No changes)

* LUCENE-10599: LogMergePolicy is more likely to keep merging segments until
they reach the maximum merge size. (Adrien Grand)

Other
---------------------
Expand Down
40 changes: 25 additions & 15 deletions lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
Original file line number Diff line number Diff line change
Expand Up @@ -579,23 +579,41 @@ public MergeSpecification findMerges(
// Finally, record all merges that are viable at this level:
int end = start + mergeFactor;
while (end <= 1 + upto) {
boolean anyTooLarge = false;
boolean anyMerging = false;
long mergeSize = 0;
long mergeDocs = 0;
for (int i = start; i < end; i++) {
final SegmentInfoAndLevel segLevel = levels.get(i);
final SegmentCommitInfo info = segLevel.info;
anyTooLarge |=
(size(info, mergeContext) >= maxMergeSize
|| sizeDocs(info, mergeContext) >= maxMergeDocs);
if (mergingSegments.contains(info)) {
anyMerging = true;
break;
}
long segmentSize = size(info, mergeContext);
long segmentDocs = sizeDocs(info, mergeContext);
if (mergeSize + segmentSize > maxMergeSize || mergeDocs + segmentDocs > maxMergeDocs) {
// This merge is full, stop adding more segments to it
if (i == start) {
// This segment alone is too large, return a singleton merge
if (verbose(mergeContext)) {
message(
" " + i + " is larger than the max merge size/docs; ignoring", mergeContext);
}
end = i + 1;
} else {
// Previous segments are under the max merge size, return them
end = i;
}
break;
}
mergeSize += segmentSize;
mergeDocs += segmentDocs;
}

if (anyMerging) {
// skip
} else if (!anyTooLarge) {
if (anyMerging || end - start <= 1) {
// skip: there is an ongoing merge at the current level or the computed merge has a single
// segment and this merge policy doesn't do singleton merges
} else {
if (spec == null) {
spec = new MergeSpecification();
}
Expand All @@ -615,14 +633,6 @@ public MergeSpecification findMerges(
mergeContext);
}
spec.add(new OneMerge(mergeInfos));
} else if (verbose(mergeContext)) {
message(
" "
+ start
+ " to "
+ end
+ ": contains segment over maxMergeSize or maxMergeDocs; skipping",
mergeContext);
}

start = end;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws
protected void assertMerge(MergePolicy policy, MergeSpecification merge) throws IOException {
LogMergePolicy lmp = (LogMergePolicy) policy;
for (OneMerge oneMerge : merge.merges) {
assertEquals(lmp.getMergeFactor(), oneMerge.segments.size());
assertTrue(oneMerge.segments.size() <= lmp.getMergeFactor());
}
}

Expand Down Expand Up @@ -188,6 +188,60 @@ public void testRejectUnbalancedMerges() throws IOException {
assertEquals(10, segmentInfos.info(1).info.maxDoc());
}

public void testPackLargeSegments() throws IOException {
LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
IOStats stats = new IOStats();
mergePolicy.setMaxMergeDocs(10_000);
AtomicLong segNameGenerator = new AtomicLong();
MergeContext mergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount);
SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
// 10 segments below the max segment size, but larger than maxMergeSize/mergeFactor
for (int i = 0; i < 10; ++i) {
segmentInfos.add(
makeSegmentCommitInfo(
"_" + segNameGenerator.getAndIncrement(), 3_000, 0, 0, IndexWriter.SOURCE_MERGE));
}
MergeSpecification spec =
mergePolicy.findMerges(MergeTrigger.EXPLICIT, segmentInfos, mergeContext);
assertNotNull(spec);
for (OneMerge oneMerge : spec.merges) {
segmentInfos =
applyMerge(segmentInfos, oneMerge, "_" + segNameGenerator.getAndIncrement(), stats);
}
// LogMP packed 3 3k segments together
assertEquals(9_000, segmentInfos.info(0).info.maxDoc());
}

public void testIgnoreLargeSegments() throws IOException {
LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
IOStats stats = new IOStats();
mergePolicy.setMaxMergeDocs(10_000);
AtomicLong segNameGenerator = new AtomicLong();
MergeContext mergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount);
SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
// 1 segment that reached the maximum segment size
segmentInfos.add(
makeSegmentCommitInfo(
"_" + segNameGenerator.getAndIncrement(), 11_000, 0, 0, IndexWriter.SOURCE_MERGE));
// and 10 segments below the max segment size, but within the same level
for (int i = 0; i < 10; ++i) {
segmentInfos.add(
makeSegmentCommitInfo(
"_" + segNameGenerator.getAndIncrement(), 2_000, 0, 0, IndexWriter.SOURCE_MERGE));
}
// LogMergePolicy used to have a bug that would make it exclude the first mergeFactor segments
// from merging if any of them was above the maximum merged size
MergeSpecification spec =
mergePolicy.findMerges(MergeTrigger.EXPLICIT, segmentInfos, mergeContext);
assertNotNull(spec);
for (OneMerge oneMerge : spec.merges) {
segmentInfos =
applyMerge(segmentInfos, oneMerge, "_" + segNameGenerator.getAndIncrement(), stats);
}
assertEquals(11_000, segmentInfos.info(0).info.maxDoc());
assertEquals(10_000, segmentInfos.info(1).info.maxDoc());
}

public void testFullFlushMerges() throws IOException {
AtomicLong segNameGenerator = new AtomicLong();
IOStats stats = new IOStats();
Expand Down

0 comments on commit b1eddec

Please sign in to comment.