Skip to content

Commit

Permalink
Fix valid cluster UUID logic for uncommitted cluster UUIDs
Browse files Browse the repository at this point in the history
Signed-off-by: Sooraj Sinha <soosinha@amazon.com>
  • Loading branch information
soosinha committed Oct 25, 2023
1 parent b5299f1 commit 4c046be
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -868,25 +868,31 @@ private Map<String, ClusterMetadataManifest> getLatestManifestForAllClusterUUIDs
* @return List of cluster UUIDs. The first element is the most recent cluster UUID in the chain
*/
private List<String> createClusterChain(final Map<String, ClusterMetadataManifest> manifestsByClusterUUID, final String clusterName) {
final Map<String, String> clusterUUIDGraph = manifestsByClusterUUID.values()
final List<ClusterMetadataManifest> validClusterManifests = manifestsByClusterUUID.values()
.stream()
.filter(this::isValidClusterUUID)
.collect(Collectors.toList());
final Map<String, String> clusterUUIDGraph = validClusterManifests.stream()
.collect(Collectors.toMap(ClusterMetadataManifest::getClusterUUID, ClusterMetadataManifest::getPreviousClusterUUID));
final List<String> validClusterUUIDs = manifestsByClusterUUID.values()
.stream()
.filter(m -> !isInvalidClusterUUID(m) && !clusterUUIDGraph.containsValue(m.getClusterUUID()))
final List<String> topLevelClusterUUIDs = validClusterManifests.stream()
.map(ClusterMetadataManifest::getClusterUUID)
.filter(clusterUUID -> !clusterUUIDGraph.containsValue(clusterUUID))
.collect(Collectors.toList());
if (validClusterUUIDs.isEmpty()) {
logger.info("There is no valid previous cluster UUID");

if (topLevelClusterUUIDs.isEmpty()) {
// This can occur only when there are no valid cluster UUIDs
assert validClusterManifests.isEmpty() : "There are no top level cluster UUIDs even when there are valid cluster UUIDs";
logger.info("There is no valid previous cluster UUID. All cluster UUIDs evaluated are: {}", manifestsByClusterUUID.keySet());
return Collections.emptyList();
}
if (validClusterUUIDs.size() > 1) {
if (topLevelClusterUUIDs.size() > 1) {
logger.info("Top level cluster UUIDs: {}", topLevelClusterUUIDs);
// If the valid cluster UUIDs are more that 1, it means there was some race condition where
// more then 2 cluster manager nodes tried to become active cluster manager and published
// 2 cluster UUIDs which followed the same previous UUID.
final Map<String, ClusterMetadataManifest> manifestsByClusterUUIDTrimmed = trimClusterUUIDs(
manifestsByClusterUUID,
validClusterUUIDs,
topLevelClusterUUIDs,
clusterName
);
if (manifestsByClusterUUID.size() == manifestsByClusterUUIDTrimmed.size()) {
Expand All @@ -895,19 +901,20 @@ private List<String> createClusterChain(final Map<String, ClusterMetadataManifes
Locale.ROOT,
"The system has ended into multiple valid cluster states in the remote store. "
+ "Please check their latest manifest to decide which one you want to keep. Valid Cluster UUIDs: - %s",
validClusterUUIDs
topLevelClusterUUIDs
)
);
}
return createClusterChain(manifestsByClusterUUIDTrimmed, clusterName);
}
final List<String> validChain = new ArrayList<>();
String currentUUID = validClusterUUIDs.get(0);
String currentUUID = topLevelClusterUUIDs.get(0);
while (currentUUID != null && !ClusterState.UNKNOWN_UUID.equals(currentUUID)) {
validChain.add(currentUUID);
// Getting the previous cluster UUID of a cluster UUID from the clusterUUID Graph
currentUUID = clusterUUIDGraph.get(currentUUID);
}
logger.info("Constructed cluster chain as : {}", validChain);
return validChain;
}

Expand All @@ -928,11 +935,7 @@ private Map<String, ClusterMetadataManifest> trimClusterUUIDs(
// Here we compare the manifest of current UUID to that of previous UUID
// In case currentUUID's latest manifest is same as previous UUIDs latest manifest,
// that means it was restored from previousUUID and no IndexMetadata update was performed on it.
if (ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) {
if (currentManifest.getIndices().isEmpty()) {
trimmedUUIDs.remove(clusterUUID);
}
} else {
if (!ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) {
ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID());
if (isMetadataEqual(currentManifest, previousManifest, clusterName)
&& isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) {
Expand Down Expand Up @@ -971,8 +974,8 @@ private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMeta
return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata);
}

private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) {
return !manifest.isClusterUUIDCommitted();
private boolean isValidClusterUUID(ClusterMetadataManifest manifest) {
return manifest.isClusterUUIDCommitted();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti
"cluster-uuid3",
"cluster-uuid1"
);
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean());
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean(), Collections.emptyMap());

remoteClusterStateService.start();
String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster");
Expand All @@ -927,6 +927,30 @@ public void testGetValidPreviousClusterUUIDWithInvalidMultipleChains() throws IO
assertThrows(IllegalStateException.class, () -> remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"));
}

public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOException {
Map<String, String> clusterUUIDsPointers = Map.of(
"cluster-uuid1",
ClusterState.UNKNOWN_UUID,
"cluster-uuid2",
"cluster-uuid1",
"cluster-uuid3",
"cluster-uuid2"
);
Map<String, Boolean> clusterUUIDCommitted = Map.of(
"cluster-uuid1",
true,
"cluster-uuid2",
true,
"cluster-uuid3",
false
);
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, clusterUUIDCommitted);

remoteClusterStateService.start();
String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster");
assertThat(previousClusterUUID, equalTo("cluster-uuid2"));
}

public void testDeleteStaleClusterUUIDs() throws IOException {
final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build();
ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder()
Expand Down Expand Up @@ -1122,10 +1146,14 @@ public void testGlobalMetadataUploadWaitTimeSetting() {
}

private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException {
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false);
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap());
}

private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers, Map<String, Boolean> clusterUUIDCommitted) throws IOException {
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted);
}

private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers, boolean differGlobalMetadata)
private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers, boolean differGlobalMetadata, Map<String, Boolean> clusterUUIDCommitted)
throws IOException {
final BlobPath blobPath = mock(BlobPath.class);
when((blobStoreRepository.basePath())).thenReturn(blobPath);
Expand All @@ -1149,7 +1177,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
clusterUUIDsPointers.get("cluster-uuid1"),
randomAlphaOfLength(10),
uploadedIndexMetadataList1,
"test-metadata1"
"test-metadata1",
clusterUUIDCommitted.getOrDefault("cluster-uuid1", true)
);
Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexMetadata indexMetadata1 = IndexMetadata.builder("index1")
Expand Down Expand Up @@ -1178,7 +1207,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
clusterUUIDsPointers.get("cluster-uuid2"),
randomAlphaOfLength(10),
uploadedIndexMetadataList2,
"test-metadata2"
"test-metadata2",
clusterUUIDCommitted.getOrDefault("cluster-uuid2", true)
);
IndexMetadata indexMetadata3 = IndexMetadata.builder("index1")
.settings(indexSettings)
Expand Down Expand Up @@ -1223,7 +1253,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
clusterUUIDsPointers.get("cluster-uuid3"),
randomAlphaOfLength(10),
uploadedIndexMetadataList3,
"test-metadata3"
"test-metadata3",
clusterUUIDCommitted.getOrDefault("cluster-uuid3", true)
);
mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3);
mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1);
Expand Down Expand Up @@ -1251,7 +1282,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest(
String previousClusterUUID,
String stateUUID,
List<UploadedIndexMetadata> uploadedIndexMetadata,
String globalMetadataFileName
String globalMetadataFileName,
Boolean isUUIDCommitted
) {
return ClusterMetadataManifest.builder()
.indices(uploadedIndexMetadata)
Expand All @@ -1263,7 +1295,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest(
.opensearchVersion(VersionUtils.randomOpenSearchVersion(random()))
.previousClusterUUID(previousClusterUUID)
.committed(true)
.clusterUUIDCommitted(true)
.clusterUUIDCommitted(isUUIDCommitted)
.globalMetadataFileName(globalMetadataFileName)
.codecVersion(ClusterMetadataManifest.CODEC_V1)
.build();
Expand Down

0 comments on commit 4c046be

Please sign in to comment.