Skip to content

Commit

Permalink
Handle case with nested list of objects (#477)
Browse files Browse the repository at this point in the history
* Handle case with nested list of objects

Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* fix validateEmbeddingsFieldValues Method

Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* spotless formatting

Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Onboard jenkins prod docker images on github actions (#483)

Signed-off-by: Peter Zhu <zhujiaxi@amazon.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Update dependency org.json:json to v20231013 (#481)

Co-authored-by: mend-for-github-com[bot] <50673670+mend-for-github-com[bot]@users.noreply.github.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* [Backport main manually][bug fix] Fix async actions are left in neural_sparse query (#438) (#479)

* [bug fix] Fix async actions are left in neural_sparse query (#438)

* add serialization and deserialization

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

* hash, equals. + UT

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

* tidy

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

* add test

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

---------

Signed-off-by: zhichao-aws <zhichaog@amazon.com>
(cherry picked from commit 51e6c00)

* rm max_token_score

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

* add changelog

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

* tidy

Signed-off-by: zhichao-aws <zhichaog@amazon.com>

---------

Signed-off-by: zhichao-aws <zhichaog@amazon.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Fixed exception for case when Hybrid query being wrapped into bool query (#490)

* Adding null check for case when hybrid query wrapped into bool query

Signed-off-by: Martin Gaievski <gaievski@amazon.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Fixed Hybrid query for cases when it's wrapped into other compound queries (#498)

* Fixed nested field case

Signed-off-by: Martin Gaievski <gaievski@amazon.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Added the github action to copy the attached issues label to PR. (#504)

Signed-off-by: Navneet Verma <navneev@amazon.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Added support for jdk-21 (#500)

* Added support for jdk-21

Signed-off-by: Martin Gaievski <gaievski@amazon.com>
Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>

* Add unit tests + small fixes

Signed-off-by: krishy91 <crgkc.r@gmail.com>

* fix indentation

Signed-off-by: krishy91 <crgkc.r@gmail.com>

* remove unused code + add 2nd level nesting test

Signed-off-by: krishy91 <crgkc.r@gmail.com>

* add integration test for list of nested objects

Signed-off-by: krishy91 <crgkc.r@gmail.com>

---------

Signed-off-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>
Signed-off-by: Peter Zhu <zhujiaxi@amazon.com>
Signed-off-by: zhichao-aws <zhichaog@amazon.com>
Signed-off-by: Martin Gaievski <gaievski@amazon.com>
Signed-off-by: Navneet Verma <navneev@amazon.com>
Signed-off-by: krishy91 <crgkc.r@gmail.com>
Co-authored-by: Gopala-Krishna.Char <gopala-krishna.char@intrafind.de>
Co-authored-by: Peter Zhu <zhujiaxi@amazon.com>
Co-authored-by: mend-for-github-com[bot] <50673670+mend-for-github-com[bot]@users.noreply.github.com>
Co-authored-by: zhichao-aws <zhichaog@amazon.com>
Co-authored-by: Martin Gaievski <gaievski@amazon.com>
Co-authored-by: Navneet Verma <navneev@amazon.com>
  • Loading branch information
7 people committed Mar 1, 2024
1 parent b97dbe8 commit ea49d3c
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.Objects;
import java.util.function.BiConsumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -173,13 +174,28 @@ private void buildMapWithProcessorKeyAndOriginalValueForMapType(
if (processorKey == null || sourceAndMetadataMap == null) return;
if (processorKey instanceof Map) {
Map<String, Object> next = new LinkedHashMap<>();
for (Map.Entry<String, Object> nestedFieldMapEntry : ((Map<String, Object>) processorKey).entrySet()) {
buildMapWithProcessorKeyAndOriginalValueForMapType(
nestedFieldMapEntry.getKey(),
nestedFieldMapEntry.getValue(),
(Map<String, Object>) sourceAndMetadataMap.get(parentKey),
next
);
if (sourceAndMetadataMap.get(parentKey) instanceof Map) {
for (Map.Entry<String, Object> nestedFieldMapEntry : ((Map<String, Object>) processorKey).entrySet()) {
buildMapWithProcessorKeyAndOriginalValueForMapType(
nestedFieldMapEntry.getKey(),
nestedFieldMapEntry.getValue(),
(Map<String, Object>) sourceAndMetadataMap.get(parentKey),
next
);
}
} else if (sourceAndMetadataMap.get(parentKey) instanceof List) {
for (Map.Entry<String, Object> nestedFieldMapEntry : ((Map<String, Object>) processorKey).entrySet()) {
List<Map<String, Object>> list = (List<Map<String, Object>>) sourceAndMetadataMap.get(parentKey);
List<Object> listOfStrings = list.stream().map(x -> x.get(nestedFieldMapEntry.getKey())).collect(Collectors.toList());
Map<String, Object> map = new LinkedHashMap<>();
map.put(nestedFieldMapEntry.getKey(), listOfStrings);
buildMapWithProcessorKeyAndOriginalValueForMapType(
nestedFieldMapEntry.getKey(),
nestedFieldMapEntry.getValue(),
map,
next
);
}
}
treeRes.put(parentKey, next);
} else {
Expand Down Expand Up @@ -212,7 +228,7 @@ private void validateNestedTypeValue(String sourceKey, Object sourceValue, Suppl
if (maxDepth > MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING.get(environment.settings())) {
throw new IllegalArgumentException("map type field [" + sourceKey + "] reached max depth limit, cannot process it");
} else if ((List.class.isAssignableFrom(sourceValue.getClass()))) {
validateListTypeValue(sourceKey, sourceValue);
validateListTypeValue(sourceKey, sourceValue, maxDepthSupplier);
} else if (Map.class.isAssignableFrom(sourceValue.getClass())) {
((Map) sourceValue).values()
.stream()
Expand All @@ -226,9 +242,11 @@ private void validateNestedTypeValue(String sourceKey, Object sourceValue, Suppl
}

@SuppressWarnings({ "rawtypes" })
private void validateListTypeValue(String sourceKey, Object sourceValue) {
private void validateListTypeValue(String sourceKey, Object sourceValue, Supplier<Integer> maxDepthSupplier) {
for (Object value : (List) sourceValue) {
if (value == null) {
if (value instanceof Map) {
validateNestedTypeValue(sourceKey, value, () -> maxDepthSupplier.get() + 1);
} else if (value == null) {
throw new IllegalArgumentException("list type field [" + sourceKey + "] has null, cannot process it");
} else if (!(value instanceof String)) {
throw new IllegalArgumentException("list type field [" + sourceKey + "] has non string value, cannot process it");
Expand Down Expand Up @@ -275,13 +293,20 @@ private void putNLPResultToSourceMapForMapType(
if (processorKey == null || sourceAndMetadataMap == null || sourceValue == null) return;
if (sourceValue instanceof Map) {
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
putNLPResultToSourceMapForMapType(
inputNestedMapEntry.getKey(),
inputNestedMapEntry.getValue(),
results,
indexWrapper,
(Map<String, Object>) sourceAndMetadataMap.get(processorKey)
);
if (sourceAndMetadataMap.get(processorKey) instanceof List) {
// build nlp output for list of nested objects
for (Map<String, Object> nestedElement : (List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey)) {
nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++));
}
} else {
putNLPResultToSourceMapForMapType(
inputNestedMapEntry.getKey(),
inputNestedMapEntry.getValue(),
results,
indexWrapper,
(Map<String, Object>) sourceAndMetadataMap.get(processorKey)
);
}
}
} else if (sourceValue instanceof String) {
sourceAndMetadataMap.put(processorKey, results.get(indexWrapper.index++));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,15 @@ private void ingestDocument() throws Exception {
+ " \"favorites\": {\n"
+ " \"game\": \"overwatch\",\n"
+ " \"movie\": null\n"
+ " }\n"
+ " },\n"
+ " \"nested_passages\": [\n"
+ " {\n"
+ " \"text\": \"hello\"\n"
+ " },\n"
+ " {\n"
+ " \"text\": \"world\"\n"
+ " }\n"
+ " ]\n"
+ "}\n";
Response response = makeRequest(
client(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Arrays;
import java.util.function.BiConsumer;
import java.util.function.Supplier;

Expand Down Expand Up @@ -404,6 +405,35 @@ public void testBuildVectorOutput_withNestedMap_successful() {
assertNotNull(actionGamesKnn);
}

/**
 * Runs the processor against a document whose "nestedField" holds a list of objects
 * and checks that the first two list elements each end up with a non-null "vectorField".
 */
public void testBuildVectorOutput_withNestedList_successful() {
    Map<String, Object> processorConfig = createNestedListConfiguration();
    IngestDocument document = createNestedListIngestDocument();
    TextEmbeddingProcessor processor = createInstanceWithNestedMapConfiguration(processorConfig);

    // Collect the text to embed, then write the mock vectors back into the document source.
    Map<String, Object> keyToOriginalValue = processor.buildMapWithProcessorKeyAndOriginalValue(document);
    List<List<Float>> mockVectors = createMockVectorResult();
    processor.buildNLPResult(keyToOriginalValue, mockVectors, document.getSourceAndMetadata());

    List<Map<String, Object>> nestedElements = (List<Map<String, Object>>) document.getSourceAndMetadata().get("nestedField");
    for (int i = 0; i < 2; i++) {
        assertTrue(nestedElements.get(i).containsKey("vectorField"));
    }
    for (int i = 0; i < 2; i++) {
        assertNotNull(nestedElements.get(i).get("vectorField"));
    }
}

/**
 * Same check as the single-level nested-list test, but the list of objects sits one
 * level deeper: "nestedField" -> "nestedField" -> [ {...}, {...} ].
 * The first two list elements must each carry a non-null "vectorField" afterwards.
 */
public void testBuildVectorOutput_withNestedList_Level2_successful() {
    Map<String, Object> processorConfig = createNestedList2LevelConfiguration();
    IngestDocument document = create2LevelNestedListIngestDocument();
    TextEmbeddingProcessor processor = createInstanceWithNestedMapConfiguration(processorConfig);

    // Collect the text to embed, then write the mock vectors back into the document source.
    Map<String, Object> keyToOriginalValue = processor.buildMapWithProcessorKeyAndOriginalValue(document);
    List<List<Float>> mockVectors = createMockVectorResult();
    processor.buildNLPResult(keyToOriginalValue, mockVectors, document.getSourceAndMetadata());

    // Walk down through the outer map to reach the list of nested objects.
    Map<String, Object> outerLevel = (Map<String, Object>) document.getSourceAndMetadata().get("nestedField");
    List<Map<String, Object>> nestedElements = (List<Map<String, Object>>) outerLevel.get("nestedField");
    for (int i = 0; i < 2; i++) {
        assertTrue(nestedElements.get(i).containsKey("vectorField"));
    }
    for (int i = 0; i < 2; i++) {
        assertNotNull(nestedElements.get(i).get("vectorField"));
    }
}

public void test_updateDocument_appendVectorFieldsToDocument_successful() {
Map<String, Object> config = createPlainStringConfiguration();
IngestDocument ingestDocument = createPlainIngestDocument();
Expand Down Expand Up @@ -520,4 +550,44 @@ private IngestDocument createNestedMapIngestDocument() {
result.put("favorites", favorite);
return new IngestDocument(result, new HashMap<>());
}

/**
 * Builds a field-map configuration of the shape
 * {@code { "nestedField": { "textField": "vectorField" } }}.
 *
 * @return a new mutable map holding the configuration
 */
private Map<String, Object> createNestedListConfiguration() {
    Map<String, Object> configuration = new HashMap<>();
    Map<String, Object> textToVectorMapping = new HashMap<>();
    textToVectorMapping.put("textField", "vectorField");
    configuration.put("nestedField", textToVectorMapping);
    return configuration;
}

/**
 * Builds a two-level field-map configuration of the shape
 * {@code { "nestedField": { "nestedField": { "textField": "vectorField" } } }}.
 *
 * @return a new mutable map holding the configuration
 */
private Map<String, Object> createNestedList2LevelConfiguration() {
    Map<String, Object> configuration = new HashMap<>();
    Map<String, Object> firstLevel = new HashMap<>();
    Map<String, Object> textToVectorMapping = new HashMap<>();

    // Assemble from the innermost mapping outwards.
    textToVectorMapping.put("textField", "vectorField");
    firstLevel.put("nestedField", textToVectorMapping);
    configuration.put("nestedField", firstLevel);
    return configuration;
}

/**
 * Creates an ingest document whose source has a single key, "nestedField", mapped to a
 * list of two objects that each contain a "textField" string. The ingest metadata is empty.
 *
 * @return the new ingest document
 */
private IngestDocument createNestedListIngestDocument() {
    Map<String, Object> firstElement = new HashMap<>();
    Map<String, Object> secondElement = new HashMap<>();
    firstElement.put("textField", "This is a text field");
    secondElement.put("textField", "This is another text field");

    Map<String, Object> source = new HashMap<>();
    source.put("nestedField", Arrays.asList(firstElement, secondElement));
    return new IngestDocument(source, new HashMap<>());
}

/**
 * Creates an ingest document whose source is
 * {@code { "nestedField": { "nestedField": [ {textField}, {textField} ] } }},
 * i.e. the list of two text-bearing objects sits one map level below the top.
 * The ingest metadata is empty.
 *
 * @return the new ingest document
 */
private IngestDocument create2LevelNestedListIngestDocument() {
    Map<String, Object> firstElement = new HashMap<>();
    Map<String, Object> secondElement = new HashMap<>();
    firstElement.put("textField", "This is a text field");
    secondElement.put("textField", "This is another text field");

    // Inner map holds the list; outer map wraps the inner map under the same key.
    Map<String, Object> innerLevel = new HashMap<>();
    innerLevel.put("nestedField", Arrays.asList(firstElement, secondElement));
    Map<String, Object> source = new HashMap<>();
    source.put("nestedField", innerLevel);
    return new IngestDocument(source, new HashMap<>());
}
}
21 changes: 21 additions & 0 deletions src/test/resources/processor/IndexMappings.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,27 @@
},
"passage_text": {
"type": "text"
},
"nested_passages": {
"type": "nested",
"properties": {
"text": {
"type": "text"
},
"embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"name": "hnsw",
"space_type": "l2",
"engine": "lucene",
"parameters": {
"ef_construction": 128,
"m": 24
}
}
}
}
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/processor/PipelineConfiguration.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
"favorites": {
"game": "game_knn",
"movie": "movie_knn"
},
"nested_passages": {
"text": "embedding"
}
}
}
Expand Down

0 comments on commit ea49d3c

Please sign in to comment.