Skip to content

Commit

Permalink
Enhance tika document parsing tests (#13618)
Browse files Browse the repository at this point in the history
* Update tika document parsing bwc tests.

Signed-off-by: Carroll <carrofin@amazon.com>

* Skip sample tika files which do not parse consistently.

Signed-off-by: Carroll <carrofin@amazon.com>

* Formatting for spotlessJavaCheck.

Signed-off-by: Carroll <carrofin@amazon.com>

* Use fixed locale for consistent tika parsing.

Signed-off-by: Carroll <carrofin@amazon.com>

* Move sha1 map to .checksums file.

Signed-off-by: Carroll <carrofin@amazon.com>

* For locale-dependent files do not verify contents with hash.

Signed-off-by: Carroll <carrofin@amazon.com>

* Remove strict checksum validation for additional locale-dependent files.

Signed-off-by: Carroll <carrofin@amazon.com>

---------

Signed-off-by: Carroll <carrofin@amazon.com>
  • Loading branch information
finnegancarroll committed May 16, 2024
1 parent da3ab92 commit f217270
Show file tree
Hide file tree
Showing 2 changed files with 248 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,54 +32,67 @@

package org.opensearch.ingest.attachment;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.tika.metadata.Metadata;
import org.opensearch.common.io.PathUtils;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.test.OpenSearchTestCase;

import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;

/**
* Evil test-coverage cheat, we parse a bunch of docs from tika
* so that we have a nice grab-bag variety, and assert some content
* comes back and no exception.
* Parse sample tika documents and assert the contents has not changed according to previously recorded checksums.
* Uncaught changes to tika parsing could potentially pose bwc issues.
* Note: In some cases tika will access a user's locale to inform the parsing of a file.
* The checksums of these files are left empty, and we only validate that parsed content is not null.
*/
@SuppressFileSystems("ExtrasFS") // don't try to parse extraN
public class TikaDocTests extends OpenSearchTestCase {

/** some test files from tika test suite, zipped up */
/** some test files from the apache tika unit test suite with accompanying sha1 checksums */
static final String TIKA_FILES = "/org/opensearch/ingest/attachment/test/tika-files/";
static final String TIKA_CHECKSUMS = "/org/opensearch/ingest/attachment/test/.checksums";

public void testFiles() throws Exception {
Path tmp = createTempDir();
logger.debug("unzipping all tika sample files");
try (DirectoryStream<Path> stream = Files.newDirectoryStream(PathUtils.get(getClass().getResource(TIKA_FILES).toURI()))) {
for (Path doc : stream) {
String filename = doc.getFileName().toString();
TestUtil.unzip(getClass().getResourceAsStream(TIKA_FILES + filename), tmp);
}
}
public void testParseSamples() throws Exception {
String checksumJson = Files.readString(PathUtils.get(getClass().getResource(TIKA_CHECKSUMS).toURI()));
Map<String, Object> checksums = XContentHelper.convertToMap(JsonXContent.jsonXContent, checksumJson, false);
DirectoryStream<Path> stream = Files.newDirectoryStream(unzipToTemp(TIKA_FILES));

try (DirectoryStream<Path> stream = Files.newDirectoryStream(tmp)) {
for (Path doc : stream) {
logger.debug("parsing: {}", doc);
assertParseable(doc);
for (Path doc : stream) {
String parsedContent = tryParse(doc);
assertNotNull(parsedContent);
assertFalse(parsedContent.isEmpty());

String check = checksums.get(doc.getFileName().toString()).toString();
if (!check.isEmpty()) {
assertEquals(check, DigestUtils.sha1Hex(parsedContent));
}
}

stream.close();
}

void assertParseable(Path fileName) throws Exception {
try {
byte bytes[] = Files.readAllBytes(fileName);
String parsedContent = TikaImpl.parse(bytes, new Metadata(), -1);
assertNotNull(parsedContent);
assertFalse(parsedContent.isEmpty());
logger.debug("extracted content: {}", parsedContent);
} catch (Exception e) {
throw new RuntimeException("parsing of filename: " + fileName.getFileName() + " failed", e);
private Path unzipToTemp(String zipDir) throws Exception {
Path tmp = createTempDir();
DirectoryStream<Path> stream = Files.newDirectoryStream(PathUtils.get(getClass().getResource(zipDir).toURI()));

for (Path doc : stream) {
String filename = doc.getFileName().toString();
TestUtil.unzip(getClass().getResourceAsStream(zipDir + filename), tmp);
}

stream.close();
return tmp;
}

private String tryParse(Path doc) throws Exception {
byte bytes[] = Files.readAllBytes(doc);
return TikaImpl.parse(bytes, new Metadata(), -1);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
{
"testWORD_tabular_symbol.doc": "c708d7ef841f7e1748436b8ef5670d0b2de1a227",
"testWORD_1img.docx": "367e2ade13ca3c19bcd8a323e21d51d407e017ac",
"testMasterFooter.odp": "bcc59df70699c739423a50e362c722b81ae76498",
"testTXTNonASCIIUTF8.txt": "1ef514431ca8d838f11e99f8e4a0637730b77aa0",
"EmbeddedOutlook.docx": "c544a6765c19ba11b0bf3edb55c79e1bd8565c6e",
"testWORD_override_list_numbering.docx": "4e892319b921322916225def763f451e4bbb4e16",
"testTextBoxes.key": "b01581d5bd2483ce649a1a1406136359f4b93167",
"testPPT_masterText.pptx": "9fee8337b76dc3e196f4554dcde22b9dd1c3b3e8",
"testComment.docx": "333b9009686f27265b4729e8172b3e62048ec7ec",
"testRTFInvalidUnicode.rtf": "32b3e3d8e5c5a1b66cb15fc964b9341bea7048f4",
"testEXCEL_headers_footers.xlsx": "9e8d2a700fc431fe29030e86e08162fc8ecf2c1a",
"testWORD6.doc": "1479de589755c7212815445799c44dab69d4587c",
"testPagesHeadersFootersFootnotes.pages": "99d434be7de4902dc70700aa9c2a31624583c1f1",
"testPDF_no_extract_yes_accessibility_owner_empty.pdf": "6eb693dac68fece3bf3cd1aa9880ea9b23fc927c",
"testOpenOffice2.odt": "564b3e1999a53073a04142e01b663757a6e7fb08",
"testTables.key": "250cff75db7fc3c8b95b2cbd3f37308826e0c93d",
"testDOCX_Thumbnail.docx": "fce6a43271bc242e2bb8341afa659ed166e08050",
"testWORD_3imgs.docx": "292ca6fa41d32b462e66061e89adb19423721975",
"testPDF_acroform3.pdf": "dcf6588cb5e41701b168606ea6bfbadecdcd3bc9",
"testWORD_missing_ooxml_bean1.docx": "c3058f2513fecc0a6d76d3ecf55676f236b085ff",
"testPDFTwoTextBoxes.pdf": "4adf324ce030076b1755fdb3a6cce676ee325ae4",
"testRTFUnicodeGothic.rtf": "f9932470ff686b0c217ea94ed5d4f2fd85f7998e",
"headers.mbox": "75ec25789fe870b6d25365e4ea73d731fc274847",
"testPPT_embeded.ppt": "",
"testXML3.xml": "804d4812408eb324ae8483d2140b648ec871dd2a",
"testOptionalHyphen.doc": "10f9ca38cc2985e94967aa2c454bfe40aff76976",
"testComment.doc": "66e57653d5d08478556ca640408b172b65855cc7",
"testEXCEL_headers_footers.xls": "18977c66fc8bcb8c44de3063b69b65a3de9c3f25",
"testWORD_embedded_rtf.doc": "cc2d289acfe3d1068a2649b7fa0c06c50bb6ceda",
"testEXCEL_custom_props.xlsx": "6b72ae08362a204b37dbba0a30b4134ae3e7918f",
"testOptionalHyphen.docx": "5b8ffc0df1691a8fed7d63aa9b256e9e02e36d71",
"testPPT_various.pptx": "d149de9af8071141a6ba6e2cd4ef5f6d9431a826",
"testWORD_closingSmartQInHyperLink.doc": "9859f378c603b70bf0d44a281169ae5b16a21878",
"test_embedded_zip.pptx": "d19406edcec09440d066877c451ceba60abc3483",
"testRTFUmlautSpaces.rtf": "155b39879c5b5fbad22fd650be37ae7f91489eb2",
"protectedFile.xlsx": "ee08eeaf05c35c960243f831c3a974d9ee07aa28",
"Doc1_ole.doc": "fb63220506ab666f1fe87b0608e1447fd4fd3489",
"testEXCEL_embeded.xlsx": "",
"EmbeddedDocument.docx": "",
"testODFwithOOo3.odt": "3815d6fb7f5829db882ea8ebd664f252711e6e60",
"testPagesHeadersFootersRomanUpper.pages": "85b3cd545ba6c33e5d44b844a6afea8cb6eaec0b",
"testPPT_comment.ppt": "88fd667fd0292785395a8d0d229304aa91110556",
"testPPT_2imgs.pptx": "66eda11ad472918153100dad8ee5be0f1f8e2e04",
"testPagesHeadersFootersAlphaUpper.pages": "56bef0d1eaedfd7599aae29031d2eeb0e3fe4688",
"testWORD_text_box.docx": "e01f7b05c6aac3449b9a699c3e4d2e62ff3368a3",
"testWORD_missing_text.docx": "3814332884a090b6d1020bff58d0531486710c45",
"testComment.pdf": "60e181061a00454c2e622bd37a9878234c13231d",
"testPDF_no_extract_no_accessibility_owner_empty.pdf": "6eb693dac68fece3bf3cd1aa9880ea9b23fc927c",
"test_embedded_package.rtf": "cd90adb3f777e68aa0288fd23e8f4fbce260a763",
"testPDF_bom.pdf": "6eb693dac68fece3bf3cd1aa9880ea9b23fc927c",
"testOptionalHyphen.ppt": "7e016e42860bd408054bb8653fef39b2756119d9",
"testHTML_utf8.html": "3ba828044754772e4c9df5f9a2213beaa75842ef",
"testPPT_comment.pptx": "25fab588194dabd5902fd2ef880ee9542d036776",
"testRTFWithCurlyBraces.rtf": "019cab63b73ff89d094823cf50c0a721bec08ee2",
"testFooter.ods": "846e1d0415b23fa27631b536b0cf566abbf8fcc1",
"testPPT.ppt": "933ee556884b1d9e28b801daa0d77bbaa4f4be62",
"testEXCEL-formats.xls": "",
"testPPT_masterFooter.pptx": "29bb97006b3608b7db6ff72b94d20157878d94dd",
"testWORD_header_hyperlink.doc": "914bbec0730c54948ad307ea3e375ef0c100abf1",
"testRTFHyperlink.rtf": "2b2ffb1997aa495fbab1af490d134051de168c97",
"testExtraSpaces.pdf": "b5575400309b01c1050a927d8d1ecf8761062abc",
"testRTFWindowsCodepage1250.rtf": "7ba418843f401634f97d21c844c2c4093b7194fb",
"testRTFTableCellSeparation2.rtf": "62782ca40ff0ed6c3ba90f8055ee724b44af203f",
"testPagesHeadersFootersRomanLower.pages": "2410fc803907001eb39c201ad4184b243e271c6d",
"headerPic.docx": "c704bb648feac7975dff1024a5f762325be7cbc2",
"testHTMLNoisyMetaEncoding_4.html": "630e14e3495a78580c4e26fa3bbe3123ccf4fd8a",
"testRTFBoldItalic.rtf": "0475d224078682cf3f9f3f4cbc14a63456c5a0d8",
"test-outlook.msg": "1f202fc11a873e305d5b4d4607409f3f734065ec",
"testRTFVarious.rtf": "bf6ea9cf57886e680c5e6743a66a12b950a09083",
"testXHTML.html": "c6da900f81c1c550518e65d579d3dd62dd7c5c0c",
"EmbeddedPDF.docx": "454476bdf4a968189a6f53e75c146382bf58a434",
"testXML.xml": "e1615e9b31be58f7af9ad963e5a112efa5cdaffa",
"testWORD_no_format.docx": "9a3f5d8a4c8c0f077cc615bcfc554dc87d5926aa",
"testPPT_masterText.ppt": "f5ff5e2d45ccb180cf371ed99b7dfeb2a93539b3",
"testPDF_PDFEncodedStringInXMP.pdf": "78fd59d394f72d28a9908739fa562099978dafa1",
"testPPT_custom_props.pptx": "72152d28afbc23a50cc71fa37d1dce9ef03ca72d",
"testRTFListOverride.rtf": "f8c61d8a66afdaa07f3740e859497818bfc2ca01",
"testEXCEL_1img.xls": "",
"testWORD_1img.doc": "0826d299a7770e93603f5667d89dccb7b74d904c",
"testNPEOpenDocument.odt": "4210b973c80084c58463ec637fa43e911f77d6fe",
"testRTFWord2010CzechCharacters.rtf": "9443011aac32434240ab8dbff360c970fc1c7074",
"testPDF_Version.8.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testPPT.ppsx": "71333ef84f7825d8ad6aba2ba993d04b4bab41c6",
"testPPT_autodate.pptx": "50467dbb37d1c74b8b37fe93eddf6f9e87d21bf3",
"testWordArt.pptx": "3566bbee790704b3654fe78319957f9e0cddb6d9",
"NullHeader.docx": "18430c968ba29173b52610efdaa723424b3c4d79",
"testRTFWordPadCzechCharacters.rtf": "5dbb58452a3507c384008662f8fce90063f12189",
"resume.html": "fbfb9d8264f6eebd79847fe7a7f1b81edd4a027d",
"testPagesLayout.pages": "5db1ab91c93e6183d0af8513f62c7b87964704af",
"testOptionalHyphen.pptx": "c2977eefe7d2cad8c671f550d7883185ec65591b",
"testWORD_numbered_list.docx": "07194c58165993468e66bc4eba4f5bd89d5bee09",
"testEXCEL_1img.xlsx": "",
"testPDFTripleLangTitle.pdf": "6eb693dac68fece3bf3cd1aa9880ea9b23fc927c",
"protect.xlsx": "ee08eeaf05c35c960243f831c3a974d9ee07aa28",
"testWORD_bold_character_runs2.docx": "f10e562d8825ec2e17e0d9f58646f8084a658cfa",
"testXLSX_Thumbnail.xlsx": "020bf155ae157661c11727c54e6694cf9cd2c0d3",
"testWORD_embedded_pdf.docx": "d8adb797aaaac92afd8dd9b499bd197347f15688",
"testOptionalHyphen.rtf": "2f77b61bab5b4502b4ddd5018b454be157091d07",
"testEXCEL-charts.xls": "",
"testWORD_override_list_numbering.doc": "60e47a3e71ba08af20af96131d61740a1f0bafa3",
"testPDF_twoAuthors.pdf": "c5f0296cc21f9ae99ceb649b561c55f99d7d9452",
"testPDF_Version.10.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testHTMLNoisyMetaEncoding_2.html": "630e14e3495a78580c4e26fa3bbe3123ccf4fd8a",
"testFooter.odt": "cd5d0fcbcf48d6f005d087c47d00e84f39bcc321",
"testPPT.pptm": "71333ef84f7825d8ad6aba2ba993d04b4bab41c6",
"testPPT_various.ppt": "399e27a9893284f106dc44f15b5e636454db681e",
"testRTFListMicrosoftWord.rtf": "0303eb3e2f30530621a7a407847b759a3b21467e",
"testWORD_bold_character_runs2.doc": "f10e562d8825ec2e17e0d9f58646f8084a658cfa",
"boilerplate-whitespace.html": "a9372bc75d7d84cbcbb0bce68fcaed73ad8ef52c",
"testEXCEL_95.xls": "20d9b9b0f3aecd28607516b4b837c8bab3524b6c",
"testPPT_embedded_two_slides.pptx": "",
"testPDF_bookmarks.pdf": "5fc486c443511452db4f1aa6530714c6aa49c831",
"test_recursive_embedded.docx": "afc32b07ce07ad273e5b3d1a43390a9d2b6dd0a9",
"testEXCEL-formats.xlsx": "",
"testPPT_masterText2.pptx": "2b01eab5d0349e3cfe791b28c70c2dbf4efc884d",
"test.doc": "774be3106edbb6d80be36dbb548d62401dcfa0fe",
"test_recursive_embedded_npe.docx": "afc32b07ce07ad273e5b3d1a43390a9d2b6dd0a9",
"testPPT_embedded2.ppt": "80e106b3fc68107e7f9579cff04e3b15bdfc557a",
"testWORD_custom_props.docx": "e7a737a5237a6aa9c6b3fc677eb8fa65c30d6dfe",
"testPDF_Version.4.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testBinControlWord.rtf": "ef858fbb7584ea7f92ffed8d0a08c1cc35ffee07",
"testWORD_null_style.docx": "0be9dcfb83423c78a06af514ec21e4e7770ec48e",
"test-outlook2003.msg": "bb3c35eb7e95d657d7977c1d3d52862734f9f329",
"testPDFVarious.pdf": "c66bbbacb10dd27430f7d0bed9518e75793cedae",
"testHTMLNoisyMetaEncoding_3.html": "630e14e3495a78580c4e26fa3bbe3123ccf4fd8a",
"testRTFCorruptListOverride.rtf": "116a782d02a7f25010a15cbbb189bf98e6b89855",
"testEXCEL_custom_props.xls": "b5584d9b13ab1566ce539238dc75e7eb3449ba7f",
"testPDF_Version.7.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testPDFEmbeddingAndEmbedded.docx": "e7b648adb15cd16cdd84437c2b9524a8eeb213e4",
"testHTMLNoisyMetaEncoding_1.html": "630e14e3495a78580c4e26fa3bbe3123ccf4fd8a",
"testWORD_3imgs.doc": "818aa8c6c44dd78c49100c3c38e95abdf3812981",
"testRTFEmbeddedLink.rtf": "2720ffb5ff3a6bbb2c5c1cb43fb4922362ed788a",
"testKeynote.key": "11387b59fc6339bb73653fcbb26d387521b98ec9",
"testPDF.pdf": "5a377554685367764eaf73d093408ace323fcec7",
"protectedSheets.xlsx": "",
"testWORD.doc": "cdd41377e699287cbbe17fbb1498cfe5814dde23",
"testComment.xlsx": "d4be580bb97c1c90be379281179c7932b37a18c0",
"testPDFPackage.pdf": "75d6fa216b4e2880a65ced55d17ca2b599d2606c",
"testWORD_embeded.doc": "",
"testHTML.html": "6548b16c5ea33e907577615ce60ca4876a3936ef",
"testEXCEL_5.xls": "a174f098333c659d331317641d4d1d9d83055288",
"pictures.ppt": "95bbfdbf2f60f74371285c337d3445d0acd59a9b",
"testPPT_masterText2.ppt": "f5ff5e2d45ccb180cf371ed99b7dfeb2a93539b3",
"testPDF-custommetadata.pdf": "a84b914655db55574e6002b6f37209ecd4c3d462",
"testWORD_embeded.docx": "",
"testStyles.odt": "c25dd05633e3aab7132d2f5608126e2b4b03848f",
"testPDF_multiFormatEmbFiles.pdf": "2103b2c30b44d5bb3aa790ab04a6741a10ea235a",
"testXML2.xml": "a8c85a327716fad93faa4eb0f993057597d6f471",
"testPagesComments.pages": "cbb45131cf45b9c454e754a07af3ae927b1a69cc",
"testEXCEL_4.xls": "8d5e6156222151faaccb079d46ddb5393dd25771",
"testWORD_no_format.doc": "88feaf03fe58ee5cc667916c6a54cbd5d605cc1c",
"testPages.pages": "288e6db2f39604e372a2095257509c78dba22cbb",
"footnotes.docx": "33b01b73a12f9e14efbcc340890b11ee332dca8e",
"testWORD_bold_character_runs.doc": "f10e562d8825ec2e17e0d9f58646f8084a658cfa",
"testWORD_custom_props.doc": "e7a737a5237a6aa9c6b3fc677eb8fa65c30d6dfe",
"testPDF_Version.11.x.PDFA-1b.pdf": "71853c6197a6a7f222db0f1978c7cb232b87c5ee",
"testAnnotations.pdf": "5f599e7916198540e1b52c3e472a525f50fd45f6",
"tika434.html": "7d74122631f52f003a48018cc376026ccd8d984e",
"testPagesHeadersFootersAlphaLower.pages": "fc1d766908134ff4689fa63fa3e91c3e9b08d975",
"testRTFRegularImages.rtf": "756b1db45cb05357ceaf9c8efcf0b76e3913e190",
"testRTFUmlautSpaces2.rtf": "1fcd029357062241d74d789e93477c101ff24e3f",
"testWORD_numbered_list.doc": "e06656dd9b79ac970f3cd065fa8b630a4981556f",
"testPPT_autodate.ppt": "05b93967ea0248ad263b2f24586e125df353fd3d",
"testBulletPoints.key": "92242d67c3dbc1b22aac3f98e47061d09e7719f9",
"testMasterSlideTable.key": "1d61e2fa3c3f3615500c7f72f62971391b9e9a2f",
"testWORD_various.doc": "8cbdf1a4e0d78471eb90403612c4e92866acf0cb",
"testEXCEL_textbox.xlsx": "1e81121e91e58a74d838e414ae0fc0055a4b4100",
"big-preamble.html": "a9d759b46b6c6c1857d0d89c3a75ee2f3ace70c9",
"testWORD.docx": "f72140bef19475e950e56084d1ab1cb926697b19",
"testComment.rtf": "f6351d0f1f20c4ee0fff70adca6abbc6e638610e",
"testRTFUnicodeUCNControlWordCharacterDoubling.rtf": "3e6f2f38682e38ffc96a476ca51bec2291a27fa7",
"testPDF_Version.5.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testPPTX_Thumbnail.pptx": "6aa019154289317c7b7832fe46556e6d61cd0a9f",
"testRTFTableCellSeparation.rtf": "5647290a3197c1855fad10201dc7be60ea7b0e42",
"testRTFControls.rtf": "aee6afb80e8b09cf49f056020c037f70c2757e49",
"testEXCEL.xls": "",
"testRTFJapanese.rtf": "08976f9a7d6d3a155cad84d7fa23295cb972a17a",
"testPageNumber.pdf": "96b03d2cc6782eba653af28228045964e68422b5",
"testOptionalHyphen.pdf": "12edd450ea76ea4e79f80ebd3442999ec2180dbc",
"testPDFFileEmbInAnnotation.pdf": "97a6e5781bbaa6aea040546d797c4916f9d90c86",
"testFontAfterBufferedText.rtf": "d1c8757b3ed91f2d7795234405c43005868affa3",
"testPPT_masterFooter.ppt": "8c9104385820c2631ddda20814231808fac03d4d",
"testWORD_various.docx": "189df989e80afb09281901aefc458c6630a8530b",
"testComment.ppt": "21842dd9cb8a7d4af0f102543c192861c9789705",
"testPopupAnnotation.pdf": "1717b1d16c0a4b9ff5790cac90fc8e0fba170a35",
"testWORD_bold_character_runs.docx": "f10e562d8825ec2e17e0d9f58646f8084a658cfa",
"testOverlappingText.pdf": "726da7d6c184512ed8d44af2a5085d65523c4572",
"testRTF.rtf": "91e830ceba556741116c9e83b0c69a0d6c5c9304",
"testRTFIgnoredControlWord.rtf": "1eb6a2f2fd32b1bb4227c0c02a35cb6027d9ec8c",
"testComment.xls": "4de962f16452159ce302fc4a412b06a06cf9a0f6",
"testPPT.ppsm": "71333ef84f7825d8ad6aba2ba993d04b4bab41c6",
"boilerplate.html": "b3558f02c3179e4aeeb6057594d87bda79964e7b",
"testEXCEL_embeded.xls": "",
"testEXCEL.xlsx": "",
"testPPT_2imgs.ppt": "9a68072ffcf171389e78cf8bc018c4b568a6202d",
"testComment.pptx": "6ae6052f469b8f901fd4fd8bc70f8e267255a58e",
"testPDF_Version.6.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testPPT.pptx": "71333ef84f7825d8ad6aba2ba993d04b4bab41c6",
"testPPT_custom_props.ppt": "edf196acc12701accc7be5dfe63e053436db45e6",
"testPPT_embeded.pptx": "",
"testRTFListLibreOffice.rtf": "4c38d9e2f0a8c9a4c2cc8d2a52db9591ab759abe",
"testPDF_Version.9.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
"testRTFHexEscapeInsideWord.rtf": "6cffda07e774c55b5465d8134a0bdcb8c30f3386",
"testRTFNewlines.rtf": "2375ca14e2b0d8f7ff6bbda5191544b3ee7c09fb",
"testRTF-ms932.rtf": "5f9db1b83bf8e9c4c6abb065adaeb151307d33f2",
"test_TIKA-1251.doc": "5a9394c34274964055fdd9272b4f7dc314b99ecf",
"test_list_override.rtf": "9fe8b4a36c5222fe7ed2e9b54e2330aec8fa9423"
}

0 comments on commit f217270

Please sign in to comment.