Skip to content

Commit

Permalink
Rewrite MarcXmlParser and add MarcXmlParserTest (JabRef#10144)
Browse files Browse the repository at this point in the history
  • Loading branch information
Luggas4you committed Aug 5, 2023
1 parent 76b0745 commit faa6288
Show file tree
Hide file tree
Showing 16 changed files with 1,156 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.jabref.model.entry.Date;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.strings.StringUtil;

import org.slf4j.Logger;
Expand Down Expand Up @@ -99,6 +100,9 @@ private BibEntry parseEntry(Element element) {
putIsbn(bibEntry, datafield);
} else if ("100".equals(tag) || "700".equals(tag) || "710".equals(tag)) {
putPersonalName(bibEntry, datafield); // Author, Editor, Publisher
} else if ("111".equals(tag)) {
// FixMe: Conference Information also in Subtitle (245) & Author (710)
putConferenceDetail(bibEntry, datafield);
} else if ("245".equals(tag)) {
putTitle(bibEntry, datafield);
} else if ("250".equals(tag)) {
Expand All @@ -109,10 +113,14 @@ private BibEntry parseEntry(Element element) {
putPhysicalDescription(bibEntry, datafield);
} else if ("490".equals(tag) || "830".equals(tag)) {
putSeries(bibEntry, datafield);
} else if ("502".equals(tag)) {
putThesisDescription(bibEntry, datafield); // Master's thesis, PhD thesis, Thesis
} else if ("520".equals(tag)) {
putSummary(bibEntry, datafield);
} else if ("653".equals(tag)) {
putKeywords(bibEntry, datafield);
} else if ("773".equals(tag)) {
putIssue(bibEntry, datafield);
} else if ("856".equals(tag)) {
putElectronicLocation(bibEntry, datafield);
} else if ("966".equals(tag)) {
Expand All @@ -125,16 +133,6 @@ private BibEntry parseEntry(Element element) {
LOGGER.debug("Unparsed tag: {}", tag);
}
}

/*
* ToDo:
* pages
* volume and number correct
* series and journals stored in different tags
* thesis
* proceedings
*/

return bibEntry;
}

Expand Down Expand Up @@ -200,6 +198,15 @@ private void putPersonalName(BibEntry bibEntry, Element datafield) {
}
}

/**
 * Handles the datafield dispatched for tag "111": the entry is always turned into
 * a proceedings entry, and the text of subfield "a" is stored as the event title
 * when it is not blank.
 *
 * @param bibEntry  the entry being populated
 * @param datafield the datafield element whose subfields are read
 */
private void putConferenceDetail(BibEntry bibEntry, Element datafield) {
    String eventName = getSubfield("a", datafield);

    // The type changes even when no usable name is found in subfield "a".
    bibEntry.setType(StandardEntryType.Proceedings);

    if (!StringUtil.isNotBlank(eventName)) {
        return;
    }
    bibEntry.setField(StandardField.EVENTTITLE, eventName);
}

private void putTitle(BibEntry bibEntry, Element datafield) {
String title = getSubfield("a", datafield);
String subtitle = getSubfield("b", datafield);
Expand Down Expand Up @@ -251,7 +258,7 @@ private void putPublication(BibEntry bibEntry, Element datafield) {
String date = getSubfield("c", datafield);

if (StringUtil.isNotBlank(place)) {
bibEntry.setField(StandardField.LOCATION, place);
bibEntry.setField(StandardField.ADDRESS, place);
}

if (StringUtil.isNotBlank(name)) {
Expand All @@ -274,8 +281,8 @@ private void putPublication(BibEntry bibEntry, Element datafield) {
private void putPhysicalDescription(BibEntry bibEntry, Element datafield) {
String pagetotal = getSubfield("a", datafield);

if (StringUtil.isNotBlank(pagetotal) && (pagetotal.contains("pages") || pagetotal.contains("p."))) {
pagetotal = pagetotal.replaceAll(" p\\.?$", "");
if (StringUtil.isNotBlank(pagetotal) && (pagetotal.contains("pages") || pagetotal.contains("p.") || pagetotal.contains("S") || pagetotal.contains("Seiten"))) {
pagetotal = pagetotal.replaceAll(".*?(\\d+)(?:\\s*Seiten|\\s*S|\\s*pages|\\s*p).*", "$1");
bibEntry.setField(StandardField.PAGETOTAL, pagetotal);
}
}
Expand All @@ -301,6 +308,20 @@ private void putSeries(BibEntry bibEntry, Element datafield) {
}
}

/**
 * Handles the datafield dispatched for tag "502". The entry is first typed as a
 * master's thesis; subfield "c", when not blank, is stored as the school; finally the
 * type is switched to PhD thesis if subfield "b" is exactly "Dissertation".
 *
 * @param bibEntry  the entry being populated
 * @param datafield the datafield element whose subfields are read
 */
private void putThesisDescription(BibEntry bibEntry, Element datafield) {
    String degree = getSubfield("b", datafield);
    String institution = getSubfield("c", datafield);

    // Default classification for any thesis datafield; may be overridden below.
    bibEntry.setType(StandardEntryType.MastersThesis);

    boolean hasInstitution = StringUtil.isNotBlank(institution);
    if (hasInstitution) {
        bibEntry.setField(StandardField.SCHOOL, institution);
    }

    // Constant-first comparison, so a missing (null) degree simply does not match.
    boolean isDissertation = "Dissertation".equals(degree);
    if (isDissertation) {
        bibEntry.setType(StandardEntryType.PhdThesis);
    }
}

private void putSummary(BibEntry bibEntry, Element datafield) {
String summary = getSubfield("a", datafield);

Expand All @@ -327,6 +348,31 @@ private void putKeywords(BibEntry bibEntry, Element datafield) {
}
}

/**
 * Handles the datafield dispatched for tag "773". The entry is typed as an article,
 * then every "g" subfield of the form {@code key:value} is inspected: when the key is
 * one of number, year, pages, volume, day or month and the value is not blank, the
 * value is stored in the matching field.
 *
 * @param bibEntry  the entry being populated
 * @param datafield the datafield element whose "g" subfields are read
 */
private void putIssue(BibEntry bibEntry, Element datafield) {
    bibEntry.setType(StandardEntryType.Article);

    for (String detail : getSubfields("g", datafield)) {
        String[] keyAndValue = detail.split(":");
        // Only plain "key:value" pairs are understood; anything else is ignored.
        if (keyAndValue.length != 2) {
            continue;
        }

        String key = keyAndValue[0].trim();
        String value = keyAndValue[1].trim();
        if (!StringUtil.isNotBlank(value)) {
            continue;
        }

        // Unknown keys map to null and are silently skipped.
        StandardField target = switch (key) {
            case "number" -> StandardField.NUMBER;
            case "year" -> StandardField.YEAR;
            case "pages" -> StandardField.PAGES;
            case "volume" -> StandardField.VOLUME;
            case "day" -> StandardField.DAY;
            case "month" -> StandardField.MONTH;
            default -> null;
        };
        if (target != null) {
            bibEntry.setField(target, value);
        }
    }
}

private void putDoi(BibEntry bibEntry, Element datafield) {
String ind1 = datafield.getAttribute("ind1");
String resource = getSubfield("u", datafield);
Expand Down Expand Up @@ -406,9 +452,16 @@ private String getSubfield(String a, Element datafield) {
return subfield.getTextContent();
}
}

return null;
}

/**
 * Returns the text content of every "subfield" child of the given datafield whose
 * "code" attribute equals the requested code.
 *
 * @param a         the subfield code to match
 * @param datafield the datafield element whose children are searched
 * @return the matching text contents as an unmodifiable list; empty when nothing matches
 */
private List<String> getSubfields(String a, Element datafield) {
    return getChildren("subfield", datafield)
            .stream()
            .filter(subfield -> subfield.getAttribute("code").equals(a))
            .map(Node::getTextContent)
            .toList();
}

private Element getChild(String name, Element e) {
if (e == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.jabref.logic.importer.fileformat;

import java.io.IOException;
import java.io.InputStream;
import java.util.function.Predicate;
import java.util.stream.Stream;

import org.jabref.logic.bibtex.BibEntryAssert;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.model.entry.BibEntry;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertNotNull;

/**
 * Parameterized import test for {@link MarcXmlParser}.
 *
 * <p>Each test file whose name starts with "MarcXMLParserTest" and ends with ".xml" is
 * parsed, and the first resulting entry is handed to {@link BibEntryAssert} together with
 * the name of the ".bib" file that shares the same base name.
 */
public class MarcXmlParserTest {

    private static final String FILE_ENDING = ".xml";

    /**
     * Supplies the names of the XML fixtures to run the test against.
     */
    private static Stream<String> fileNames() throws IOException {
        Predicate<String> isMarcXmlFixture = candidate ->
                candidate.startsWith("MarcXMLParserTest") && candidate.endsWith(FILE_ENDING);
        return ImporterTestEngine.getTestFiles(isMarcXmlFixture).stream();
    }

    /**
     * Parses the XML resource and checks its first entry against the expected .bib resource.
     *
     * @param xmlName name of the XML resource, resolved relative to this class
     * @param bibName name of the .bib resource holding the expected entry
     */
    private void doTest(String xmlName, String bibName) throws Exception {
        try (InputStream xmlStream = MarcXmlParserTest.class.getResourceAsStream(xmlName)) {
            java.util.List<BibEntry> parsed = new MarcXmlParser().parseEntries(xmlStream);
            assertNotNull(parsed);

            // Only the first parsed entry is compared with the expectation.
            BibEntry first = parsed.get(0);
            BibEntryAssert.assertEquals(MarcXmlParserTest.class, bibName, first);
        }
    }

    @ParameterizedTest
    @MethodSource("fileNames")
    public void testImportEntries(String fileName) throws Exception {
        doTest(fileName, FileUtil.getBaseName(fileName) + ".bib");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@Proceedings{,
title = {VI International Congress on Experimental Mechanics},
year = {1988},
address = {London},
editor = {Adams, Frank},
isbn = {1851662529},
publisher = {{Elsevier [u.a.]}},
eventtitle = {International Congress on Experimental Mechanics},
subtitle = {[proceedings of the VI International Congress on Experimental Mechanics ... held on June 5 - 10, 1988 in Portland, Oregon, U.S.A.]},
titleaddon = {ed. by Frank Adams ...},
}

@Comment{jabref-meta: databaseType:bibtex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
<zs:searchRetrieveResponse>
<zs:version>1.1</zs:version>
<zs:numberOfRecords>1</zs:numberOfRecords>
<zs:records>
<zs:record>
<zs:recordSchema>info:srw/schema/1/marcxml-v1.1</zs:recordSchema>
<zs:recordPacking>xml</zs:recordPacking>
<zs:recordData>
<record>
<leader>01080nam a2200265 ca4500</leader>
<controlfield tag="001">BV049029404</controlfield>
<controlfield tag="003">DE-604</controlfield>
<controlfield tag="005">00000000000000.0</controlfield>
<controlfield tag="007">t</controlfield>
<controlfield tag="008">230630m1988uuuu |||| 10||| eng d</controlfield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">1851662529</subfield>
<subfield code="9">1-85166-252-9</subfield>
</datafield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">0912053208</subfield>
<subfield code="9">0-912053-20-8</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">DE-604</subfield>
<subfield code="b">ger</subfield>
</datafield>
<datafield tag="111" ind1="2" ind2=" ">
<subfield code="a">International Congress on Experimental Mechanics</subfield>
<subfield code="n">6</subfield>
<subfield code="d">1988</subfield>
<subfield code="c">Portland, Or.</subfield>
<subfield code="j">Verfasser</subfield>
<subfield code="0">(DE-588)5022525-X</subfield>
<subfield code="4">aut</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="0">
<subfield code="a">VI International Congress on Experimental Mechanics</subfield>
<subfield code="b">[proceedings of the VI International Congress on Experimental Mechanics ... held on June 5 - 10, 1988 in Portland, Oregon, U.S.A.]</subfield>
<subfield code="c">ed. by Frank Adams ...</subfield>
</datafield>
<datafield tag="246" ind1="1" ind2="3">
<subfield code="a">Sixth International Congress on Experimental Mechanics</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="1">
<subfield code="a">London</subfield>
<subfield code="b">Elsevier [u.a.]</subfield>
<subfield code="c">1988</subfield>
</datafield>
<datafield tag="655" ind1=" " ind2="7">
<subfield code="0">(DE-588)1071861417</subfield>
<subfield code="a">Konferenzschrift</subfield>
<subfield code="2">gnd-content</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Adams, Frank</subfield>
<subfield code="4">edt</subfield>
</datafield>
<datafield tag="710" ind1="2" ind2=" ">
<subfield code="a">Society for Experimental Mechanics (USA)</subfield>
<subfield code="e">Sonstige</subfield>
<subfield code="0">(DE-588)308357-3</subfield>
<subfield code="4">oth</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-599)GBV037240986</subfield>
</datafield>
<datafield tag="041" ind1="0" ind2=" ">
<subfield code="a">eng</subfield>
</datafield>
<datafield tag="336" ind1=" " ind2=" ">
<subfield code="b">txt</subfield>
<subfield code="2">rdacontent</subfield>
</datafield>
<datafield tag="337" ind1=" " ind2=" ">
<subfield code="b">n</subfield>
<subfield code="2">rdamedia</subfield>
</datafield>
<datafield tag="338" ind1=" " ind2=" ">
<subfield code="b">nc</subfield>
<subfield code="2">rdacarrier</subfield>
</datafield>
</record>
</zs:recordData>
<zs:recordPosition>1</zs:recordPosition>
</zs:record>
</zs:records>
</zs:searchRetrieveResponse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@Article{,
author = {Bello, Emmanuel G.},
title = {Article 22 of the African Charter on Human and Peoples' Rights},
year = {1992},
pages = {447-473},
titleaddon = {Emmanuel G. Bello},
}

@Comment{jabref-meta: databaseType:bibtex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<zs:searchRetrieveResponse>
<zs:version>1.1</zs:version>
<zs:numberOfRecords>1</zs:numberOfRecords>
<zs:records>
<zs:record>
<zs:recordSchema>info:srw/schema/1/marcxml-v1.1</zs:recordSchema>
<zs:recordPacking>xml</zs:recordPacking>
<zs:recordData>
<record>
<leader>00864naa a2200253 c 4500</leader>
<controlfield tag="001">BV048976418</controlfield>
<controlfield tag="003">DE-604</controlfield>
<controlfield tag="005">00000000000000.0</controlfield>
<controlfield tag="007">t</controlfield>
<controlfield tag="008">230526s1992 |||| 00||| eng d</controlfield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">DE-604</subfield>
<subfield code="b">ger</subfield>
<subfield code="e">rda</subfield>
</datafield>
<datafield tag="100" ind1="1" ind2=" ">
<subfield code="a">Bello, Emmanuel G.</subfield>
<subfield code="d">1939-</subfield>
<subfield code="e">Verfasser</subfield>
<subfield code="0">(DE-588)136157114</subfield>
<subfield code="4">aut</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="0">
<subfield code="a">Article 22 of the African Charter on Human and Peoples' Rights</subfield>
<subfield code="c">Emmanuel G. Bello</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="1">
<subfield code="c">1992</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)1381297713</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-599)BVBBV048976418</subfield>
</datafield>
<datafield tag="041" ind1="0" ind2=" ">
<subfield code="a">eng</subfield>
</datafield>
<datafield tag="049" ind1=" " ind2=" ">
<subfield code="a">DE-521</subfield>
</datafield>
<datafield tag="336" ind1=" " ind2=" ">
<subfield code="b">txt</subfield>
<subfield code="2">rdacontent</subfield>
</datafield>
<datafield tag="337" ind1=" " ind2=" ">
<subfield code="b">n</subfield>
<subfield code="2">rdamedia</subfield>
</datafield>
<datafield tag="338" ind1=" " ind2=" ">
<subfield code="b">nc</subfield>
<subfield code="2">rdacarrier</subfield>
</datafield>
<datafield tag="773" ind1="1" ind2="8">
<subfield code="g">pages:447-473</subfield>
</datafield>
<datafield tag="773" ind1="0" ind2="8">
<subfield code="t">Essays in honour of Judge Taslim Olawale Elias; 1. Contemporary international law and human rights / edited by Emmanuel G. Bello ...</subfield>
<subfield code="d">Dordrecht [u.a.], 1992</subfield>
<subfield code="g">Seite 447-473</subfield>
<subfield code="w">(DE-604)BV013223576</subfield>
</datafield>
<datafield tag="941" ind1=" " ind2=" ">
<subfield code="s">447-473</subfield>
</datafield>
</record>
</zs:recordData>
<zs:recordPosition>1</zs:recordPosition>
</zs:record>
</zs:records>
</zs:searchRetrieveResponse>
Loading

0 comments on commit faa6288

Please sign in to comment.