Skip to content

Commit

Permalink
issue #12 adopted the suggestion by yim1990 with a small change, so that the keyword emit is lowercased as well
Browse files Browse the repository at this point in the history
  • Loading branch information
robert-bor committed Sep 22, 2015
1 parent e2c5334 commit 76ae822
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/main/java/org/ahocorasick/trie/Trie.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ private void addKeyword(String keyword) {
}
State currentState = this.rootState;
for (Character character : keyword.toCharArray()) {
if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character);
}
currentState = currentState.addState(character);
}
currentState.addEmit(keyword);
currentState.addEmit(trieConfig.isCaseInsensitive() ? keyword.toLowerCase() : keyword);
}

public Collection<Token> tokenize(String text) {
Expand Down
19 changes: 18 additions & 1 deletion src/test/java/org/ahocorasick/trie/TrieTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,24 @@ public void ushersTest() {
checkEmit(iterator.next(), 2, 5, "hers");
}

@Test
@Test
public void ushersTestWithCapitalKeywords() {
    // Keywords are registered in upper case on a case-insensitive trie; the text
    // being parsed is lower case, and the emitted keywords are expected in lower case.
    final String text = "ushers";
    Trie upperCaseKeywordTrie = Trie.builder()
            .caseInsensitive()
            .addKeyword("HERS")
            .addKeyword("HIS")
            .addKeyword("SHE")
            .addKeyword("HE")
            .build();

    Collection<Emit> found = upperCaseKeywordTrie.parseText(text);

    // Expected matches: she @ 3, he @ 3, hers @ 5
    assertEquals(3, found.size());

    // The emits are checked in the order the collection yields them.
    Iterator<Emit> cursor = found.iterator();
    checkEmit(cursor.next(), 2, 3, "he");
    checkEmit(cursor.next(), 1, 3, "she");
    checkEmit(cursor.next(), 2, 5, "hers");
}

@Test
public void ushersTestFirstMatch() {
Trie trie = Trie.builder()
.addKeyword("hers")
Expand Down

0 comments on commit 76ae822

Please sign in to comment.