-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
35 lines (22 loc) · 1.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import src.textprocessing as tp
def load_book_as_list(name_of_book: str) -> list:
    """Return every whitespace-separated token of a book, in reading order.

    The book is read as UTF-8 text from ``./books/<name_of_book>``, a path
    relative to the current working directory.

    Args:
        name_of_book: File name of the book inside the ``books`` directory.

    Returns:
        A flat list of the strings produced by ``str.split()`` on each line
        of the file; blank lines contribute nothing.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(f"./books/{name_of_book}", 'r', encoding='utf-8') as book_file:
        # Flatten line by line so the whole file is never held as one string.
        return [token for row in book_file for token in row.split()]
def jaccard_similarity(book_1: tuple, book_2: tuple) -> str:
    """Compare two books with ``tp.TextSimilarity`` and return the result as text.

    Each tuple is unpacked positionally into ``tp.TextProcessing``; the
    script entry point in this file passes ``(file name, list of words)``.

    Args:
        book_1: Positional arguments for the first ``tp.TextProcessing``.
        book_2: Positional arguments for the second ``tp.TextProcessing``.

    Returns:
        ``str()`` of the ``tp.TextSimilarity`` object built from both books.
        The exact format is defined by that class, not here.
    """
    # Build the processed books in argument order (first, then second).
    processed = [tp.TextProcessing(*book) for book in (book_1, book_2)]
    similarity = tp.TextSimilarity(first_book=processed[0], second_book=processed[1])
    return str(similarity)
def minhash_jaccard_similarity(book_1: tuple, book_2: tuple, hash_no: int) -> str:
    """Compare two books with ``tp.MinHashSimilarity`` and return the result as text.

    Each tuple is unpacked positionally into ``tp.TextProcessing``; the
    script entry point in this file passes ``(file name, list of words)``.

    Args:
        book_1: Positional arguments for the first ``tp.TextProcessing``.
        book_2: Positional arguments for the second ``tp.TextProcessing``.
        hash_no: Forwarded to ``tp.MinHashSimilarity`` as ``minhashes_no``
            (presumably the number of hash functions -- confirm in
            ``src.textprocessing``).

    Returns:
        ``str()`` of the ``tp.MinHashSimilarity`` object built from both
        books. The exact format is defined by that class, not here.
    """
    # Build the processed books in argument order (first, then second).
    processed = [tp.TextProcessing(*book) for book in (book_1, book_2)]
    estimate = tp.MinHashSimilarity(
        first_book=processed[0],
        second_book=processed[1],
        minhashes_no=hash_no,
    )
    return str(estimate)
if __name__ == '__main__':
    # Script entry point: compare ./books/test.txt against itself with both
    # similarity measures and write the two textual results to ./output.txt.
    book_name = 'test.txt'

    # Build the complete report before touching the output file, so a failure
    # while loading or comparing does not leave a truncated output.txt behind.
    book = (book_name, load_book_as_list(book_name))
    report_parts = [
        jaccard_similarity(book_1=book, book_2=book),
        minhash_jaccard_similarity(book_1=book, book_2=book, hash_no=200),
    ]

    # Explicit UTF-8 mirrors how the book is read and keeps the output
    # independent of the platform's default locale encoding.
    with open("./output.txt", 'w', encoding='utf-8') as f:
        # The parts are written back to back, with no separator added here.
        f.writelines(report_parts)