Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix most flake8 warnings #13

Merged
merged 1 commit into from
Sep 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

# -- Project information -----------------------------------------------------

project = u'LaoNLP'
copyright = u'2020 - 2021, Wannaphong Phatthiyaphaibun'
author = u'Wannaphong Phatthiyaphaibun'
project = 'LaoNLP'
copyright = '2020 - 2021, Wannaphong Phatthiyaphaibun'
author = 'Wannaphong Phatthiyaphaibun'

# The short X.Y version
version = u''
version = ''
# The full version, including alpha/beta/rc tags
release = u''
release = ''


# -- General configuration ---------------------------------------------------
Expand Down Expand Up @@ -133,8 +133,8 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'SphinxGitHubActionTest.tex', u'Sphinx GitHub Action Test Documentation',
u'Sean Zheng', 'manual'),
(master_doc, 'SphinxGitHubActionTest.tex', 'Sphinx GitHub Action Test Documentation',
'Sean Zheng', 'manual'),
]


Expand All @@ -143,7 +143,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'sphinxgithubactiontest', u'Sphinx GitHub Action Test Documentation',
(master_doc, 'sphinxgithubactiontest', 'Sphinx GitHub Action Test Documentation',
[author], 1)
]

Expand All @@ -154,7 +154,7 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'SphinxGitHubActionTest', u'Sphinx GitHub Action Test Documentation',
(master_doc, 'SphinxGitHubActionTest', 'Sphinx GitHub Action Test Documentation',
author, 'SphinxGitHubActionTest', 'One line description of project.',
'Miscellaneous'),
]
Expand Down Expand Up @@ -185,4 +185,4 @@
'special-members': '__init__',
'undoc-members': True,
'exclude-members': '__weakref__'
}
}
36 changes: 18 additions & 18 deletions laonlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,33 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
from laonlp.tokenize import *
from laonlp.corpus import *
from laonlp.transliterate import *
from laonlp.tag import pos_tag
from laonlp.tokenize import *
from laonlp.transliterate import *
from laonlp.util import *

TONE_MARKS = "່້"+"໊໋"
CONSONANTS = "ກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢຣລວຫອຮ"
VOWELS_COMBINING = "ັ"+"ິີ"+"ຶືຸ"+"ູົໍ"
VOWELS = "ະັາ"+"ຳິີ"+"ຶືຸ"+"ູົຼ"+"ຽເແ"+"ໂໃໄ"+"ໍ"
NUMBERS = "໑໒໓໔໕໖໗໘໙໐" # 1234567890
NUMBERS = "໑໒໓໔໕໖໗໘໙໐" # 1234567890
CANCELLATION_MARK = "\u0ECC"
# This is Obsolete consonants.
# You can read at https://en.wikipedia.org/wiki/Lao_script
lao_obsolete_consonants_mapping_thai = {
"ຆ":"ฆ", # PALI GHA
"ຉ":"ฉ", # PALI CHA
"ຌ":"ฌ", # PALI JHA
"ຎ":"ญ", # PALI NYA
"ຏ":"ฏ", # PALI TTA
"ຐ":"ฐ", # PALI TTHA
"ຑ":"ฑ", # PALI DDA
"ຒ":"ฒ", # PALI DDHA
"ຓ":"ณ", # PALI NNA
"ຘ":"ธ", # PALI DHA
"ຠ":"ภ", # PALI BHA
"ຨ":"ศ", # SANSKRIT SHA
"ຩ":"ษ", # SANSKRIT SSA
"ຬ":"ฬ", # PALI LLA
}
"ຆ": "ฆ", # PALI GHA
"ຉ": "ฉ", # PALI CHA
"ຌ": "ฌ", # PALI JHA
"ຎ": "ญ", # PALI NYA
"ຏ": "ฏ", # PALI TTA
"ຐ": "ฐ", # PALI TTHA
"ຑ": "ฑ", # PALI DDA
"ຒ": "ฒ", # PALI DDHA
"ຓ": "ณ", # PALI NNA
"ຘ": "ธ", # PALI DHA
"ຠ": "ภ", # PALI BHA
"ຨ": "ศ", # SANSKRIT SHA
"ຩ": "ษ", # SANSKRIT SSA
"ຬ": "ฬ", # PALI LLA
}
2 changes: 1 addition & 1 deletion laonlp/corpus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
"lao_wiktionarydict",
"get_path_corpus",
"lao_stopwords"
]
]
2 changes: 1 addition & 1 deletion laonlp/corpus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@


def get_path_corpus(file):
    """Return the absolute path of *file* inside the package's corpus directory.

    :param str file: corpus file name (e.g. ``"Lao-Dictionary.txt"``)
    :return: full path built from the laonlp package location
    """
    corpus_dir = os.path.join(laonlp_path, "corpus")
    return os.path.join(corpus_dir, file)
10 changes: 5 additions & 5 deletions laonlp/corpus/lao_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def lao_dictionary() -> List[str]:
"""
path = get_path_corpus("Lao-Dictionary.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_spellcheckdict() -> List[str]:
Expand All @@ -34,13 +34,13 @@ def lao_spellcheckdict() -> List[str]:
"""
path = get_path_corpus("lo_spellcheck_dict.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_wannaphongdict() -> List[str]:
    """Return the word list from the ``lao-wannaphong.txt`` corpus file.

    Comment lines (those whose first character is ``#``) are skipped;
    every remaining line is stripped of surrounding whitespace.
    """
    corpus_file = get_path_corpus("lao-wannaphong.txt")
    with open(corpus_file, "r", encoding="utf-8-sig") as fp:
        return [line.strip() for line in fp if line[0] != "#"]


def lao_wiktionarydict() -> List[str]:
Expand All @@ -49,7 +49,7 @@ def lao_wiktionarydict() -> List[str]:
"""
path = get_path_corpus("wiktionary-20210720.txt")
with open(path, "r", encoding="utf-8-sig") as f:
return [i.strip() for i in f.readlines() if i[0]!="#"]
return [i.strip() for i in f.readlines() if i[0] != "#"]


def lao_words() -> List[str]:
Expand All @@ -68,5 +68,5 @@ def lao_stopwords() -> FrozenSet[str]:
path = get_path_corpus("stopwords_lao.txt")
with open(path, "r", encoding="utf-8-sig") as fh:
lines = fh.read().splitlines()
lines = [line.strip() for line in lines if line.startswith("#") == False]
lines = [line.strip() for line in lines if line.startswith("#") is False]
return frozenset(filter(None, lines))
13 changes: 6 additions & 7 deletions laonlp/corpus/mopt_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,37 @@
import csv
from collections import defaultdict

from laonlp.corpus import laonlp_path
from laonlp.corpus.core import get_path_corpus
corpus_path = get_path_corpus("lao-eng-dictionary.csv")
list_data=[]
with open(corpus_path,encoding="utf-8-sig") as csvfile:
list_data = []
with open(corpus_path, encoding="utf-8-sig") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
list_data.append(row)


def get_lao_eng()->dict:
def get_lao_eng() -> dict:
    """Build a Lao -> English lookup from the loaded dictionary rows.

    :return: mapping of each Lao word to the list of its English entries
    """
    mapping = defaultdict(list)
    for row in list_data:
        mapping[row['LaoWord']].append(row['English'])
    return mapping


def get_eng_lao()->dict:
def get_eng_lao() -> dict:
    """Build an English -> Lao lookup from the loaded dictionary rows.

    :return: mapping of each English word to the list of its Lao entries
    """
    mapping = defaultdict(list)
    for row in list_data:
        mapping[row['English']].append(row['LaoWord'])
    return mapping


def get_pronunciation()->dict:
def get_pronunciation() -> dict:
    """Build a Lao word -> pronunciation lookup from the dictionary rows.

    :return: mapping of each Lao word to the list of its pronunciations
    """
    mapping = defaultdict(list)
    for row in list_data:
        mapping[row['LaoWord']].append(row['Pronunciation'])
    return mapping


def get_type()->dict:
def get_type() -> dict:
_w = defaultdict(list)
for i in list_data:
_w[i['LaoWord']].append(i['Type'])
Expand Down
5 changes: 3 additions & 2 deletions laonlp/translate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
]
from laonlp.translate.mopt_dict import dictionary

def word_dictionary(word: str, src: str, target: str, name: str = "mopt_laos")->list:

def word_dictionary(word: str, src: str, target: str, name: str = "mopt_laos") -> list:
"""
Word dictionary

Expand All @@ -29,4 +30,4 @@ def word_dictionary(word: str, src: str, target: str, name: str = "mopt_laos")->
:return: return word
:rtype: str
"""
return dictionary(word, src, target)
return dictionary(word, src, target)
6 changes: 2 additions & 4 deletions laonlp/translate/mopt_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List

from laonlp.corpus import mopt_dict


def dictionary(word: str, src: str, target: str)->list:
def dictionary(word: str, src: str, target: str) -> list:
if src == "lao" and target == "eng":
_temp = mopt_dict.get_lao_eng()
if word not in list(_temp.keys()):
Expand All @@ -31,4 +29,4 @@ def dictionary(word: str, src: str, target: str)->list:
return None
return _temp[word]
else:
return word
return word
2 changes: 1 addition & 1 deletion laonlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
)
from laonlp.util.lao import (
remove_tone_mark
)
)
6 changes: 4 additions & 2 deletions laonlp/util/digitconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
"ສູນ"
]
_dict_lao_arabic = {
i:j for i,j in zip(list(NUMBERS), list(_arabic_numerals))
i: j for i, j in zip(list(NUMBERS), list(_arabic_numerals))
}
_dict_arabic_lao = {
i:j for i,j in zip(list(_arabic_numerals), list(NUMBERS))
i: j for i, j in zip(list(_arabic_numerals), list(NUMBERS))
}
_lao_arabic_table = str.maketrans(_dict_lao_arabic)
_arabic_lao_table = str.maketrans(_dict_arabic_lao)
Expand All @@ -48,6 +48,7 @@ def lao_digit_to_arabic_digit(text: str) -> str:
"""
return text.translate(_lao_arabic_table)


def arabic_digit_to_lao_digit(text: str) -> str:
"""
Arabic digit to Lao digit
Expand All @@ -58,6 +59,7 @@ def arabic_digit_to_lao_digit(text: str) -> str:
"""
return text.translate(_arabic_lao_table)


def number2lao(numbers: int):
"""
Numbers to La opronunciation
Expand Down
4 changes: 2 additions & 2 deletions laonlp/util/lao.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
limitations under the License.
"""
TONE_MARKS = "່້"+"໊໋"
_tone_mark = str.maketrans({i:None for i in TONE_MARKS})
_tone_mark = str.maketrans({i: None for i in TONE_MARKS})


def remove_tone_mark(text: str) -> str:
Expand All @@ -26,4 +26,4 @@ def remove_tone_mark(text: str) -> str:
:return: returns a lao text without tone mark.
:rtype: str
"""
return text.translate(_tone_mark)
return text.translate(_tone_mark)
19 changes: 10 additions & 9 deletions laonlp/word_vector/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
except ModuleNotFoundError:
raise ModuleNotFoundError('Word vector functionalities require huggingface_hub which is not currently installed. Please try installing the package via "pip install huggingface_hub".')


class Word2Vec:
"""
Word2Vec
"""
def __init__(self, model: str, corpus: str="oscar"):
def __init__(self, model: str, corpus: str = "oscar"):
"""
:param str model: model name (cbow or skip-gram)
:param str corpus: corpus name (oscar)
Expand All @@ -39,27 +40,27 @@ def __init__(self, model: str, corpus: str="oscar"):
if self.corpus not in ["oscar"]:
raise NotImplementedError("LaoNLP doesn't support %s corpus." % self.corpus)
self.load_model(self.model)

def load_model(self, model: str):
"""
Load Word2Vec model

:param str model: model name (cbow or skip-gram)
"""
if model=="cbow":
if model == "cbow":
self.model_path = hf_hub_download(repo_id="wannaphong/Lao-Word-Embedding", filename="lao_oscar_cbow_model.bin")
elif model=="skip-gram":
elif model == "skip-gram":
self.model_path = hf_hub_download(repo_id="wannaphong/Lao-Word-Embedding", filename="lao_oscar_skipgram_model.bin")
else:
raise NotImplementedError("LaoNLP doesn't support %s model." % model)
self.model_wav2vec = gensim.models.keyedvectors.KeyedVectors.load_word2vec_format(self.model_path, binary=True, encoding='utf-8-sig', unicode_errors='ignore')

def get_model(self):
"""
Get gensim.models.keyedvectors.KeyedVectors class
"""
return self.model_wav2vec

def doesnt_match(self, words: List[str]) -> str:
"""
Get donesn't match
Expand All @@ -70,12 +71,12 @@ def doesnt_match(self, words: List[str]) -> str:
:rtype: str
"""
return self.model_wav2vec.doesnt_match(words)

def most_similar_cosmul(self, positive: List[str], negative: List[str]):
return self.model_wav2vec.most_similar_cosmul(
positive=positive, negative=negative
)

def similarity(self, word1: str, word2: str) -> float:
"""
Find similarity between word pairs.
Expand All @@ -86,4 +87,4 @@ def similarity(self, word1: str, word2: str) -> float:
:return: return similarity
:rtype: float
"""
return self.model_wav2vec.similarity(word1, word2)
return self.model_wav2vec.similarity(word1, word2)
2 changes: 1 addition & 1 deletion tests/test_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@

class TestTagPackage(unittest.TestCase):
    """Smoke test for the translate package's dictionary lookup."""

    def test_word_dictionary(self):
        # A known English word should produce a non-None lookup result.
        self.assertIsNotNone(word_dictionary("cat", "en", "lao"))
4 changes: 3 additions & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ def test_lao_digit_to_arabic_digit(self):
lao_digit_to_arabic_digit("໑໒໓໔໕໖໗໘໙໐"),
'1234567890'
)

def test_arabic_digit_to_lao_digit(self):
self.assertEqual(
arabic_digit_to_lao_digit('1234567890'),
"໑໒໓໔໕໖໗໘໙໐"
)

def test_remove_tone_mark(self):
self.assertEqual(
remove_tone_mark("ຜູ້"),
'ຜູ'
)
)
Loading
Loading