
Commit bbb5854

Merge pull request #1267 from PyThaiNLP/copilot/add-type-hints-to-submodules-one-more-time
Add type hints to modules in extra test suite
2 parents a22ff19 + c2edfb3

File tree

10 files changed: +33 −33 lines
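Every hunk in this commit follows the same pattern: a function with a declared return type returns the result of a call into an untyped third-party library (nltk, gensim, epitran, tltk, bpemb), which mypy with --warn-return-any (or strict mode) flags under the error code no-any-return. A minimal sketch of the error and the line-level suppression used throughout (untyped_call and fetch_words are hypothetical names, for illustration only):

    from typing import Any

    def untyped_call() -> Any:
        # Stands in for a call into a library that ships no type stubs.
        return ["ไทย", "ภาษา"]

    def fetch_words() -> list[str]:
        # Without the comment, mypy (strict) reports:
        #   error: Returning Any from function declared to return "list[str]"  [no-any-return]
        # The bracketed code suppresses only that error, only on this line.
        return untyped_call()  # type: ignore[no-any-return]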

pythainlp/augment/lm/wangchanberta.py

Lines changed: 1 addition & 1 deletion
@@ -74,4 +74,4 @@ def augment(self, sentence: str, num_replace_tokens: int = 3) -> list[str]:
         'ช้างมีทั้งหมด 50 ตัว บนหัว']
         """
         sent2 = self.generate(sentence, num_replace_tokens)
-        return sent2
+        return sent2  # type: ignore[no-any-return]

pythainlp/augment/word2vec/bpemb_wv.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ def tokenizer(self, text: str) -> list[str]:
         """:param str text: Thai text
         :rtype: List[str]
         """
-        return self.bpemb_temp.encode(text)
+        return self.bpemb_temp.encode(text)  # type: ignore[no-any-return]

     def load_w2v(self):
         """Load BPEmb model

pythainlp/benchmarks/word_tokenization.py

Lines changed: 7 additions & 7 deletions
@@ -157,11 +157,11 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict:
     c_pos_pred = c_pos_pred[c_pos_pred < ref_sample_arr.shape[0]]
     c_neg_pred = c_neg_pred[c_neg_pred < ref_sample_arr.shape[0]]

-    c_tp = np.sum(ref_sample_arr[c_pos_pred] == 1)
-    c_fp = np.sum(ref_sample_arr[c_pos_pred] == 0)
+    c_tp: np.intp = np.sum(ref_sample_arr[c_pos_pred] == 1)
+    c_fp: np.intp = np.sum(ref_sample_arr[c_pos_pred] == 0)

-    c_tn = np.sum(ref_sample_arr[c_neg_pred] == 0)
-    c_fn = np.sum(ref_sample_arr[c_neg_pred] == 1)
+    c_tn: np.intp = np.sum(ref_sample_arr[c_neg_pred] == 0)
+    c_fn: np.intp = np.sum(ref_sample_arr[c_neg_pred] == 1)

     # Compute word-level statistics

@@ -174,7 +174,7 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict:
         word_boundaries, ss_boundaries
     )

-    correctly_tokenised_words = np.sum(tokenization_indicators)
+    correctly_tokenised_words: np.intp = np.sum(tokenization_indicators)

     tokenization_indicators_str = list(map(str, tokenization_indicators))

@@ -206,14 +206,14 @@ def _binary_representation(txt: str, verbose: bool = False) -> np.ndarray:
     :param bool verbose: for debugging purposes

     :return: {0, 1} sequence
-    :rtype: str
+    :rtype: np.ndarray
     """
     chars = np.array(list(txt))

     boundary = np.argwhere(chars == SEPARATOR).reshape(-1)
     boundary = boundary - np.array(range(boundary.shape[0]))

-    bin_rept = np.zeros(len(txt) - boundary.shape[0])
+    bin_rept: np.ndarray = np.zeros(len(txt) - boundary.shape[0])
     bin_rept[list(boundary) + [0]] = 1

     sample_wo_seps = list(txt.replace(SEPARATOR, ""))
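A note on the numpy annotations above: numpy's type stubs give the sum of a boolean array the scalar type np.intp rather than a plain int, so the explicit annotations pin the character-level counts to a concrete type where mypy would otherwise infer something looser. A quick self-contained sketch (the array values are made up):

    import numpy as np

    ref = np.array([1, 0, 1, 1])
    pos_pred = np.array([0, 2])  # indices predicted as word boundaries

    # Summing a boolean mask yields a numpy integer scalar; annotate it explicitly.
    c_tp: np.intp = np.sum(ref[pos_pred] == 1)
    print(int(c_tp))  # -> 2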

pythainlp/corpus/wordnet.py

Lines changed: 10 additions & 10 deletions
@@ -74,7 +74,7 @@ def synsets(word: str, pos: Optional[str] = None, lang: str = "tha") -> list[wordnet.Synset]:
     >>> synsets("แรง", pos="a", lang="tha")
     [Synset('hard.s.10'), Synset('strong.s.02')]
     """
-    return wordnet.synsets(lemma=word, pos=pos, lang=lang)
+    return wordnet.synsets(lemma=word, pos=pos, lang=lang)  # type: ignore[no-any-return]


 def synset(name_synsets: str) -> wordnet.Synset:

@@ -140,7 +140,7 @@ def all_lemma_names(pos: Optional[str] = None, lang: str = "tha") -> list[str]:
     >>> len(all_lemma_names(pos="a"))
     5277
     """
-    return wordnet.all_lemma_names(pos=pos, lang=lang)
+    return wordnet.all_lemma_names(pos=pos, lang=lang)  # type: ignore[no-any-return]


 def all_synsets(pos: Optional[str] = None) -> Iterable[wordnet.Synset]:

@@ -170,7 +170,7 @@ def all_synsets(pos: Optional[str] = None) -> Iterable[wordnet.Synset]:
     >>> next(generator)
     Synset('unable.a.01')
     """
-    return wordnet.all_synsets(pos=pos)
+    return wordnet.all_synsets(pos=pos)  # type: ignore[no-any-return]


 def langs() -> list[str]:

@@ -188,7 +188,7 @@ def langs() -> list[str]:
     'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha',
     'zsm']
     """
-    return wordnet.langs()
+    return wordnet.langs()  # type: ignore[no-any-return]


 def lemmas(word: str, pos: Optional[str] = None, lang: str = "tha") -> list[wordnet.Lemma]:

@@ -231,7 +231,7 @@ def lemmas(word: str, pos: Optional[str] = None, lang: str = "tha") -> list[wordnet.Lemma]:
     >>> lemmas("ม้วน", pos="n")
     [Lemma('roll.n.11.ม้วน')]
     """
-    return wordnet.lemmas(word, pos=pos, lang=lang)
+    return wordnet.lemmas(word, pos=pos, lang=lang)  # type: ignore[no-any-return]


 def lemma(name_synsets: str) -> wordnet.Lemma:

@@ -323,7 +323,7 @@ def path_similarity(synsets1: wordnet.Synset, synsets2: wordnet.Synset) -> float:
     >>> path_similarity(obj, cat)
     0.08333333333333333
     """
-    return wordnet.path_similarity(synsets1, synsets2)
+    return wordnet.path_similarity(synsets1, synsets2)  # type: ignore[no-any-return]


 def lch_similarity(synsets1: wordnet.Synset, synsets2: wordnet.Synset) -> float:

@@ -360,7 +360,7 @@ def lch_similarity(synsets1: wordnet.Synset, synsets2: wordnet.Synset) -> float:
     >>> lch_similarity(obj, cat)
     1.1526795099383855
     """
-    return wordnet.lch_similarity(synsets1, synsets2)
+    return wordnet.lch_similarity(synsets1, synsets2)  # type: ignore[no-any-return]


 def wup_similarity(synsets1: wordnet.Synset, synsets2: wordnet.Synset) -> float:

@@ -391,7 +391,7 @@ def wup_similarity(synsets1: wordnet.Synset, synsets2: wordnet.Synset) -> float:
     >>> wup_similarity(obj, cat)
     0.35294117647058826
     """
-    return wordnet.wup_similarity(synsets1, synsets2)
+    return wordnet.wup_similarity(synsets1, synsets2)  # type: ignore[no-any-return]


 def morphy(form: str, pos: Optional[str] = None) -> str:

@@ -421,7 +421,7 @@ def morphy(form: str, pos: Optional[str] = None) -> str:
     >>> morphy("calculated")
     'calculate'
     """
-    return wordnet.morphy(form, pos=None)
+    return wordnet.morphy(form, pos=None)  # type: ignore[no-any-return]


 def custom_lemmas(tab_file, lang: str) -> None:

@@ -432,4 +432,4 @@ def custom_lemmas(tab_file, lang: str) -> None:
     :param tab_file: Tab file as a file or file-like object
     :param str lang: abbreviation of language (i.e. *eng*, *tha*).
     """
-    return wordnet.custom_lemmas(tab_file, lang)
+    return wordnet.custom_lemmas(tab_file, lang)  # type: ignore[no-any-return]
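For thin wrappers like these, typing.cast is the usual alternative to a line-level ignore: it asserts the expected type (with no runtime check) instead of suppressing an error code. A sketch of one wrapper written that way, assuming the same `from nltk.corpus import wordnet` import this module already uses (the function name langs_cast is hypothetical; the commit itself uses the ignore comments):

    from typing import cast

    from nltk.corpus import wordnet

    def langs_cast() -> list[str]:
        # cast() records the intended type and keeps --warn-return-any quiet;
        # it is purely a static assertion and performs no runtime validation.
        return cast(list[str], wordnet.langs())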

pythainlp/spell/phunspell.py

Lines changed: 1 addition & 1 deletion
@@ -27,4 +27,4 @@ def spell(text: str) -> list[str]:


 def correct(text: str) -> str:
-    return list(pspell.suggest(text))[0]
+    return list(pspell.suggest(text))[0]  # type: ignore[no-any-return]

pythainlp/spell/tltk.py

Lines changed: 1 addition & 1 deletion
@@ -21,4 +21,4 @@


 def spell(text: str) -> list[str]:
-    return spell_candidates(text)
+    return spell_candidates(text)  # type: ignore[no-any-return]

pythainlp/tag/tltk.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
 def pos_tag(words: list[str], corpus: str = "tnc") -> list[tuple[str, str]]:
     if corpus != "tnc":
         raise ValueError(f"tltk not support {0} corpus.")
-    return nlp.pos_tag_wordlist(words)
+    return nlp.pos_tag_wordlist(words)  # type: ignore[no-any-return]


 def _post_process(text: str) -> str:

pythainlp/transliterate/ipa.py

Lines changed: 3 additions & 3 deletions
@@ -17,12 +17,12 @@


 def transliterate(text: str) -> str:
-    return _EPI_THA.transliterate(text)
+    return _EPI_THA.transliterate(text)  # type: ignore[no-any-return]


 def trans_list(text: str) -> list[str]:
-    return _EPI_THA.trans_list(text)
+    return _EPI_THA.trans_list(text)  # type: ignore[no-any-return]


 def xsampa_list(text: str) -> list[str]:
-    return _EPI_THA.xsampa_list(text)
+    return _EPI_THA.xsampa_list(text)  # type: ignore[no-any-return]

pythainlp/transliterate/tltk.py

Lines changed: 3 additions & 3 deletions
@@ -21,18 +21,18 @@ def romanize(text: str) -> str:
     # Replace ฅ with ค to avoid KeyError in tltk (out-of-vocabulary issue)
     text = text.replace("ฅ", "ค")
     _temp = th2roman(text)
-    return _temp[: _temp.rfind(" <s/>")].replace("<s/>", "")
+    return _temp[: _temp.rfind(" <s/>")].replace("<s/>", "")  # type: ignore[no-any-return]


 def tltk_g2p(text: str) -> str:
     # Replace ฅ with ค to avoid KeyError in tltk (out-of-vocabulary issue)
     text = text.replace("ฅ", "ค")
     _temp = g2p(text).split("<tr/>")[1].replace("|<s/>", "").replace("|", " ")
-    return _temp.replace("<s/>", "")
+    return _temp.replace("<s/>", "")  # type: ignore[no-any-return]


 def tltk_ipa(text: str) -> str:
     # Replace ฅ with ค to avoid KeyError in tltk (out-of-vocabulary issue)
     text = text.replace("ฅ", "ค")
     _temp = th2ipa(text)
-    return _temp[: _temp.rfind(" <s/>")].replace("<s/>", "")
+    return _temp[: _temp.rfind(" <s/>")].replace("<s/>", "")  # type: ignore[no-any-return]
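The slicing in these wrappers strips the <s/> markers that tltk embeds in its output: the slice up to the last " <s/>" drops the trailing marker, and the replace removes any remaining internal ones. A standalone sketch of that cleanup with a made-up sample string:

    raw = "sawatdi khrap <s/>din di <s/>"
    clean = raw[: raw.rfind(" <s/>")].replace("<s/>", "")
    print(clean)  # -> "sawatdi khrap din di"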

pythainlp/word_vector/core.py

Lines changed: 5 additions & 5 deletions
@@ -108,7 +108,7 @@ def doesnt_match(self, words: list[str]) -> str:
         >>> wv.doesnt_match(words)
         'เรือ'
         """
-        return self.model.doesnt_match(words)
+        return self.model.doesnt_match(words)  # type: ignore[no-any-return]

     def most_similar_cosmul(
         self, positive: list[str], negative: list[str]

@@ -209,7 +209,7 @@ def most_similar_cosmul(
         >>> wv.most_similar_cosmul(list_positive, list_negative)
         KeyError: "word 'เมนูอาหารไทย' not in vocabulary"
         """
-        return self.model.most_similar_cosmul(
+        return self.model.most_similar_cosmul(  # type: ignore[no-any-return]
             positive=positive, negative=negative
         )

@@ -249,7 +249,7 @@ def similarity(self, word1: str, word2: str) -> float:
         0.04300258

         """
-        return self.model.similarity(word1, word2)
+        return self.model.similarity(word1, word2)  # type: ignore[no-any-return]

     def sentence_vectorizer(self, text: str, use_mean: bool = True) -> ndarray:
         """Converts a Thai sentence into a vector.

@@ -302,7 +302,7 @@ def sentence_vectorizer(self, text: str, use_mean: bool = True) -> ndarray:
         len_words = len(words)

         if not len_words:
-            return vec
+            return vec  # type: ignore[no-any-return]

         for word in words:
             if word == " " and self.model_name == "thai2fit_wv":

@@ -316,4 +316,4 @@ def sentence_vectorizer(self, text: str, use_mean: bool = True) -> ndarray:
         if use_mean:
             vec /= len_words

-        return vec
+        return vec  # type: ignore[no-any-return]
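One placement detail from the most_similar_cosmul hunk above: when the offending return statement spans multiple physical lines, the ignore comment goes on the first line, which is where mypy anchors the no-any-return error. A minimal sketch (the model parameter and method name mirror the gensim-backed call above, but the function itself is illustrative):

    from typing import Any

    def most_similar(
        model: Any, positive: list[str], negative: list[str]
    ) -> list[tuple[str, float]]:
        # The suppression sits on the statement's first physical line,
        # not on the closing parenthesis:
        return model.most_similar_cosmul(  # type: ignore[no-any-return]
            positive=positive, negative=negative
        )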
