 
 import re
 import sys
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Union
 
 if TYPE_CHECKING:
     import numpy as np
@@ -42,7 +42,7 @@ def _f1(precision: float, recall: float) -> float:
     return 2 * precision * recall / (precision + recall)
 
 
-def _flatten_result(my_dict: dict, sep: str = ":") -> dict[str, Any]:
+def _flatten_result(my_dict: dict, sep: str = ":") -> dict[str, Union[int, str]]:
     """Flatten two-dimension dictionary.
 
     Use keys in the first dimension as a prefix for keys in the second dimension.
@@ -56,7 +56,7 @@ def _flatten_result(my_dict: dict, sep: str = ":") -> dict[str, Any]:
     :param str sep: separator between the two keys (default: ":")
 
     :return: a one-dimension dictionary with keys combined
-    :rtype: dict[str, Any]
+    :rtype: dict[str, Union[int, str]]
     """
     return {
         f"{k1}{sep}{k2}": v
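As a quick illustration of what `_flatten_result` does (a minimal sketch based on the docstring and the comprehension above; the sample dictionary is made up, not from the repository):

```python
# Hypothetical two-level dict of results.
nested = {"a": {"x": 1, "y": 2}, "b": {"z": "ok"}}

flat = _flatten_result(nested)
# First-level keys become prefixes, joined by the separator (default ":"):
# {"a:x": 1, "a:y": 2, "b:z": "ok"}
```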
@@ -133,7 +133,7 @@ def preprocessing(txt: str, remove_space: bool = True) -> str:
     return txt
 
 
-def compute_stats(ref_sample: str, raw_sample: str) -> dict[str, Any]:
+def compute_stats(ref_sample: str, raw_sample: str) -> dict[str, dict[str, Union[int, str]]]:
     """Compute statistics for tokenization quality
 
     These statistics include:
@@ -150,7 +150,7 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict[str, Any]:
     :param str samples: samples that we want to evaluate
 
     :return: metrics at character- and word-level and indicators of correctly tokenized words
-    :rtype: dict[str, dict[str, Union[int, str]]]
+    :rtype: dict[str, dict[str, Union[int, str]]]
     """
     import numpy as np
 
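For context, a hedged sketch of how the character-level counts computed below could feed precision, recall, and F1 via the `_f1` helper from the first hunk; `_char_metrics` is a hypothetical name, not a function in this module, and it assumes the standard precision/recall definitions:

```python
def _char_metrics(c_tp: int, c_fp: int, c_fn: int) -> dict[str, float]:
    # Standard definitions; guard against empty denominators.
    precision = c_tp / (c_tp + c_fp) if (c_tp + c_fp) else 0.0
    recall = c_tp / (c_tp + c_fn) if (c_tp + c_fn) else 0.0
    return {"precision": precision, "recall": recall, "f1": _f1(precision, recall)}
```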
@@ -166,11 +166,11 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict[str, Any]:
     c_pos_pred = c_pos_pred[c_pos_pred < ref_sample_arr.shape[0]]
     c_neg_pred = c_neg_pred[c_neg_pred < ref_sample_arr.shape[0]]
 
-    c_tp: np.intp = np.sum(ref_sample_arr[c_pos_pred] == 1)
-    c_fp: np.intp = np.sum(ref_sample_arr[c_pos_pred] == 0)
+    c_tp: int = int(np.sum(ref_sample_arr[c_pos_pred] == 1))
+    c_fp: int = int(np.sum(ref_sample_arr[c_pos_pred] == 0))
 
-    c_tn: np.intp = np.sum(ref_sample_arr[c_neg_pred] == 0)
-    c_fn: np.intp = np.sum(ref_sample_arr[c_neg_pred] == 1)
+    c_tn: int = int(np.sum(ref_sample_arr[c_neg_pred] == 0))
+    c_fn: int = int(np.sum(ref_sample_arr[c_neg_pred] == 1))
 
     # Compute word-level statistics
 
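The point of wrapping these sums in `int(...)` is that `np.sum` returns a NumPy integer scalar rather than a built-in `int`, so the cast is what makes the values match the new `int` annotations. A small standalone check (not part of the diff):

```python
import numpy as np

count = np.sum(np.array([1, 0, 1]) == 1)
print(type(count))       # a NumPy integer scalar (e.g. numpy.int64), not int
print(type(int(count)))  # <class 'int'>, matching the annotation above
```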
@@ -183,7 +183,7 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict[str, Any]:
         word_boundaries, ss_boundaries
     )
 
-    correctly_tokenised_words: np.intp = np.sum(tokenization_indicators)
+    correctly_tokenised_words: int = int(np.sum(tokenization_indicators))
 
     tokenization_indicators_str = list(map(str, tokenization_indicators))
 
@@ -196,8 +196,8 @@ def compute_stats(ref_sample: str, raw_sample: str) -> dict[str, Any]:
         },
         "word_level": {
             "correctly_tokenised_words": correctly_tokenised_words,
-            "total_words_in_sample": np.sum(sample_arr),
-            "total_words_in_ref_sample": np.sum(ref_sample_arr),
+            "total_words_in_sample": int(np.sum(sample_arr)),
+            "total_words_in_ref_sample": int(np.sum(ref_sample_arr)),
         },
         "global": {
             "tokenisation_indicators": "".join(tokenization_indicators_str)
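Putting the two changed functions together, one assumed downstream use (not shown in this commit) is flattening the nested result and serialising it; with the counts cast to built-in `int`, `json.dumps` no longer trips over NumPy scalar types. `ref_sample` and `raw_sample` stand in for whatever tokenised samples the caller provides:

```python
import json

result = compute_stats(ref_sample, raw_sample)  # hypothetical sample strings
flat = _flatten_result(result)
# Keys such as "word_level:correctly_tokenised_words" and
# "global:tokenisation_indicators"; char-level key names are not visible in this hunk.
print(json.dumps(flat))  # plain int/str values serialise without a custom encoder
```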