Skip to content

Commit 0eaff7f

Browse files
authored
Merge pull request #1215 from PyThaiNLP/copilot/fix-camembert-roberta-issue
Suppress benign camembert/roberta model type warning in ThEnTranslator
2 parents b0c6a91 + 1f7959c commit 0eaff7f

File tree

2 files changed

+29
-20
lines changed

2 files changed

+29
-20
lines changed

CITATION.cff

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ authors:
2323
abstract: PyThaiNLP is a Thai natural language processing library for Python. It provides standard linguistic analysis for the Thai language, including tokenization and part-of-speech tagging. Additionally, it offers standard Thai locale utility functions, such as Thai Buddhist Era date formatting and the conversion of numbers into Thai text.
2424
repository-code: "https://github.com/PyThaiNLP/pythainlp"
2525
type: software
26+
doi: 10.5281/zenodo.3519354
2627
version: 5.2.0
2728
license-url: "https://spdx.org/licenses/Apache-2.0"
2829
keywords:

pythainlp/translate/en_th.py

Lines changed: 28 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from __future__ import annotations
1212

1313
import os
14+
import warnings
1415

1516
try:
1617
from fairseq.models.transformer import TransformerModel
@@ -125,26 +126,33 @@ def __init__(self, use_gpu: bool = False):
125126
self._model_name = _TH_EN_MODEL_NAME
126127

127128
_download_install(self._model_name)
128-
self._model = TransformerModel.from_pretrained(
129-
model_name_or_path=_get_translate_path(
130-
self._model_name,
131-
_TH_EN_FILE_NAME,
132-
"models",
133-
),
134-
checkpoint_file="checkpoint.pt",
135-
data_name_or_path=_get_translate_path(
136-
self._model_name,
137-
_TH_EN_FILE_NAME,
138-
"vocab",
139-
),
140-
bpe="sentencepiece",
141-
sentencepiece_model=_get_translate_path(
142-
self._model_name,
143-
_TH_EN_FILE_NAME,
144-
"bpe",
145-
"spm.th.model",
146-
),
147-
)
129+
# Suppress model type mismatch warning from transformers
130+
# The pre-trained model has camembert config but works fine
131+
with warnings.catch_warnings():
132+
warnings.filterwarnings(
133+
"ignore",
134+
message="(?i).*using a model of type .* to instantiate a model of type.*",
135+
)
136+
self._model = TransformerModel.from_pretrained(
137+
model_name_or_path=_get_translate_path(
138+
self._model_name,
139+
_TH_EN_FILE_NAME,
140+
"models",
141+
),
142+
checkpoint_file="checkpoint.pt",
143+
data_name_or_path=_get_translate_path(
144+
self._model_name,
145+
_TH_EN_FILE_NAME,
146+
"vocab",
147+
),
148+
bpe="sentencepiece",
149+
sentencepiece_model=_get_translate_path(
150+
self._model_name,
151+
_TH_EN_FILE_NAME,
152+
"bpe",
153+
"spm.th.model",
154+
),
155+
)
148156
if use_gpu:
149157
self._model.cuda()
150158

0 commit comments

Comments
 (0)