
Commit fc53568

Fix T5 (#172)

Consolidates the T5 export from separate encoder and decoder programs into a single multi-method ExecuTorch program (model.pte), and updates the runtime wrapper, export metadata, task kwargs, and tests to match.

1 parent 4676609 commit fc53568

4 files changed: 42 additions (+), 60 deletions (-)


optimum/executorch/modeling.py

Lines changed: 14 additions & 21 deletions
```diff
@@ -460,29 +460,27 @@ def __init__(
         config: "PretrainedConfig",
     ):
         super().__init__(models=models, config=config)
-        if not hasattr(self, "encoder"):
-            raise AttributeError("Expected attribute 'encoder' not found in the instance.")
-        if not hasattr(self, "text_decoder"):
-            raise AttributeError("Expected attribute 'text_decoder' not found in the instance.")
-        metadata = self.decoder.method_names()
+        if not hasattr(self, "model"):
+            raise AttributeError("Expected attribute 'model' not found in the instance.")
+        metadata = self.model.method_names()
         if "use_kv_cache" in metadata:
-            self.use_kv_cache = self.decoder.run_method("use_kv_cache")[0]
+            self.use_kv_cache = self.model.run_method("use_kv_cache")[0]
         if "get_max_seq_len" in metadata:
-            self.max_cache_size = self.decoder.run_method("get_max_seq_len")[0]
+            self.max_cache_size = self.model.run_method("get_max_seq_len")[0]
         if "get_max_batch_size" in metadata:
-            self.max_batch_size = self.decoder.run_method("get_max_batch_size")[0]
+            self.max_batch_size = self.model.run_method("get_max_batch_size")[0]
         if "get_dtype" in metadata:
-            self.dtype = self.decoder.run_method("get_dtype")[0]
+            self.dtype = self.model.run_method("get_dtype")[0]
         if "get_bos_id" in metadata:
-            self.bos_token_id = self.decoder.run_method("get_bos_id")[0]
+            self.bos_token_id = self.model.run_method("get_bos_id")[0]
         if "get_eos_id" in metadata:
-            self.eos_token_id = self.decoder.run_method("get_eos_id")[0]
+            self.eos_token_id = self.model.run_method("get_eos_id")[0]
         if "get_vocab_size" in metadata:
-            self.vocab_size = self.decoder.run_method("get_vocab_size")[0]
+            self.vocab_size = self.model.run_method("get_vocab_size")[0]
         if "max_hidden_seq_length" in metadata:
-            self.max_hidden_seq_length = self.decoder.run_method("max_hidden_seq_length")[0]
+            self.max_hidden_seq_length = self.model.run_method("max_hidden_seq_length")[0]
         if "decoder_start_token_id" in metadata:
-            self.decoder_start_token_id = self.decoder.run_method("decoder_start_token_id")[0]
+            self.decoder_start_token_id = self.model.run_method("decoder_start_token_id")[0]
 
     def forward(
         self,
```
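For context, a minimal sketch of the new single-program flow from the pybind side: one .pte is loaded and its constant metadata methods are queried by name, as `__init__` above now does through `self.model`. The file path is illustrative; `method_names()` and `run_method()` are the same calls the diff relies on.

```python
# Minimal sketch, assuming a multi-method model.pte produced by the export.
from executorch.extension.pybindings.portable_lib import _load_for_executorch

model = _load_for_executorch("model.pte")  # illustrative path
metadata = model.method_names()

# Constant methods return their value as a single-element result.
max_seq_len = model.run_method("get_max_seq_len")[0] if "get_max_seq_len" in metadata else None
vocab_size = model.run_method("get_vocab_size")[0] if "get_vocab_size" in metadata else None
```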
```diff
@@ -491,15 +489,14 @@ def forward(
         cache_position: torch.Tensor,
         encoder_outputs: Optional[torch.Tensor] = None,
     ):
-        # Encode if needed (first prediction pass)
         is_first_prediction = encoder_outputs is None
         self.stats.on_model_execution_start()
         if is_first_prediction:
-            encoder_outputs = self.encoder.forward((input_ids,))[0]
+            encoder_outputs = self.model.run_method("encoder", (input_ids,))[0]
             self.stats.on_prompt_eval_end()
 
         result = (
-            self.decoder.forward((decoder_input_ids, encoder_outputs, cache_position))[0],
+            self.model.run_method("text_decoder", (decoder_input_ids, encoder_outputs, cache_position))[0],
             encoder_outputs,
         )
         self.stats.on_model_execution_end()
@@ -530,9 +527,6 @@ def generate(
         Returns:
             List[int]: List of generated token IDs.
 
-        Note:
-            Temporarily implemented this method in Python due to limited access to ExecuTorch's c++ LLM runner via pybind.
-            Expect improvements to the pybind interface in ExecuTorch version 0.4.1.
         """
         self.device = torch.device("cpu")
         if max_seq_len is None:
@@ -550,7 +544,6 @@ def generate(
         encoder_input_ids = input_ids
         encoder_outputs = None
         generated_ids = [0]
-
         first_token_generated = False
 
         # Generate tokens one by one
```
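Taken together, the `forward` and `generate` changes implement an encode-once, decode-per-step loop against the two named methods of the single program. A hedged sketch of that pattern follows; greedy decoding, the tensor shapes, and the `greedy_decode` helper itself are illustrative, not part of this commit.

```python
import torch

def greedy_decode(model, input_ids, start_id, eos_id, max_steps=64):
    # The encoder runs once, on the first prediction pass.
    encoder_outputs = model.run_method("encoder", (input_ids,))[0]
    generated = [start_id]
    for step in range(max_steps):
        decoder_input_ids = torch.tensor([[generated[-1]]], dtype=torch.long)
        cache_position = torch.tensor([step], dtype=torch.long)
        # The decoder runs once per token, reusing the cached encoder output.
        logits = model.run_method(
            "text_decoder", (decoder_input_ids, encoder_outputs, cache_position)
        )[0]
        next_id = int(torch.argmax(logits[:, -1, :], dim=-1))
        generated.append(next_id)
        if next_id == eos_id:
            break
    return generated
```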

optimum/exporters/executorch/integrations.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -424,7 +424,10 @@ def __init__(
         self.use_custom_sdpa = use_custom_sdpa
         self.disable_dynamic_shapes = disable_dynamic_shapes
         self.metadata = save_config_to_constant_methods(
-            model.config, model.generation_config, get_max_seq_len=max_seq_len, enable_dynamic_shape=not self.disable_dynamic_shapes
+            model.config,
+            model.generation_config,
+            get_max_seq_len=max_seq_len,
+            enable_dynamic_shape=not self.disable_dynamic_shapes,
         )
         logging.info(f"Metadata to be recorded in PTE: {self.metadata}")
 
```
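The reformatted call records export-time metadata as constant methods in the .pte. As a rough sketch of the idea only (this commit does not show the real `save_config_to_constant_methods` internals), such a helper plausibly merges config values with the keyword overrides:

```python
# Hedged sketch of what a helper like save_config_to_constant_methods might
# assemble; the real implementation lives in optimum-executorch.
from typing import Any, Dict, Optional
from transformers import GenerationConfig, PretrainedConfig

def constant_methods_sketch(
    config: PretrainedConfig,
    generation_config: Optional[GenerationConfig] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    # Each entry becomes a queryable metadata method in the exported program,
    # matching the names read back in modeling.py (e.g. "get_max_seq_len").
    metadata = {
        "get_bos_id": getattr(config, "bos_token_id", None),
        "get_eos_id": getattr(config, "eos_token_id", None),
        "get_vocab_size": getattr(config, "vocab_size", None),
        "decoder_start_token_id": getattr(config, "decoder_start_token_id", None),
    }
    metadata.update(kwargs)  # e.g. get_max_seq_len, enable_dynamic_shape
    return {k: v for k, v in metadata.items() if v is not None}
```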

optimum/exporters/executorch/tasks/seq2seq_lm.py

Lines changed: 23 additions & 23 deletions
```diff
@@ -23,36 +23,36 @@
 @register_task("text2text-generation")
 def load_seq2seq_lm_model(model_name_or_path: str, **kwargs) -> Seq2SeqLMExportableModule:
     """
-        Loads a seq2seq language model for conditional text generation and registers it under the task
-        'text2text-generation' using Hugging Face's `AutoModelForSeq2SeqLM`.
+    Loads a seq2seq language model for conditional text generation and registers it under the task
+    'text2text-generation' using Hugging Face's `AutoModelForSeq2SeqLM`.
 
-        Args:
-            model_name_or_path (str):
-                Model ID on huggingface.co or path on disk to the model repository to export. For example:
-                `model_name_or_path="google-t5/t5-small"` or `mode_name_or_path="/path/to/model_folder`
-            **kwargs:
-                Additional configuration options for the model:
-                    - dtype (str, optional):
-                        Data type for model weights (default: "float32").
-                        Options include "float16" and "bfloat16".
-                    - max_hidden_seq_length (int, optional):
-                        Maximum hidden sequence length (default: 4096).
-                    - max_cache_length (int, optional):
-                        Maximum sequence length for generation (default: 1024).
+    Args:
+        model_name_or_path (str):
+            Model ID on huggingface.co or path on disk to the model repository to export. For example:
+            `model_name_or_path="google-t5/t5-small"` or `mode_name_or_path="/path/to/model_folder`
+        **kwargs:
+            Additional configuration options for the model:
+                - dtype (str, optional):
+                    Data type for model weights (default: "float32").
+                    Options include "float16" and "bfloat16".
+                - max_hidden_seq_length (int, optional):
+                    Maximum hidden sequence length (default: 4096).
+                - max_cache_length (int, optional):
+                    Maximum sequence length for generation (default: 1024).
 
-        Returns:
-            Seq2SeqLMExportableModule:
-                An instance of `Seq2SeqLMExportableModule` for exporting and lowering to ExecuTorch.
-        """
+    Returns:
+        Seq2SeqLMExportableModule:
+            An instance of `Seq2SeqLMExportableModule` for exporting and lowering to ExecuTorch.
+    """
     device = "cpu"
     batch_size = 1
-    max_hidden_seq_length = kwargs.get("max_hidden_seq_length", 4096)
-    max_cache_length = kwargs.get("max_cache_length", 1024)
+    max_hidden_seq_len = kwargs.get("max_hidden_seq_len", 4096)
+    max_seq_len = kwargs.get("max_seq_len", 1024)
 
     full_model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path).to(device).eval()
     return Seq2SeqLMExportableModule(
         full_model,
         batch_size=batch_size,
-        max_hidden_seq_length=max_hidden_seq_length,
-        max_cache_length=max_cache_length,
+        max_seq_len=max_seq_len,
+        max_hidden_seq_len=max_hidden_seq_len,
    )
```
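With the renamed kwargs, loading the task looks like the following sketch. The defaults mirror the code above and the model ID comes from the docstring; nothing else is assumed.

```python
from optimum.exporters.executorch.tasks.seq2seq_lm import load_seq2seq_lm_model

module = load_seq2seq_lm_model(
    "google-t5/t5-small",
    max_seq_len=1024,         # was max_cache_length before this commit
    max_hidden_seq_len=4096,  # was max_hidden_seq_length before this commit
)
```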

tests/models/test_modeling_t5.py

Lines changed: 1 addition & 15 deletions
```diff
@@ -21,7 +21,6 @@
 
 import pytest
 from executorch import version
-from executorch.extension.pybindings.portable_lib import ExecuTorchModule
 from packaging.version import parse
 from transformers import AutoTokenizer
 from transformers.testing_utils import slow
@@ -45,20 +44,13 @@ def test_t5_export_to_executorch(self):
             shell=True,
             check=True,
         )
-        self.assertTrue(os.path.exists(f"{tempdir}/executorch/encoder.pte"))
-        self.assertTrue(os.path.exists(f"{tempdir}/executorch/decoder.pte"))
+        self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte"))
 
     def _helper_t5_translation(self, recipe: str):
         model_id = "google/flan-t5-small"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = ExecuTorchModelForSeq2SeqLM.from_pretrained(model_id, recipe=recipe)
 
-        self.assertIsInstance(model, ExecuTorchModelForSeq2SeqLM)
-        self.assertTrue(hasattr(model, "text_encoder"))
-        self.assertIsInstance(model.encoder, ExecuTorchModule)
-        self.assertTrue(hasattr(model, "text_decoder"))
-        self.assertIsInstance(model.decoder, ExecuTorchModule)
-
         input_text = "translate English to German: How old are you?"
         generated_text = model.text_generation(
             tokenizer=tokenizer,
@@ -88,12 +80,6 @@ def _helper_t5_summarization(self, recipe: str):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = ExecuTorchModelForSeq2SeqLM.from_pretrained(model_id, recipe=recipe)
 
-        self.assertIsInstance(model, ExecuTorchModelForSeq2SeqLM)
-        self.assertTrue(hasattr(model, "encoder"))
-        self.assertIsInstance(model.encoder, ExecuTorchModule)
-        self.assertTrue(hasattr(model, "text_decoder"))
-        self.assertIsInstance(model.decoder, ExecuTorchModule)
-
         article = (
             " New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A"
             " year later, she got married again in Westchester County, but to a different man and without divorcing"
```
