
Commit 89083f1

Fix a variety of bugs + support newest OpenAI models (gpt-4o, gpt-4o-mini) [release] (#37)
* Fix notebook login bug
* Fix validate quantization config bug
* Fix prompt steps
* Allow VLLM progress bars to be enabled
* Support GPT-4o and GPT-4o-mini
1 parent 4cbaf9f commit 89083f1

10 files changed: +104 −34 lines changed
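For context, the headline change is that gpt-4o and gpt-4o-mini now work anywhere DataDreamer's OpenAI wrapper does. A hedged sketch, with usage inferred from the tests added in this commit (run inside a DataDreamer session, as the tests do):

from datadreamer.llms import OpenAI

# Model names newly supported by this commit; the expected limits below
# come from the updated tests in src/tests/llms/test_llms.py.
llm = OpenAI("gpt-4o-mini")
assert llm.get_max_context_length(max_new_tokens=0) == 127982
assert llm._get_max_output_length() == 16384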

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "DataDreamer"
-version = "0.36.0"
+version = "0.37.0"
 description = "Prompt. Generate Synthetic Data. Train & Align Models."
 license = "MIT"
 authors= [

src/llms/openai.py

Lines changed: 22 additions & 5 deletions
@@ -63,16 +63,30 @@ def _is_gpt_3_5_legacy(model_name: str):
 @lru_cache(maxsize=None)
 def _is_gpt_4(model_name: str):
     model_name = _normalize_model_name(model_name)
-    return model_name == "gpt-4" or any(
-        gpt4_name in model_name for gpt4_name in ["gpt-4-"]
+    return (
+        model_name == "gpt-4"
+        or any(gpt4_name in model_name for gpt4_name in ["gpt-4-"])
+        or _is_gpt_4o(model_name)
     )


+@lru_cache(maxsize=None)
+def _is_gpt_4o(model_name: str):
+    model_name = _normalize_model_name(model_name)
+    return any(gpt4_name in model_name for gpt4_name in ["gpt-4o"])
+
+
+@lru_cache(maxsize=None)
+def _is_gpt_mini(model_name: str):
+    model_name = _normalize_model_name(model_name)
+    return any(gpt_mini_name in model_name for gpt_mini_name in ["-mini"])
+
+
 @lru_cache(maxsize=None)
 def _is_128k_model(model_name: str):
     model_name = _normalize_model_name(model_name)
     return _is_gpt_4(model_name) and (
-        "-preview" in model_name or "2024-04-09" in model_name
+        _is_gpt_4o(model_name) or "-preview" in model_name or "2024-04-09" in model_name
     )

@@ -249,11 +263,14 @@ def get_max_context_length(self, max_new_tokens: int) -> int:  # pragma: no cover
         return max_context_length - max_new_tokens - format_tokens

     def _get_max_output_length(self) -> None | int:  # pragma: no cover
-        if _is_128k_model(self.model_name) or (
+        if _is_128k_model(self.model_name) and _is_gpt_mini(self.model_name):
+            return 16384
+        elif _is_128k_model(self.model_name) or (
             _is_gpt_3_5(self.model_name) and not (_is_gpt_3_5_legacy(self.model_name))
         ):
             return 4096
-        return None
+        else:
+            return None

     @ring.lru(maxsize=5000)
     def count_tokens(self, value: str) -> int:
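Putting the new helpers together: model families are detected by substring tests on a normalized name, and the completion cap falls out of a small decision table (16K for gpt-4o-mini, 4K for other 128K models). A simplified, self-contained sketch of that logic; `normalize` is a hypothetical stand-in for DataDreamer's `_normalize_model_name`:

from functools import lru_cache

def normalize(model_name: str) -> str:
    # Hypothetical stand-in for DataDreamer's _normalize_model_name.
    return model_name.strip().lower()

@lru_cache(maxsize=None)
def is_gpt_4o(model_name: str) -> bool:
    return "gpt-4o" in normalize(model_name)

@lru_cache(maxsize=None)
def is_gpt_mini(model_name: str) -> bool:
    return "-mini" in normalize(model_name)

def max_output_length(model_name: str) -> int | None:
    # Simplified to the gpt-4o cases exercised by the tests in this commit.
    if is_gpt_4o(model_name) and is_gpt_mini(model_name):
        return 16384  # gpt-4o-mini allows 16K completion tokens
    elif is_gpt_4o(model_name):
        return 4096   # gpt-4o caps completions at 4K tokens
    return None       # older models are handled by the full implementation

assert max_output_length("gpt-4o-mini") == 16384
assert max_output_length("gpt-4o") == 4096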

src/llms/vllm.py

Lines changed: 5 additions & 2 deletions
@@ -1,6 +1,7 @@
 import gc
 import logging
 import os
+from contextlib import nullcontext
 from functools import cached_property, partial
 from typing import Any, Callable, Generator, Iterable

@@ -115,7 +116,7 @@ def _monkey_patch_init_logger(*args, **kwargs):
             timeout=10.0,
         )
         LLM = import_module("vllm").LLM
-        with ignore_tqdm():
+        with ignore_tqdm() if datadreamer_logger.level > logging.DEBUG else nullcontext():
             self_resource.model = LLM(
                 model=self.model_name,
                 trust_remote_code=self.trust_remote_code,

@@ -216,7 +217,9 @@ def _run_batch(  # noqa: C901
             **kwargs,
         )
         generated_texts_batch = self.model.proxy.get_generated_texts_batch(
-            prompts, sampling_params, use_tqdm=False
+            prompts,
+            sampling_params,
+            use_tqdm=(datadreamer_logger.level <= logging.DEBUG),
         )

         # Post-process and return
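The toggle works because the expression after `with` can be any context manager, so one can be chosen conditionally at runtime, with `contextlib.nullcontext()` as the do-nothing branch. A minimal sketch of the pattern; `suppress_output` is a hypothetical stand-in for DataDreamer's `ignore_tqdm`:

import contextlib
import io
import logging

logger = logging.getLogger("datadreamer")

def suppress_output():
    # Hypothetical stand-in for DataDreamer's ignore_tqdm() context manager.
    return contextlib.redirect_stderr(io.StringIO())

# Progress bars stay hidden unless the logger is at DEBUG or more verbose,
# mirroring the toggle added above; nullcontext() is the no-op branch.
with suppress_output() if logger.level > logging.DEBUG else contextlib.nullcontext():
    print("model loading (and its progress bars) happens here")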

src/steps/prompt/few_shot_prompt_with_retrieval.py

Lines changed: 5 additions & 0 deletions
@@ -143,5 +143,10 @@ def output_examples_generator():

         return input_examples_generator, output_examples_generator

+    def _run_prompts(self, args, *positionalargs, **kwargs):
+        args.pop("embedder")
+        args.pop("k")
+        return super()._run_prompts(args, *positionalargs, **kwargs)
+

 __all__ = ["FewShotPromptWithRetrieval"]
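The fix follows a pop-then-delegate pattern: `embedder` and `k` are only used by the retrieval step itself, so they are removed from `args` before the dictionary is forwarded, keeping them out of the underlying prompting call. A hypothetical, self-contained sketch of the pattern:

def run_prompts(args: dict, **kwargs) -> str:
    # Hypothetical downstream consumer that rejects step-only arguments.
    assert "embedder" not in args and "k" not in args
    return f"prompting {args['llm']} with {kwargs}"

def step_run_prompts(args: dict, **kwargs) -> str:
    # Pop the step-only arguments before delegating, as in the fix above.
    args.pop("embedder", None)
    args.pop("k", None)
    return run_prompts(args, **kwargs)

print(step_run_prompts({"llm": "gpt-4o", "embedder": object(), "k": 5}, seed=42))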

src/steps/prompt/rag_prompt.py

Lines changed: 2 additions & 0 deletions
@@ -99,6 +99,8 @@ def retrieved_texts_generator():
     def run(self):
         # Get inputs and arguments
         args = self.args
+        args.pop("retriever")
+        args.pop("k")
         llm = args["llm"]
         prompts = self.inputs["prompts"]
         retrieved_text_label = args.pop("retrieved_text_label")
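This mirrors the FewShotPromptWithRetrieval fix above: `retriever` and `k` are consumed by the step itself when building prompts, so they are popped before the remaining arguments reach the LLM.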

src/tests/llms/test_llms.py

Lines changed: 22 additions & 11 deletions
@@ -1102,17 +1102,20 @@ def test_metadata(self, create_datadreamer):
         assert llm.citation[0].endswith("year={2020}\n}")
         assert llm.citation[1].startswith("@article{ouyang2022training")
         assert llm.citation[1].endswith("year={2022}\n}")
-        llm = OpenAI("gpt-4")
-        assert llm.model_card == "https://cdn.openai.com/papers/gpt-4-system-card.pdf"
-        assert llm.license == "https://openai.com/policies"
-        assert isinstance(llm.citation, list)
-        assert len(llm.citation) == 2
-        assert llm.citation[0].startswith("@article{OpenAI2023GPT4TR,")
-        assert llm.citation[0].endswith(
-            "url={https://api.semanticscholar.org/CorpusID:257532815}\n}"
-        )
-        assert llm.citation[1].startswith("@article{ouyang2022training")
-        assert llm.citation[1].endswith("year={2022}\n}")
+        for gpt_4_model_name in ["gpt-4", "gpt-4o", "gpt-4o-mini"]:
+            llm = OpenAI(gpt_4_model_name)
+            assert (
+                llm.model_card == "https://cdn.openai.com/papers/gpt-4-system-card.pdf"
+            )
+            assert llm.license == "https://openai.com/policies"
+            assert isinstance(llm.citation, list)
+            assert len(llm.citation) == 2
+            assert llm.citation[0].startswith("@article{OpenAI2023GPT4TR,")
+            assert llm.citation[0].endswith(
+                "url={https://api.semanticscholar.org/CorpusID:257532815}\n}"
+            )
+            assert llm.citation[1].startswith("@article{ouyang2022training")
+            assert llm.citation[1].endswith("year={2022}\n}")

     def test_count_tokens(self, create_datadreamer):
         with create_datadreamer():

@@ -1122,6 +1125,10 @@ def test_count_tokens(self, create_datadreamer):
     def test_get_max_context_length(self, create_datadreamer):
         with create_datadreamer():
             # Check max context length
+            llm = OpenAI("gpt-4o")
+            assert llm.get_max_context_length(max_new_tokens=0) == 127982
+            llm = OpenAI("gpt-4o-mini")
+            assert llm.get_max_context_length(max_new_tokens=0) == 127982
             llm = OpenAI("gpt-4")
             assert llm.get_max_context_length(max_new_tokens=0) == 8174
             llm = OpenAI("gpt-4-turbo-2024-04-09")

@@ -1136,6 +1143,10 @@ def test_get_max_context_length(self, create_datadreamer):
     def test_get_max_output_length(self, create_datadreamer):
         with create_datadreamer():
             # Check max output length
+            llm = OpenAI("gpt-4o")
+            assert llm._get_max_output_length() == 4096
+            llm = OpenAI("gpt-4o-mini")
+            assert llm._get_max_output_length() == 16384
             llm = OpenAI("gpt-4")
             assert llm._get_max_output_length() is None
             llm = OpenAI("gpt-4-turbo-2024-04-09")
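The expected values encode each model's advertised context window minus a fixed margin: 128,000 − 127,982 = 18 tokens, the same margin as gpt-4's 8,192 − 8,174, which `get_max_context_length` appears to reserve for chat formatting. A quick arithmetic check of that inference:

# Margin inferred from the expected values in the tests above; treat the
# "18 tokens reserved for chat formatting" reading as an assumption.
FORMAT_TOKENS = 18

assert 128_000 - FORMAT_TOKENS == 127_982  # gpt-4o and gpt-4o-mini
assert 8_192 - FORMAT_TOKENS == 8_174      # gpt-4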

src/tests/test_utils/fixtures/mock_llm.py

Lines changed: 20 additions & 1 deletion
@@ -6,9 +6,28 @@


 @pytest.fixture
-def mock_llm() -> Callable[..., LLM]:
+def mock_llm(
+    allowed_kwargs=frozenset(
+        {
+            "inputs",
+            "batch_size",
+            "max_new_tokens",
+            "temperature",
+            "top_p",
+            "n",
+            "stop",
+            "repetition_penalty",
+            "logit_bias",
+            "seed",
+            "max_length_func",
+            "cached_tokenizer",
+        }
+    ),
+) -> Callable[..., LLM]:
     def _mock_llm(llm: LLM, responses: dict[str, str]) -> LLM:
         def _run_batch_mocked(**kwargs):
+            for kwarg in kwargs:
+                assert kwarg in allowed_kwargs, f"LLM got unexpected keyword: {kwarg}"
             return [responses[prompt] for prompt in kwargs["inputs"]]

         llm._run_batch = _run_batch_mocked  # type: ignore[attr-defined]
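With the allow-list in place, the fixture fails fast whenever a step forwards a keyword the real `_run_batch` would not accept, which is exactly how leaked step arguments like `embedder`, `retriever`, or `k` get caught. A minimal sketch of the same guard outside pytest:

ALLOWED = frozenset({"inputs", "max_new_tokens", "temperature"})

def run_batch_mocked(**kwargs):
    # Reject any keyword outside the allow-list, as the fixture now does.
    for kwarg in kwargs:
        assert kwarg in ALLOWED, f"LLM got unexpected keyword: {kwarg}"
    return [f"response to {prompt}" for prompt in kwargs["inputs"]]

print(run_batch_mocked(inputs=["hello"], temperature=1.0))  # ok
# run_batch_mocked(inputs=["hello"], k=5)  # would raise AssertionError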

src/utils/hf_hub_utils.py

Lines changed: 19 additions & 13 deletions
@@ -3,8 +3,9 @@
 from io import BytesIO
 from itertools import chain
 from typing import TYPE_CHECKING, Any, Callable
+from unittest import mock

-from .import_utils import ignore_pydantic_warnings
+from .import_utils import ignore_hf_token_warnings, ignore_pydantic_warnings

 with ignore_pydantic_warnings():
     from huggingface_hub import HfApi, hf_hub_download, login

@@ -179,22 +180,27 @@ def get_citation_info(

 def hf_hub_login(token: None | str = None) -> HfApi:  # pragma: no cover
     # Login
-    api = HfApi()
-    if token is not None:
-        try:
-            login(token=token, add_to_git_credential=False, write_permission=True)
-        except ValueError:
-            pass
-    while True:
-        try:
-            api.whoami()
-            break
-        except LocalTokenNotFoundError:
+    with ignore_hf_token_warnings(), mock.patch(
+        "huggingface_hub._login.is_notebook", new=lambda: False
+    ):
+        api = HfApi()
+        if token is not None:
             try:
                 login(token=token, add_to_git_credential=False, write_permission=True)
             except ValueError:
                 pass
-    return api
+        while True:
+            try:
+                api.whoami()
+                break
+            except LocalTokenNotFoundError:
+                try:
+                    login(
+                        token=token, add_to_git_credential=False, write_permission=True
+                    )
+                except ValueError:
+                    pass
+        return api


 def prepare_to_publish(
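The notebook login bug fix works by patching huggingface_hub's internal notebook detection to always report False, so `login()` takes the non-interactive code path instead of trying to render a notebook widget; note it targets a private module attribute (`huggingface_hub._login.is_notebook`), so it is coupled to the library's internals. A self-contained sketch of the `mock.patch` technique itself:

from unittest import mock

def is_notebook() -> bool:
    # Stand-in for huggingface_hub's internal environment detector.
    return True

def login_flow() -> str:
    return "notebook widget" if is_notebook() else "terminal prompt"

# Force the non-notebook code path for the duration of the block only.
with mock.patch(f"{__name__}.is_notebook", new=lambda: False):
    assert login_flow() == "terminal prompt"
assert login_flow() == "notebook widget"  # the patch is undone on exit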

src/utils/hf_model_utils.py

Lines changed: 1 addition & 1 deletion
@@ -238,7 +238,7 @@ def validate_quantization_config(
     quantization_config = copy(quantization_config)
     if (
         getattr(quantization_config, "quant_method", None) == "bitsandbytes"
-    ):  # pragma: no cover
+    ) and dtype is not None:  # pragma: no cover
         quantization_config.bnb_4bit_compute_dtype = dtype  # type:ignore[union-attr]
         quantization_config.bnb_4bit_quant_storage = dtype  # type:ignore[union-attr]
     return quantization_config
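The quantization fix adds a `dtype is not None` guard so that `validate_quantization_config` no longer overwrites a user's `bnb_4bit_compute_dtype`/`bnb_4bit_quant_storage` with `None` when no dtype was supplied. A simplified sketch of that behavior, using a hypothetical stand-in for `transformers.BitsAndBytesConfig`:

from copy import copy
from dataclasses import dataclass

@dataclass
class BnbConfig:
    # Hypothetical, simplified stand-in for transformers.BitsAndBytesConfig.
    quant_method: str = "bitsandbytes"
    bnb_4bit_compute_dtype: str | None = "bfloat16"

def validate(config: BnbConfig, dtype: str | None) -> BnbConfig:
    config = copy(config)
    if config.quant_method == "bitsandbytes" and dtype is not None:
        config.bnb_4bit_compute_dtype = dtype  # only override when given
    return config

assert validate(BnbConfig(), dtype=None).bnb_4bit_compute_dtype == "bfloat16"
assert validate(BnbConfig(), dtype="float16").bnb_4bit_compute_dtype == "float16"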

src/utils/import_utils.py

Lines changed: 7 additions & 0 deletions
@@ -133,6 +133,13 @@ def ignore_setfit_warnings():
         yield None


+@contextlib.contextmanager
+def ignore_hf_token_warnings():  # pragma: no cover
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        yield None
+
+
 @contextlib.contextmanager
 def ignore_faiss_warnings():
     with warnings.catch_warnings():
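`warnings.catch_warnings()` snapshots the active warning filters and restores them when the block exits, so the `UserWarning` suppression stays scoped to the context manager rather than leaking process-wide. A quick usage check:

import contextlib
import warnings

@contextlib.contextmanager
def ignore_user_warnings():
    # Filters are saved on entry and restored on exit by catch_warnings().
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        yield None

with ignore_user_warnings():
    warnings.warn("suppressed inside the block", UserWarning)
warnings.warn("emitted normally outside", UserWarning)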
