Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
import concurrent
import logging
import tempfile
from typing import Generator, Union
from collections.abc import Generator
from typing import Union

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -49,7 +50,7 @@ def summarize_text_from_summaries_prompt(content_type: str = "an academic paper"


@config.when(file_type="pdf")
def raw_text__pdf(pdf_source: Union[str, bytes, tempfile.SpooledTemporaryFile]) -> str:
def raw_text__pdf(pdf_source: str | bytes | tempfile.SpooledTemporaryFile) -> str:
"""Takes a filepath to a PDF and returns a string of the PDF's contents
:param pdf_source: the path, or the temporary file, to the PDF.
:return: the text of the PDF.
Expand All @@ -64,7 +65,7 @@ def raw_text__pdf(pdf_source: Union[str, bytes, tempfile.SpooledTemporaryFile])


@config.when(file_type="txt")
def raw_text__txt(text_file: Union[str, tempfile.SpooledTemporaryFile]) -> str:
def raw_text__txt(text_file: str | tempfile.SpooledTemporaryFile) -> str:
"""Takes a filepath to a text file and returns a string of the text file's contents
:param text_file: the path, or the temporary file, to the text file.
:return: the contents of the file as a string.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def openai_client() -> openai.OpenAI:
return openai.OpenAI()


def _encode_image(image_path_or_file: Union[str, IO], ext: str):
def _encode_image(image_path_or_file: str | IO, ext: str):
"""Helper fn to return a base-64 encoded image"""
file_like_object = (
image_path_or_file
Expand Down Expand Up @@ -79,8 +79,8 @@ def processed_image_url(image_url: str) -> str:

def caption_prompt(
core_prompt: str,
additional_prompt: Optional[str] = None,
descriptiveness: Optional[str] = None,
additional_prompt: str | None = None,
descriptiveness: str | None = None,
) -> str:
"""Returns the prompt used to describe an image"""
out = core_prompt
Expand Down Expand Up @@ -128,7 +128,7 @@ def caption_embeddings(
openai_client: openai.OpenAI,
embeddings_model: str = DEFAULT_EMBEDDINGS_MODEL,
generated_caption: str = None,
) -> List[float]:
) -> list[float]:
"""Returns the embeddings for a generated caption"""
data = (
openai_client.embeddings.create(
Expand Down Expand Up @@ -158,7 +158,7 @@ def caption_metadata(

@config.when(include_embeddings=True)
def embeddings_metadata(
caption_embeddings: List[float],
caption_embeddings: list[float],
embeddings_model: str = DEFAULT_EMBEDDINGS_MODEL,
) -> dict:
"""Returns metadata for the embeddings portion of the workflow"""
Expand All @@ -170,9 +170,9 @@ def embeddings_metadata(

def metadata(
embeddings_metadata: dict,
caption_metadata: Optional[dict] = None,
additional_metadata: Optional[dict] = None,
) -> Dict[str, Any]:
caption_metadata: dict | None = None,
additional_metadata: dict | None = None,
) -> dict[str, Any]:
"""Returns the response to a given chat"""
out = embeddings_metadata
if caption_metadata is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def converted_and_saved(
image: Image,
file_to_convert: ToConvert,
new_format: str = "jpeg",
image_params: Optional[Dict[str, Any]] = None,
image_params: dict[str, Any] | None = None,
) -> Converted:
"""Returns a list of all files to convert."""
s3 = _s3()
Expand Down Expand Up @@ -121,7 +121,7 @@ def converted_and_saved(
)


def all_converted_and_saved(converted_and_saved: Collect[Converted]) -> List[Converted]:
def all_converted_and_saved(converted_and_saved: Collect[Converted]) -> list[Converted]:
"""Returns a list of all downloaded locations"""
return list(converted_and_saved)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def downloaded_data(
return download_location


def all_downloaded_data(downloaded_data: Collect[str]) -> List[str]:
def all_downloaded_data(downloaded_data: Collect[str]) -> list[str]:
"""Returns a list of all downloaded locations"""
out = []
for path in downloaded_data:
Expand All @@ -120,7 +120,7 @@ def _jsonl_parse(path: str) -> pd.DataFrame:
return pd.read_json(path, lines=True)


def processed_dataframe(all_downloaded_data: List[str]) -> pd.DataFrame:
def processed_dataframe(all_downloaded_data: list[str]) -> pd.DataFrame:
"""Processes everything into a dataframe"""
out = []
for floc in all_downloaded_data:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@


@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def _get_embedding(text: str, model="text-similarity-davinci-001", **kwargs) -> List[float]:
def _get_embedding(text: str, model="text-similarity-davinci-001", **kwargs) -> list[float]:
"""Get embedding from OpenAI API.
:param text: text to embed.
:param model: the embedding model to use.
Expand Down Expand Up @@ -394,7 +394,7 @@ def embedded_data_set(

def _accuracy_and_se(
cosine_similarity: list[float], labeled_similarity: list[int]
) -> Tuple[float, float]:
) -> tuple[float, float]:
"""Calculate accuracy (and its standard error) of predicting label=1 if similarity>x

x is optimized by sweeping from -1 to 1 in steps of 0.01
Expand Down Expand Up @@ -465,7 +465,7 @@ def accuracy_computation(
return a, se


def _embedding_multiplied_by_matrix(embedding: List[float], matrix: torch.tensor) -> np.array:
def _embedding_multiplied_by_matrix(embedding: list[float], matrix: torch.tensor) -> np.array:
"""Helper function to multiply an embedding by a matrix."""
embedding_tensor = torch.tensor(embedding).float()
modified_embedding = embedding_tensor @ matrix
Expand Down Expand Up @@ -530,7 +530,7 @@ def tensors_from_dataframe(
embedding_column_1: str,
embedding_column_2: str,
similarity_label_column: str,
) -> Tuple[torch.tensor, torch.tensor, torch.tensor]:
) -> tuple[torch.tensor, torch.tensor, torch.tensor]:
e1 = np.stack(np.array(df[embedding_column_1].values))
e2 = np.stack(np.array(df[embedding_column_2].values))
s = np.stack(np.array(df[similarity_label_column].astype("float").values))
Expand Down Expand Up @@ -638,7 +638,7 @@ def mse_loss(predictions, targets):
optimization_result_matrices=group(*[source(k) for k in optimization_parameterization.keys()])
)
def optimization_results(
optimization_result_matrices: List[pd.DataFrame],
optimization_result_matrices: list[pd.DataFrame],
) -> pd.DataFrame:
"""Combine optimization results into one dataframe."""
return pd.concat(optimization_result_matrices)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def raw_dataset(
validation_size: float = 0.8,
input_text_key: str = "question",
output_text_key: str = "reply",
) -> Dict[str, Dataset]:
) -> dict[str, Dataset]:
"""Loads the raw dataset from disk and splits it into train and test sets.

:param data_path: the path to the dataset.
Expand Down
25 changes: 13 additions & 12 deletions contrib/hamilton/contrib/user/zilto/lancedb_vdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
# under the License.

import logging
from collections.abc import Iterable
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Union
from typing import Dict, List, Optional, Union

logger = logging.getLogger(__name__)

Expand All @@ -33,11 +34,11 @@
from hamilton.function_modifiers import tag

VectorType = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
DataType = Union[Dict, List[Dict], pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]]
DataType = Union[dict, list[dict], pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]]
TableSchema = Union[pa.Schema, LanceModel]


def client(uri: Union[str, Path] = "./.lancedb") -> lancedb.DBConnection:
def client(uri: str | Path = "./.lancedb") -> lancedb.DBConnection:
"""Create a LanceDB connection.

:param uri: path to local LanceDB
Expand All @@ -49,7 +50,7 @@ def client(uri: Union[str, Path] = "./.lancedb") -> lancedb.DBConnection:
def _create_table(
client: lancedb.DBConnection,
table_name: str,
schema: Optional[TableSchema] = None,
schema: TableSchema | None = None,
overwrite_table: bool = False,
) -> lancedb.db.LanceTable:
"""Create a new table based on schema."""
Expand All @@ -62,7 +63,7 @@ def _create_table(
def table_ref(
client: lancedb.DBConnection,
table_name: str,
schema: Optional[TableSchema] = None,
schema: TableSchema | None = None,
overwrite_table: bool = False,
) -> lancedb.db.LanceTable:
"""Create or reference a LanceDB table
Expand Down Expand Up @@ -91,7 +92,7 @@ def table_ref(


@tag(side_effect="True")
def reset(client: lancedb.DBConnection) -> Dict[str, List[str]]:
def reset(client: lancedb.DBConnection) -> dict[str, list[str]]:
"""Drop all existing tables.

:param vdb_client: LanceDB connection.
Expand All @@ -106,7 +107,7 @@ def reset(client: lancedb.DBConnection) -> Dict[str, List[str]]:


@tag(side_effect="True")
def insert(table_ref: lancedb.db.LanceTable, data: DataType) -> Dict:
def insert(table_ref: lancedb.db.LanceTable, data: DataType) -> dict:
"""Push new data to the specified table.

:param table_ref: Reference to the LanceDB table.
Expand All @@ -121,7 +122,7 @@ def insert(table_ref: lancedb.db.LanceTable, data: DataType) -> Dict:


@tag(side_effect="True")
def delete(table_ref: lancedb.db.LanceTable, delete_expression: str) -> Dict:
def delete(table_ref: lancedb.db.LanceTable, delete_expression: str) -> dict:
"""Delete existing data using an SQL expression.

:param table_ref: Reference to the LanceDB table.
Expand All @@ -138,8 +139,8 @@ def delete(table_ref: lancedb.db.LanceTable, delete_expression: str) -> Dict:
def vector_search(
table_ref: lancedb.db.LanceTable,
vector_query: VectorType,
columns: Optional[List[str]] = None,
where: Optional[str] = None,
columns: list[str] | None = None,
where: str | None = None,
prefilter_where: bool = False,
limit: int = 10,
) -> pd.DataFrame:
Expand Down Expand Up @@ -169,8 +170,8 @@ def vector_search(
def full_text_search(
table_ref: lancedb.db.LanceTable,
full_text_query: str,
full_text_index: Union[str, List[str]],
where: Optional[str] = None,
full_text_index: str | list[str],
where: str | None = None,
limit: int = 10,
rebuild_index: bool = True,
) -> pd.DataFrame:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import openai


def llm_client(api_key: Optional[str] = None) -> openai.OpenAI:
def llm_client(api_key: str | None = None) -> openai.OpenAI:
"""Create an OpenAI client."""
if api_key is None:
api_key = os.environ.get("OPENAI_API_KEY")
Expand Down
51 changes: 26 additions & 25 deletions contrib/hamilton/contrib/user/zilto/nixtla_mlforecast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
# under the License.

import logging
from typing import Any, Callable, Iterable, Optional, Union
from collections.abc import Callable, Iterable
from typing import Any, TypeAlias

logger = logging.getLogger(__name__)

Expand All @@ -38,12 +39,12 @@


# sklearn compliant models (including XGBoost and LightGBM) subclass BaseEstimator
MODELS_TYPE = Union[
BaseEstimator, list[BaseEstimator], dict[str, BaseEstimator]
] # equivalent to mlforecast.core.Models
LAG_TRANSFORMS_TYPE = dict[int, list[Union[Callable, tuple[Callable, Any]]]]
DATE_FEATURES_TYPE = Iterable[Union[str, Callable]]
CONFIDENCE_INTERVAL_TYPE = Optional[list[Union[int, float]]]
MODELS_TYPE = (
BaseEstimator | list[BaseEstimator] | dict[str, BaseEstimator]
) # equivalent to mlforecast.core.Models
LAG_TRANSFORMS_TYPE: TypeAlias = dict[int, list[Callable | tuple[Callable, Any]]]
DATE_FEATURES_TYPE: TypeAlias = Iterable[str | Callable]
CONFIDENCE_INTERVAL_TYPE: TypeAlias = list[int | float] | None


def base_models() -> MODELS_TYPE:
Expand Down Expand Up @@ -81,11 +82,11 @@ def date_features() -> DATE_FEATURES_TYPE:

def forecaster(
base_models: MODELS_TYPE,
freq: Union[int, str] = "M",
lags: Optional[list[int]] = None,
lag_transforms: Optional[LAG_TRANSFORMS_TYPE] = None,
date_features: Optional[DATE_FEATURES_TYPE] = None,
target_transforms: Optional[list[BaseTargetTransform]] = None,
freq: int | str = "M",
lags: list[int] | None = None,
lag_transforms: LAG_TRANSFORMS_TYPE | None = None,
date_features: DATE_FEATURES_TYPE | None = None,
target_transforms: list[BaseTargetTransform] | None = None,
num_threads: int = 1,
) -> MLForecast:
"""Create the forecasting harness with data and models
Expand All @@ -106,15 +107,15 @@ def forecaster(
def cross_validation_predictions(
forecaster: MLForecast,
dataset: pd.DataFrame,
static_features: Optional[list[str]] = None,
static_features: list[str] | None = None,
dropna: bool = True,
keep_last_n_inputs: Optional[int] = None,
train_models_for_n_horizons: Optional[int] = None,
keep_last_n_inputs: int | None = None,
train_models_for_n_horizons: int | None = None,
confidence_percentile: CONFIDENCE_INTERVAL_TYPE = None,
cv_n_windows: int = 2,
cv_forecast_horizon: int = 12,
cv_step_size: Optional[int] = None,
cv_input_size: Optional[int] = None,
cv_step_size: int | None = None,
cv_input_size: int | None = None,
cv_refit: bool = True,
cv_save_train_predictions: bool = True,
) -> pd.DataFrame:
Expand Down Expand Up @@ -182,10 +183,10 @@ def best_model_per_series(cross_validation_evaluation: pd.DataFrame) -> pd.Serie
def fitted_forecaster(
forecaster: MLForecast,
dataset: pd.DataFrame,
static_features: Optional[list[str]] = None,
static_features: list[str] | None = None,
dropna: bool = True,
keep_last_n: Optional[int] = None,
train_models_for_n_horizons: Optional[int] = None,
keep_last_n: int | None = None,
train_models_for_n_horizons: int | None = None,
save_train_predictions: bool = True,
) -> MLForecast:
"""Fit models over full dataset"""
Expand All @@ -202,9 +203,9 @@ def fitted_forecaster(
def inference_predictions(
fitted_forecaster: MLForecast,
inference_forecast_horizon: int = 12,
inference_uids: Optional[list[str]] = None,
inference_dataset: Optional[pd.DataFrame] = None,
inference_exogenous: Optional[pd.DataFrame] = None,
inference_uids: list[str] | None = None,
inference_dataset: pd.DataFrame | None = None,
inference_exogenous: pd.DataFrame | None = None,
confidence_percentile: CONFIDENCE_INTERVAL_TYPE = None,
) -> pd.DataFrame:
"""Infer values using the trained models
Expand All @@ -221,8 +222,8 @@ def inference_predictions(

def plotting_config(
plot_max_n_series: int = 4,
plot_uids: Optional[list[str]] = None,
plot_models: Optional[list[str]] = None,
plot_uids: list[str] | None = None,
plot_models: list[str] | None = None,
plot_anomalies: bool = False,
plot_confidence_percentile: CONFIDENCE_INTERVAL_TYPE = None,
plot_engine: str = "matplotlib",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
# under the License.

import logging
from typing import Callable, Optional
from collections.abc import Callable
from typing import Optional

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -155,8 +156,8 @@ def inference_predictions(


def plotting_config(
plot_uids: Optional[list[str]] = None,
plot_models: Optional[list[str]] = None,
plot_uids: list[str] | None = None,
plot_models: list[str] | None = None,
plot_anomalies: bool = False,
plot_confidence_percentile: list[float] = [90.0], # noqa: B006
plot_engine: str = "matplotlib",
Expand Down
Loading
Loading