Reference
octoflow ¶
__version__ = '0.0.56' module-attribute ¶
Experiment ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
name: str instance-attribute ¶
description: Optional[str] instance-attribute ¶
artifact_uri: Optional[str] instance-attribute ¶
start_run(name: str, description: Optional[str] = None) -> Run ¶
search_runs(**kwargs) -> List[Run] ¶
delete_run(run: Union[Run, int]) -> None ¶
Run ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
experiment_id: int instance-attribute ¶
name: str instance-attribute ¶
description: Optional[str] instance-attribute ¶
created_at: Optional[dt.datetime] = None class-attribute instance-attribute ¶
tags: MutableMapping[str, JSONType] = field(init=False) class-attribute instance-attribute ¶
__post_init__() ¶
log_param(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value ¶
log_params(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value] ¶
log_metric(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value ¶
log_metrics(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value] ¶
get_values() -> List[Tuple[Variable, Value]] ¶
TrackingClient ¶
store: TrackingStore property ¶
__init__(store: TrackingStore) -> None ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
get_experiment_by_name(name: str) -> Optional[Experiment] ¶
get_or_create_experiment(name: str, *, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
list_experiments() ¶
Value ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
run_id: int instance-attribute ¶
variable_id: int instance-attribute ¶
value: ValueType instance-attribute ¶
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute ¶
step_id: Optional[int] = None class-attribute instance-attribute ¶
Config ¶
Bases: MutableMapping
omconf = config if isinstance(config, OmegaConf) else OmegaConf.structured(config) if is_dataclass(config) else OmegaConf.create(config) instance-attribute ¶
__new__(config: Union[Type[T], dict[str, Any]]) -> Union[T, Self] ¶
__new__(config: Type[T]) -> T
__new__(config: dict[str, Any]) -> Self
__init__(config: Any) -> None ¶
__getitem__(name: str) -> Any ¶
__setitem__(name: str, value: Any) -> None ¶
__delitem__(name: str) -> None ¶
__iter__() -> Any ¶
__len__() -> int ¶
__getattr__(name: str) -> Any ¶
wraps(wrapped: Union[type, callable, str, None] = None, **kwargs: Any) -> Union[ConfigWrapper, functools.partial] ¶
wraps(wrapped: Union[type, callable], **kwargs) -> ConfigWrapper
wraps(wrapped: Union[str, None], **kwargs) -> functools.partial
load(path: str) -> Self classmethod ¶
from_dotlist(dotlist: str) -> Self classmethod ¶
from_cli(args: list[str]) -> Self classmethod ¶
config ¶
config = Config(OctoFlowConfig) module-attribute ¶
CacheConfig dataclass ¶
ResourcesConfig dataclass ¶
LoggingConfig dataclass ¶
level: str = 'INFO' class-attribute instance-attribute ¶
format: str = '%(asctime)s %(levelname)s %(name)s [%(pathname)s:%(lineno)s] %(message)s' class-attribute instance-attribute ¶
__init__(level: str = 'INFO', format: str = '%(asctime)s %(levelname)s %(name)s [%(pathname)s:%(lineno)s] %(message)s') -> None ¶
OctoFlowConfig dataclass ¶
resources: ResourcesConfig = field(default_factory=ResourcesConfig) class-attribute instance-attribute ¶
logging: LoggingConfig = field(default_factory=LoggingConfig) class-attribute instance-attribute ¶
__init__(resources: ResourcesConfig = ResourcesConfig(), logging: LoggingConfig = LoggingConfig()) -> None ¶
constants ¶
data ¶
Dataset ¶
Bases: BaseDataset
cache_dir = cache_dir instance-attribute ¶
path: Path property ¶
The path to the dataset.
Returns:
| Type | Description |
|---|---|
Path | The path to the dataset. |
format: str property ¶
The format of the dataset.
Returns:
| Type | Description |
|---|---|
str | The format of the dataset. |
columns: List[str] property ¶
Get the names of the columns in the dataset.
Returns:
| Type | Description |
|---|---|
List[str] | The names of the columns in the dataset. |
__init__(data_or_loader: Union[List[dict], Dict[str, list], DataFrame, DatasetLoader, str] = None, format: str = DEFAULT_FORMAT, *, schema: Union[pa.Schema, BaseModel, None] = None, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False) ¶
__init__(data: Union[List[dict], Dict[str, list], DataFrame] = None, format: str = DEFAULT_FORMAT)
__init__(data: Union[List[dict], Dict[str, list], DataFrame], format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None)
__init__(loader: DatasetLoader, format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False)
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data_or_loader | list of dict, dict of list, DataFrame, BaseDatasetLoader, str | The data to load into the dataset or the (name of) loader to use. | None |
format | str | The format of the dataset. | DEFAULT_FORMAT |
path | (str, Path, None) | Load the data to this path. | None |
cache_dir | (str, Path, None) | The directory to use for caching. | None |
loader_args | (tuple, None) | The arguments to pass to the loader function if provided as the first argument. | None |
loader_kwargs | (dict, None) | The keyword arguments to pass to the loader function if provided as the first argument. | None |
count_rows() -> int ¶
Count the number of rows in the dataset.
Returns:
| Type | Description |
|---|---|
int | The number of rows in the dataset. |
__len__() -> int ¶
Get the number of rows in the dataset.
Returns:
| Type | Description |
|---|---|
int | The number of rows in the dataset. |
head(num_rows: int = 5, columns: Union[str, List[str], None] = None, filter: Expression = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame ¶
Get the first rows of the dataset as a pandas DataFrame.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
num_rows | int | The number of rows to get. | 5 |
columns | str, list of str, None | Names of columns to get. If None, all columns are returned. | None |
filter | Expression | The filter expression. | None |
batch_size | int | Number of rows to get at a time. | DEFAULT_BATCH_SIZE |
Returns:
| Type | Description |
|---|---|
DataFrame | A pandas DataFrame containing the first rows of the dataset. |
__getitem__(indices: Union[int, slice, List[int], ArrayLike]) -> Union[dict, pa.Table] ¶
__getitem__(indices: int) -> Dict[str, Any]
__getitem__(indices: Union[slice, List[int], ArrayLike]) -> pa.Table
Get rows from the dataset.
take(*, indices: Union[int, slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Union[dict, pa.Table] ¶
take(*, indices: Optional[int] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Dict[str, Any]
take(*, indices: Union[slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame
Take rows (and, optionally, a subset of columns) from the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
indices | int, slice, list of int, array-like | Indices of rows to take. | None |
columns | str, list of str, None | Names of columns to take. If None, all columns are taken. | None |
batch_size | int | Number of rows to take at a time. | DEFAULT_BATCH_SIZE |
Returns:
| Type | Description |
|---|---|
(dict, Table) | The taken row (a dict, for a single integer index) or rows (a table). |
map(func: Any, batch_size: int = DEFAULT_BATCH_SIZE, batched: bool = False, keep_cols: Union[bool, List[str], None] = True, exclude_cols: Union[List[str], None] = None, verbose: Union[bool, int] = 1) -> Dataset ¶
Map a function over the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func | Any | The function to map over the dataset. | required |
batch_size | int | Number of rows to map at a time. | DEFAULT_BATCH_SIZE |
batched | bool | Whether the function is batched. | False |
verbose | bool or int | Whether to show a progress bar. | 1 |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset containing the mapped rows. |
filter(expression: Expression = None) -> Dataset ¶
Filter the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
expression | Expression | The filter expression. | None |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset containing only the rows that match the filter expression. |
select(columns: Union[str, List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset ¶
Select columns from the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
columns | str, list of str | Names of columns to select. | required |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset containing only the selected columns. |
rename(columns: Dict[str, str], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset ¶
Rename columns in the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
columns | dict | Mapping of old column names to new column names. | required |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset with the columns renamed. |
project(columns: Union[Dict[str, Expression], Dict[str, str], List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset ¶
Project columns in the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
columns | dict | Mapping of column names to expressions. | required |
batch_size | int | Number of rows to project at a time. | DEFAULT_BATCH_SIZE |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset with the columns projected. |
load_dataset(path: Union[Path, str], format: str = DEFAULT_FORMAT, cache_dir: Union[Path, str, None] = None) -> Dataset classmethod ¶
Load an existing dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path | (str, Path) | The path to the dataset. | required |
format | str | The format of the dataset. | DEFAULT_FORMAT |
Returns:
| Type | Description |
|---|---|
Dataset | The loaded dataset. |
to_polars() -> pl.LazyFrame ¶
Convert the dataset to a Polars LazyFrame.
Returns:
| Type | Description |
|---|---|
LazyFrame | The Polars Lazy DataFrame. |
Expression ¶
Bases: BaseExpression
A class representing an expression in Octoflow.
__init__(expression: Union[Expression, ds.Expression]) ¶
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
expression | Union[Expression, ds.Expression] | The (pyarrow) expression to wrap. | required |
__eq__(other: Any) -> Expression ¶
Compare two expressions for equality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__ne__(other: Any) -> Expression ¶
Compare two expressions for inequality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__lt__(other: Any) -> Expression ¶
Compare two expressions for less than.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__le__(other: Any) -> Expression ¶
Compare two expressions for less than or equal to.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__gt__(other: Any) -> Expression ¶
Compare two expressions for greater than.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__ge__(other: Any) -> Expression ¶
Compare two expressions for greater than or equal to.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__and__(other: Any) -> Expression ¶
Combine two expressions with a logical and.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to combine with. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the combination. |
__or__(other: Any) -> Expression ¶
Combine two expressions with a logical or.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to combine with. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the combination. |
__invert__() -> Expression ¶
Invert an expression.
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the inverted expression. |
is_nan() -> Expression ¶
Check if an expression is NaN.
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
is_null(nan_is_null: bool = False) ¶
Check if an expression is null.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
nan_is_null | bool | Whether to consider NaN values as null, by default False. | False |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
is_valid() -> Expression ¶
Check if an expression is valid.
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
isin(other: Expression) -> Expression ¶
Check if an expression is in a set of values.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Expression | The set of values to check against. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
equals(other: Expression) -> Expression ¶
Check if an expression is equal to another expression.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Expression | The other expression to check against. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
__hash__() -> int ¶
Get the hash of the expression.
Returns:
| Type | Description |
|---|---|
int | The hash of the expression. |
__repr__() -> str ¶
Get the representation of the expression.
Returns:
| Type | Description |
|---|---|
str | The representation of the expression. |
field(*args, **kwargs) -> Field ¶
Create a new field getter.
scalar(value: Any) -> Expression ¶
Create an expression from a scalar.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value | Any | The value of the scalar. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the scalar. |
dataloader(func: Union[F, str, None] = None, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[..., Any]] = None, path_arg: Optional[str] = None) -> Union[F, Callable[[F], F]] ¶
dataloader(func: F, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> F
dataloader(name: str, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> Callable[[F], F]
Decorator to register a function as a dataset loader.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func | Union[Callable[..., Any], str, None] | The function to decorate, by default None. | None |
name | Optional[str] | The name of the loader, by default None. | None |
extensions | Optional[list[str]] | The extensions that the loader supports, by default None. | None |
wraps | Optional[Callable[..., Any]] | The function to wrap, by default None. | None |
path_arg | Optional[str] | The name of the argument that is the path, by default None. | None |
Returns:
| Type | Description |
|---|---|
DatasetLoader | The dataset loader. |
load_dataset(__loader: str, __path: Optional[str], __force: bool = False, __dataset_format: str = DEFAULT_FORMAT, __dataset_path: Union[Path, str, None] = None, /, *args, **kwargs) -> Dataset ¶
Load a dataset from a path.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
__loader | str | The name of the loader. | required |
__path | Optional[str] | The path to the data (to be passed to the loader). | required |
__dataset_format | str | The format of the dataset, by default DEFAULT_FORMAT. | DEFAULT_FORMAT |
__dataset_path | Union[Path, str, None] | The path where the dataset will be stored. | None |
*args | tuple | The arguments to pass to the loader. | () |
**kwargs | dict | The keyword arguments to pass to the loader. | {} |
Returns:
| Type | Description |
|---|---|
Dataset | The loaded dataset. |
base ¶
ArrowType = TypeVar('ArrowType') module-attribute ¶
P = ParamSpec('P') module-attribute ¶
R = TypeVar('R') module-attribute ¶
DEFAULT_BATCH_SIZE: Final[int] = 1048576 module-attribute ¶
DEFAULT_FORMAT: Final[str] = 'arrow' module-attribute ¶
BaseExpression = PyArrowWrapper[ds.Expression] module-attribute ¶
BaseDataset = PyArrowWrapper[ds.Dataset] module-attribute ¶
PyArrowWrapper ¶
dataclass ¶
T = TypeVar('T') module-attribute ¶
Field ¶
Bases: Field, Expression
name = name instance-attribute ¶
__init__(name: Optional[str] = None, *, default=dc.MISSING, default_factory=dc.MISSING, init=True, repr=True, hash=None, compare=True, metadata=None, kw_only=dc.MISSING) ¶
__call__(data: Mapping[str, Any]) -> Any ¶
Get the value of the field.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data | dict | The data to be accessed. | required |
FieldAccessor ¶
ModelMeta ¶
field(*args, **kwargs) -> Field ¶
Create a new field getter.
field_from_dataclass_field(field: dc.Field) -> Field ¶
Create a new field getter from a dataclass field.
fields(cls: Type[T]) -> Union[FieldAccessor[T], Type[T]] ¶
dataset ¶
SourceType = Union[str, List[str], Union[Path, List[Path]], 'Dataset', List['Dataset']] module-attribute ¶
Dataset ¶
Bases: BaseDataset
cache_dir = cache_dir instance-attribute ¶
path: Path property ¶
The path to the dataset.
Returns:
| Type | Description |
|---|---|
Path | The path to the dataset. |
format: str property ¶
The format of the dataset.
Returns:
| Type | Description |
|---|---|
str | The format of the dataset. |
columns: List[str] property ¶
Get the names of the columns in the dataset.
Returns:
| Type | Description |
|---|---|
List[str] | The names of the columns in the dataset. |
__init__(data_or_loader: Union[List[dict], Dict[str, list], DataFrame, DatasetLoader, str] = None, format: str = DEFAULT_FORMAT, *, schema: Union[pa.Schema, BaseModel, None] = None, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False) ¶
__init__(data: Union[List[dict], Dict[str, list], DataFrame] = None, format: str = DEFAULT_FORMAT)
__init__(data: Union[List[dict], Dict[str, list], DataFrame], format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None)
__init__(loader: DatasetLoader, format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False)
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data_or_loader | list of dict, dict of list, DataFrame, BaseDatasetLoader, str | The data to load into the dataset or the (name of) loader to use. | None |
format | str | The format of the dataset. | DEFAULT_FORMAT |
path | (str, Path, None) | Load the data to this path. | None |
cache_dir | (str, Path, None) | The directory to use for caching. | None |
loader_args | (tuple, None) | The arguments to pass to the loader function if provided as the first argument. | None |
loader_kwargs | (dict, None) | The keyword arguments to pass to the loader function if provided as the first argument. | None |
count_rows() -> int ¶
Count the number of rows in the dataset.
Returns:
| Type | Description |
|---|---|
int | The number of rows in the dataset. |
__len__() -> int ¶
Get the number of rows in the dataset.
Returns:
| Type | Description |
|---|---|
int | The number of rows in the dataset. |
head(num_rows: int = 5, columns: Union[str, List[str], None] = None, filter: Expression = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame ¶
Get the first rows of the dataset as a pandas DataFrame.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
num_rows | int | The number of rows to get. | 5 |
columns | str, list of str, None | Names of columns to get. If None, all columns are returned. | None |
filter | Expression | The filter expression. | None |
batch_size | int | Number of rows to get at a time. | DEFAULT_BATCH_SIZE |
Returns:
| Type | Description |
|---|---|
DataFrame | A pandas DataFrame containing the first rows of the dataset. |
__getitem__(indices: Union[int, slice, List[int], ArrayLike]) -> Union[dict, pa.Table] ¶
__getitem__(indices: int) -> Dict[str, Any]
__getitem__(indices: Union[slice, List[int], ArrayLike]) -> pa.Table
Get rows from the dataset.
take(*, indices: Union[int, slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Union[dict, pa.Table] ¶
take(*, indices: Optional[int] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Dict[str, Any]
take(*, indices: Union[slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame
Take rows (and, optionally, a subset of columns) from the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
indices | int, slice, list of int, array-like | Indices of rows to take. | None |
columns | str, list of str, None | Names of columns to take. If None, all columns are taken. | None |
batch_size | int | Number of rows to take at a time. | DEFAULT_BATCH_SIZE |
Returns:
| Type | Description |
|---|---|
(dict, Table) | The taken row (a dict, for a single integer index) or rows (a table). |
map(func: Any, batch_size: int = DEFAULT_BATCH_SIZE, batched: bool = False, keep_cols: Union[bool, List[str], None] = True, exclude_cols: Union[List[str], None] = None, verbose: Union[bool, int] = 1) -> Dataset ¶
Map a function over the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func | Any | The function to map over the dataset. | required |
batch_size | int | Number of rows to map at a time. | DEFAULT_BATCH_SIZE |
batched | bool | Whether the function is batched. | False |
verbose | bool or int | Whether to show a progress bar. | 1 |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset containing the mapped rows. |
filter(expression: Expression = None) -> Dataset ¶
Filter the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
expression | Expression | The filter expression. | None |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset containing only the rows that match the filter expression. |
select(columns: Union[str, List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset ¶
Select columns from the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
columns | str, list of str | Names of columns to select. | required |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset containing only the selected columns. |
rename(columns: Dict[str, str], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset ¶
Rename columns in the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
columns | dict | Mapping of old column names to new column names. | required |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset with the columns renamed. |
project(columns: Union[Dict[str, Expression], Dict[str, str], List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset ¶
Project columns in the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
columns | dict | Mapping of column names to expressions. | required |
batch_size | int | Number of rows to project at a time. | DEFAULT_BATCH_SIZE |
Returns:
| Type | Description |
|---|---|
Dataset | A new dataset with the columns projected. |
load_dataset(path: Union[Path, str], format: str = DEFAULT_FORMAT, cache_dir: Union[Path, str, None] = None) -> Dataset classmethod ¶
Load an existing dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path | (str, Path) | The path to the dataset. | required |
format | str | The format of the dataset. | DEFAULT_FORMAT |
Returns:
| Type | Description |
|---|---|
Dataset | The loaded dataset. |
to_polars() -> pl.LazyFrame ¶
Convert the dataset to a Polars LazyFrame.
Returns:
| Type | Description |
|---|---|
LazyFrame | The Polars Lazy DataFrame. |
gen_unique_cached_path(*refs: Any, cache_dir: Union[str, Path, None] = None) -> Path ¶
writable(data: Any, schema: Optional[pa.Schema] = None) -> Union[pa.RecordBatch, pa.Table, pa.RecordBatchReader] ¶
write_dataset(path: Union[str, Path], data: Union[ds.Dataset, pa.Table, pa.RecordBatch, Iterable[pa.RecordBatch], pa.RecordBatchReader, pd.DataFrame, Mapping[str, List[Any]], Sequence[Mapping[str, Any]]], schema: pa.Schema = None, format: Optional[str] = None) -> bool ¶
read_dataset(path: Union[str, Path], format: str) -> ds.dataset ¶
to_batches(data: Union[pa.Table, pa.RecordBatch, Iterable[pa.RecordBatch], Iterable[pa.Table], pa.RecordBatchReader]) -> Generator[pa.RecordBatch, None, None] ¶
create_mapped_table(data: Union[dict, list, pd.DataFrame, pa.RecordBatch, pa.Table], existing: Optional[pa.Table] = None, keep_cols: Union[bool, List[str], None] = True, exclude_cols: Optional[List[str]] = None) -> pa.Table ¶
expression ¶
Expression ¶
Bases: BaseExpression
A class representing an expression in Octoflow.
__init__(expression: Union[Expression, ds.Expression]) ¶
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
expression | Union[Expression, ds.Expression] | The (pyarrow) expression to wrap. | required |
__eq__(other: Any) -> Expression ¶
Compare two expressions for equality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__ne__(other: Any) -> Expression ¶
Compare two expressions for inequality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__lt__(other: Any) -> Expression ¶
Compare two expressions for less than.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__le__(other: Any) -> Expression ¶
Compare two expressions for less than or equal to.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__gt__(other: Any) -> Expression ¶
Compare two expressions for greater than.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__ge__(other: Any) -> Expression ¶
Compare two expressions for greater than or equal to.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to compare to. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the comparison. |
__and__(other: Any) -> Expression ¶
Combine two expressions with a logical and.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to combine with. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the combination. |
__or__(other: Any) -> Expression ¶
Combine two expressions with a logical or.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Any | The other expression to combine with. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the combination. |
__invert__() -> Expression ¶
Invert an expression.
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the inverted expression. |
is_nan() -> Expression ¶
Check if an expression is NaN.
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
is_null(nan_is_null: bool = False) ¶
Check if an expression is null.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
nan_is_null | bool | Whether to consider NaN values as null, by default False. | False |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
is_valid() -> Expression ¶
Check if an expression is valid.
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
isin(other: Expression) -> Expression ¶
Check if an expression is in a set of values.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Expression | The set of values to check against. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
equals(other: Expression) -> Expression ¶
Check if an expression is equal to another expression.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other | Expression | The other expression to check against. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the result of the check. |
__hash__() -> int ¶
Get the hash of the expression.
Returns:
| Type | Description |
|---|---|
int | The hash of the expression. |
__repr__() -> str ¶
Get the representation of the expression.
Returns:
| Type | Description |
|---|---|
str | The representation of the expression. |
scalar(value: Any) -> Expression ¶
Create an expression from a scalar.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value | Any | The value of the scalar. | required |
Returns:
| Type | Description |
|---|---|
Expression | The expression representing the scalar. |
loaders ¶
P = ParamSpec('P') module-attribute ¶
R = TypeVar('R') module-attribute ¶
F = TypeVar('F', bound=Callable[..., Any]) module-attribute ¶
loaders: Dict[str, DatasetLoader] = {} module-attribute ¶
DatasetLoader ¶
Bases: BaseDatasetLoader
func = func instance-attribute ¶
name = name or self.func.__name__ instance-attribute ¶
extensions = extensions instance-attribute ¶
path_arg = path_arg instance-attribute ¶
wraps = wraps instance-attribute ¶
__init__(func: Callable[..., Any], name: Optional[str] = None, extensions: Optional[list[str]] = None, path_arg: Optional[str] = None, wraps: Optional[Callable[P, R]] = None) ¶
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func | Callable[..., Any] | The function to decorate. | required |
name | Optional[str] | The name of the loader, by default None. | None |
extensions | Optional[list[str]] | The extensions that the loader supports, by default None. | None |
path_arg | Optional[str] | The name of the argument that is the path, by default None. | None |
wraps | Optional[Callable[..., Any]] | The function to wrap, by default None. | None |
__call__(*args: P.args, **kwargs: P.kwargs) -> R ¶
Call the loader function.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
args | tuple | The arguments to pass to the function. | () |
kwargs | dict | The keyword arguments to pass to the function. | {} |
Returns:
| Type | Description |
|---|---|
R | The result of the function. |
bind(*args: P.args, **kwargs: P.kwargs) -> Callable[..., R] ¶
Bind arguments to the loader function.
Notes
This method is useful for creating a partial function with pre-filled arguments and keyword arguments. This helps to improve the uniqueness of the fingerprint of the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
args | tuple | The arguments to pre-fill. | () |
kwargs | dict | The keyword arguments to pre-fill. | {} |
Returns:
| Type | Description |
|---|---|
Callable[..., R] | The partial function. |
dataloader(func: Union[F, str, None] = None, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[..., Any]] = None, path_arg: Optional[str] = None) -> Union[F, Callable[[F], F]] ¶
dataloader(func: F, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> F
dataloader(name: str, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> Callable[[F], F]
Decorator to register a function as a dataset loader.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func | Union[Callable[..., Any], str, None] | The function to decorate, by default None. | None |
name | Optional[str] | The name of the loader, by default None. | None |
extensions | Optional[list[str]] | The extensions that the loader supports, by default None. | None |
wraps | Optional[Callable[..., Any]] | The function to wrap, by default None. | None |
path_arg | Optional[str] | The name of the argument that is the path, by default None. | None |
Returns:
| Type | Description |
|---|---|
DatasetLoader | The dataset loader. |
load_json(path: Union[str, Path], encoding: str = 'utf-8') -> Generator[List[Dict], None, None] ¶
Load a dataset from a JSON file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path | (str, Path) | The path to the file. | required |
encoding | str | The encoding of the file, by default "utf-8". | 'utf-8' |
Returns:
| Type | Description |
|---|---|
dict | The loaded dataset. |
load_jsonl(path: Union[str, Path], encoding: str = 'utf-8') -> Generator[List[Dict], None, None] ¶
Load a dataset from a JSONL file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path | (str, Path) | The path to the file. | required |
encoding | str | The encoding of the file, by default "utf-8". | 'utf-8' |
Returns:
| Type | Description |
|---|---|
list[dict] | The loaded dataset. |
load_csv(path: Union[str, Path], encoding: str = 'utf-8') -> Generator[List[Dict], None, None] ¶
Load a dataset from a CSV/TSV file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path | (str, Path) | The path to the file. | required |
encoding | str | The encoding of the file, by default "utf-8". | 'utf-8' |
Returns:
| Type | Description |
|---|---|
list[dict] | The loaded dataset. |
sampler ¶
schema ¶
T = TypeVar('T') module-attribute ¶
unify_schemas(this: pa.Schema, other: Optional[pa.Schema]) -> pa.Schema ¶
infer_schema(data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> Self ¶
validate(schema: pa.Schema, data: dict) -> bool ¶
Validates a dictionary against a PyArrow schema.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
schema | Schema | The PyArrow schema to validate against. | required |
data | dict | The dictionary to validate. | required |
Raises:
| Type | Description |
|---|---|
ValidationError | If the dictionary does not match the schema. |
Examples:
>>> schema = pa.schema([pa.field('id', pa.int64()), pa.field('name', pa.string())])
>>> valid_dict = {'id': 1, 'name': 'Alice'}
>>> validate(schema, valid_dict)
>>> invalid_dict = {'id': '1', 'name': 'Alice'}
>>> validate(schema, invalid_dict)
Traceback (most recent call last):
...
ValidationError: ...
get_schema(data: T) -> Tuple[T, pa.Schema] ¶
Extracts the schema from a PyArrow schema or a generator of PyArrow record batches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data | Any | The PyArrow schema or generator of record batches. | required |
Returns:
| Type | Description |
|---|---|
Tuple[Any, Schema] | The data and the schema. |
from_dataclass(cls: T) -> pa.Schema ¶
Converts a dataclass to a PyArrow schema.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
cls | Type[T] | The dataclass to convert. | required |
Returns:
| Type | Description |
|---|---|
Schema | The PyArrow schema. |
Examples:
>>> import dataclasses
>>> @dataclasses.dataclass
... class Record:
... id: int
... name: str
>>> from_dataclass(Record)
pyarrow.Schema([...])
get_schema_from_dataclass(*args, **kwargs) -> pa.Schema ¶
Alias for from_dataclass.
Examples:
>>> import dataclasses
>>> @dataclasses.dataclass
... class Record:
... id: int
... name: str
>>> get_schema_from_dataclass(Record)
pyarrow.Schema([...])
types ¶
UNDEFINED = undefined() module-attribute ¶
MonthDayNano ¶
Undefined ¶
undefined() -> Undefined ¶
is_undefined(obj: pa.DataType) -> bool ¶
from_dataclass(cls: type) -> pa.DataType ¶
Return the PyArrow data type of a dataclass.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
cls | type | The dataclass. | required |
Returns:
| Type | Description |
|---|---|
DataType | The PyArrow data type. |
from_typed_dict(cls: _TypedDictMeta) -> pa.DataType ¶
Return the PyArrow data type of a TypedDict.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
cls | _TypedDictMeta | The TypedDict. | required |
Returns:
| Type | Description |
|---|---|
DataType | The PyArrow data type. |
from_union(args: tuple[type, ...]) -> pa.DataType ¶
from_dtype(dtype: Union[type, np.dtype, None]) -> pa.DataType ¶
Return the PyArrow data type of a provided native/NumPy data type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dtype | Union[type, np.dtype, None] | The native or NumPy data type. | required |
Returns:
| Type | Description |
|---|---|
DataType | The PyArrow data type. |
unify_types(left: pa.DataType, right: pa.DataType) -> pa.DataType ¶
Return the PyArrow data type that can represent both left and right.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
left | DataType | The left PyArrow data type. | required |
right | DataType | The right PyArrow data type. | required |
Returns:
| Type | Description |
|---|---|
DataType | The PyArrow data type. |
infer_type(obj: Any) -> pa.DataType ¶
Return the PyArrow data type of an object.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
obj | Any | The object. | required |
Returns:
| Type | Description |
|---|---|
DataType | The PyArrow data type. |
logging ¶
CRITICAL = logging.CRITICAL module-attribute ¶
FATAL = CRITICAL module-attribute ¶
ERROR = logging.ERROR module-attribute ¶
WARNING = logging.WARNING module-attribute ¶
WARN = WARNING module-attribute ¶
INFO = logging.INFO module-attribute ¶
DEBUG = logging.DEBUG module-attribute ¶
NOTSET = logging.NOTSET module-attribute ¶
set_level(level: Union[int, str], logger: Optional[logging.Logger] = None) ¶
Set the logging level of the logger.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
logger | Logger | Logger instance. | None |
level | int or str | Logging level. | required |
plugin ¶
Package ¶
name = name instance-attribute ¶
modules = modules instance-attribute ¶
__init__(name: str, modules: list) ¶
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name | str | Name of the package. | required |
modules | list | List of modules to expose. | required |
import_modules() ¶
Import all modules in the package.
The modules are imported in the order they are defined in the package.
Returns:
| Type | Description |
|---|---|
None | The modules are imported silently. If an error occurs, it is raised. |
Raises:
| Type | Description |
|---|---|
TypeError | If a module is not of a supported type. |
ImportError | If a module cannot be imported. |
project ¶
Project ¶
project ¶
ProjectExperiment ¶
ProjectExperimentDict ¶
Bases: Mapping[str, ProjectExperiment]
get_project = weakref.ref(project) instance-attribute ¶
project: Project property ¶
experiments_path: Path property ¶
names: Set[str] property ¶
__init__(project: Project) -> None ¶
__iter__() ¶
__getitem__(key: str) -> ProjectExperiment ¶
__contains__(key: str) -> bool ¶
__len__() -> int ¶
__repr__() -> str ¶
first() -> ProjectExperiment ¶
Project ¶
update_project_gitgnore(path: Path) -> None ¶
tracking ¶
Experiment ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
name: str instance-attribute ¶
description: Optional[str] instance-attribute ¶
artifact_uri: Optional[str] instance-attribute ¶
start_run(name: str, description: Optional[str] = None) -> Run ¶
search_runs(**kwargs) -> List[Run] ¶
delete_run(run: Union[Run, int]) -> None ¶
Run ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
experiment_id: int instance-attribute ¶
name: str instance-attribute ¶
description: Optional[str] instance-attribute ¶
created_at: Optional[dt.datetime] = None class-attribute instance-attribute ¶
tags: MutableMapping[str, JSONType] = field(init=False) class-attribute instance-attribute ¶
__post_init__() ¶
log_param(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value ¶
log_params(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value] ¶
log_metric(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value ¶
log_metrics(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value] ¶
get_values() -> List[Tuple[Variable, Value]] ¶
TrackingClient ¶
store: TrackingStore property ¶
__init__(store: TrackingStore) -> None ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
get_experiment_by_name(name: str) -> Optional[Experiment] ¶
get_or_create_experiment(name: str, *, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
list_experiments() ¶
Value ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
run_id: int instance-attribute ¶
variable_id: int instance-attribute ¶
value: ValueType instance-attribute ¶
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute ¶
step_id: Optional[int] = None class-attribute instance-attribute ¶
SQLAlchemyTrackingStore ¶
Bases: TrackingStore
SQLAlchemy tracking store.
This class implements the tracking store interface on top of a SQLAlchemy engine.
lock: Optional[FileLock] = lockfile instance-attribute ¶
engine = create_engine(url) instance-attribute ¶
__init__(url: Union[str, URL] = 'sqlite:///:memory:') ¶
create_all(checkfirst: bool = True) ¶
session() ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
list_experiments() -> List[Experiment] ¶
get_experiment(experiment_id: int) -> Experiment ¶
get_experiment_by_name(name: str) -> Experiment ¶
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run ¶
delete_run(experiment_id: int, run_id: int) -> None ¶
search_runs(experiment_id: int, expression: Optional[ColumnExpressionArgument[bool]] = None) -> List[Run] ¶
list_runs(experiment_id: int) -> List[Run] ¶
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags ¶
get_tag(run_id: int, name: str) -> JSONType ¶
get_tags(run_id: int) -> Dict[str, JSONType] ¶
count_tags(run_id: int) -> int ¶
delete_tag(run_id: int, name: str) -> RunTags ¶
log_value(run_id: int, key: str, value: ValueType, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, value_id: Optional[int] = None, is_step: Optional[bool] = None) -> Value ¶
get_values(run_id: int) -> List[Tuple[Variable, Value]] ¶
TrackingStore ¶
Abstract class for a tracking store.
This class defines the interface that every tracking store implements.
__enter__() ¶
__exit__(exc_type, exc_value, traceback) ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment abstractmethod ¶
list_experiments() -> List[Experiment] abstractmethod ¶
get_experiment(experiment_id: int) -> Experiment abstractmethod ¶
get_experiment_by_name(name: str) -> Experiment abstractmethod ¶
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run abstractmethod ¶
delete_run(experiment_id: int, run_id: int) -> None abstractmethod ¶
list_runs(experiment_id: int) -> List[Run] abstractmethod ¶
search_runs(experiment_id: int, **kwargs) -> List[Run] abstractmethod ¶
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags abstractmethod ¶
get_tag(run_id: int, name: str) -> JSONType abstractmethod ¶
get_tags(run_id: int) -> Dict[str, JSONType] abstractmethod ¶
count_tags(run_id: int) -> int abstractmethod ¶
delete_tag(run_id: int, name: str) -> RunTags abstractmethod ¶
log_value(run_id: int, key: str, value: str, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, is_step: Optional[bool] = None) -> Value abstractmethod ¶
log_values(run_id: int, values: List[Union[ValueMapping, ValueTuple, Value]], *, step_id: Optional[int] = None, type: Optional[VariableType] = None) -> List[Value] ¶
get_values(run_id: int) -> List[Tuple[Variable, Value]] abstractmethod ¶
import_store(other: TrackingStore) ¶
artifact ¶
handler ¶
ArtifactMetadata ¶
Bases: MutableDict[str, Any]
ArtifactHandler ¶
path: Path = Path(path) instance-attribute ¶
metadata = ArtifactMetadata(self) instance-attribute ¶
__init__(path: Union[str, Path]) -> None ¶
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path | str | The path to the artifact | required |
load() -> Any abstractmethod ¶
Load the artifact from the path.
Returns:
| Type | Description |
|---|---|
Any | The loaded artifact. |
save(obj: Any, *args, **kwargs) abstractmethod ¶
Save the given artifact to the path.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
obj | Any | The artifact to save. | required |
args | tuple | Additional positional arguments. | () |
kwargs | dict | Additional keyword arguments. | {} |
Returns:
| Type | Description |
|---|---|
None | |
can_handle(obj: object) -> bool abstractmethod classmethod ¶
Return True if this handler can handle the given object type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
obj | object | The object to check. | required |
Returns:
| Type | Description |
|---|---|
bool | True if this handler can handle the given object type. |
exists() -> bool ¶
Return True if the artifact exists.
Returns:
| Type | Description |
|---|---|
bool | True if the artifact exists. |
unlink() ¶
Unlink/delete the artifact.
Returns:
| Type | Description |
|---|---|
None | None |
get_handler_type(name: str) -> Type[ArtifactHandler] ¶
get_handler_type_by_object(obj: Any) -> Type[ArtifactHandler] ¶
list_handler_types() -> List[str] ¶
models ¶
JSONType = Union[str, int, float, bool, None, Dict[str, Any], List[Any]] module-attribute ¶
TrackingStore ¶
Abstract class for a tracking store.
This class defines the interface that every tracking store implements.
__enter__() ¶
__exit__(exc_type, exc_value, traceback) ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment abstractmethod ¶
list_experiments() -> List[Experiment] abstractmethod ¶
get_experiment(experiment_id: int) -> Experiment abstractmethod ¶
get_experiment_by_name(name: str) -> Experiment abstractmethod ¶
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run abstractmethod ¶
delete_run(experiment_id: int, run_id: int) -> None abstractmethod ¶
list_runs(experiment_id: int) -> List[Run] abstractmethod ¶
search_runs(experiment_id: int, **kwargs) -> List[Run] abstractmethod ¶
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags abstractmethod ¶
get_tag(run_id: int, name: str) -> JSONType abstractmethod ¶
get_tags(run_id: int) -> Dict[str, JSONType] abstractmethod ¶
count_tags(run_id: int) -> int abstractmethod ¶
delete_tag(run_id: int, name: str) -> RunTags abstractmethod ¶
log_value(run_id: int, key: str, value: str, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, is_step: Optional[bool] = None) -> Value abstractmethod ¶
log_values(run_id: int, values: List[Union[ValueMapping, ValueTuple, Value]], *, step_id: Optional[int] = None, type: Optional[VariableType] = None) -> List[Value] ¶
get_values(run_id: int) -> List[Tuple[Variable, Value]] abstractmethod ¶
import_store(other: TrackingStore) ¶
TrackingClient ¶
store: TrackingStore property ¶
__init__(store: TrackingStore) -> None ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
get_experiment_by_name(name: str) -> Optional[Experiment] ¶
get_or_create_experiment(name: str, *, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
list_experiments() ¶
Experiment ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
name: str instance-attribute ¶
description: Optional[str] instance-attribute ¶
artifact_uri: Optional[str] instance-attribute ¶
start_run(name: str, description: Optional[str] = None) -> Run ¶
search_runs(**kwargs) -> List[Run] ¶
delete_run(run: Union[Run, int]) -> None ¶
TagsMapping ¶
Run ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
experiment_id: int instance-attribute ¶
name: str instance-attribute ¶
description: Optional[str] instance-attribute ¶
created_at: Optional[dt.datetime] = None class-attribute instance-attribute ¶
tags: MutableMapping[str, JSONType] = field(init=False) class-attribute instance-attribute ¶
__post_init__() ¶
log_param(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value ¶
log_params(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value] ¶
log_metric(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value ¶
log_metrics(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value] ¶
get_values() -> List[Tuple[Variable, Value]] ¶
Variable ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
experiment_id: int instance-attribute ¶
key: str instance-attribute ¶
parent_id: Optional[int] instance-attribute ¶
type: Optional[VariableType] = None class-attribute instance-attribute ¶
is_step: Optional[bool] = None class-attribute instance-attribute ¶
Value ¶
Bases: StoredModel
id: int = field(init=False) class-attribute instance-attribute ¶
run_id: int instance-attribute ¶
variable_id: int instance-attribute ¶
value: ValueType instance-attribute ¶
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute ¶
step_id: Optional[int] = None class-attribute instance-attribute ¶
RunTags ¶
Bases: StoredModel
Tag ¶
Bases: StoredModel
sqlalchemy_store ¶
mapper_registry = registry() module-attribute ¶
variable_constraints = (Index('ix_experiment_id_key', Variable.experiment_id, Variable.key, case((Variable.parent_id.is_(None), '<NULL>'), else_=Variable.parent_id), unique=True),) module-attribute ¶
SQLAlchemyTrackingStore ¶
Bases: TrackingStore
SQLAlchemy tracking store.
This class implements the tracking store interface on top of a SQLAlchemy engine.
lock: Optional[FileLock] = lockfile instance-attribute ¶
engine = create_engine(url) instance-attribute ¶
__init__(url: Union[str, URL] = 'sqlite:///:memory:') ¶
create_all(checkfirst: bool = True) ¶
session() ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment ¶
list_experiments() -> List[Experiment] ¶
get_experiment(experiment_id: int) -> Experiment ¶
get_experiment_by_name(name: str) -> Experiment ¶
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run ¶
delete_run(experiment_id: int, run_id: int) -> None ¶
search_runs(experiment_id: int, expression: Optional[ColumnExpressionArgument[bool]] = None) -> List[Run] ¶
list_runs(experiment_id: int) -> List[Run] ¶
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags ¶
get_tag(run_id: int, name: str) -> JSONType ¶
get_tags(run_id: int) -> Dict[str, JSONType] ¶
count_tags(run_id: int) -> int ¶
delete_tag(run_id: int, name: str) -> RunTags ¶
log_value(run_id: int, key: str, value: ValueType, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, value_id: Optional[int] = None, is_step: Optional[bool] = None) -> Value ¶
get_values(run_id: int) -> List[Tuple[Variable, Value]] ¶
SQLAlchemyStore ¶
Bases: SQLAlchemyTrackingStore
store ¶
store_cv = ContextVar('store', default=None) module-attribute ¶
VariableType = Literal['param', 'metric'] module-attribute ¶
ValueType = Union[str, float, int, bool, None] module-attribute ¶
ValueMapping ¶
ValueTuple ¶
Bases: NamedTuple
key: str instance-attribute ¶
value: ValueType instance-attribute ¶
type: VariableType instance-attribute ¶
step_id: Optional[int] = None class-attribute instance-attribute ¶
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute ¶
is_step: Optional[bool] = None class-attribute instance-attribute ¶
TrackingStore ¶
Abstract class for a tracking store.
This class defines the interface that every tracking store implements.
__enter__() ¶
__exit__(exc_type, exc_value, traceback) ¶
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment abstractmethod ¶
list_experiments() -> List[Experiment] abstractmethod ¶
get_experiment(experiment_id: int) -> Experiment abstractmethod ¶
get_experiment_by_name(name: str) -> Experiment abstractmethod ¶
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run abstractmethod ¶
delete_run(experiment_id: int, run_id: int) -> None abstractmethod ¶
list_runs(experiment_id: int) -> List[Run] abstractmethod ¶
search_runs(experiment_id: int, **kwargs) -> List[Run] abstractmethod ¶
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags abstractmethod ¶
get_tag(run_id: int, name: str) -> JSONType abstractmethod ¶
get_tags(run_id: int) -> Dict[str, JSONType] abstractmethod ¶
count_tags(run_id: int) -> int abstractmethod ¶
delete_tag(run_id: int, name: str) -> RunTags abstractmethod ¶
log_value(run_id: int, key: str, value: str, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, is_step: Optional[bool] = None) -> Value abstractmethod ¶
log_values(run_id: int, values: List[Union[ValueMapping, ValueTuple, Value]], *, step_id: Optional[int] = None, type: Optional[VariableType] = None) -> List[Value] ¶
get_values(run_id: int) -> List[Tuple[Variable, Value]] abstractmethod ¶
import_store(other: TrackingStore) ¶
wrap(method) ¶
typing ¶
utils ¶
cache ¶
collections ¶
EventTarget ¶
MutableCollection ¶
Bases: EventTarget
MutableDict ¶
MutableList ¶
MutableSet ¶
flatten(data: Dict[str, Any], *, separator: str = '.', parent_key: Optional[str] = None) -> Dict[Union[str, Tuple[str]], Any] ¶
Flatten a nested dictionary.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data | Dict[str, Any] | The nested dictionary to flatten. | required |
separator | str | The separator, by default "." | '.' |
parent_key | Optional[str] | The parent key, by default None | None |
Returns:
| Type | Description |
|---|---|
Dict[Union[str, Tuple[str]], Any] | The flattened dictionary. |
config ¶
T = TypeVar('T', bound=dataclass) module-attribute ¶
ConfigWrapper ¶
wrapped = wrapped instance-attribute ¶
signature = inspect.signature(wrapped) instance-attribute ¶
filter_keys = filter_keys instance-attribute ¶
name = name instance-attribute ¶
config = config instance-attribute ¶
__init__(wrapped, config: Config, name: Optional[str] = None) ¶
get_params(*args, **kwargs) ¶
__call__(*args, **kwargs) ¶
Config ¶
Bases: MutableMapping
omconf = config if isinstance(config, OmegaConf) else OmegaConf.structured(config) if is_dataclass(config) else OmegaConf.create(config) instance-attribute ¶
__new__(config: Union[Type[T], dict[str, Any]]) -> Union[T, Self] ¶
__new__(config: Type[T]) -> T
__new__(config: dict[str, Any]) -> Self
__init__(config: Any) -> None ¶
__getitem__(name: str) -> Any ¶
__setitem__(name: str, value: Any) -> None ¶
__delitem__(name: str) -> None ¶
__iter__() -> Any ¶
__len__() -> int ¶
__getattr__(name: str) -> Any ¶
wraps(wrapped: Union[type, callable, str, None] = None, **kwargs: Any) -> Union[ConfigWrapper, functools.partial] ¶
wraps(wrapped: Union[type, callable], **kwargs) -> ConfigWrapper
wraps(wrapped: Union[str, None], **kwargs) -> functools.partial
load(path: str) -> Self classmethod ¶
from_dotlist(dotlist: str) -> Self classmethod ¶
from_cli(args: list[str]) -> Self classmethod ¶
configmethod(func: T) -> T ¶
escape ¶
ESCAPED_CHARS = '\\\\`*_{}[]()#+\\-.!' module-attribute ¶
ESCAPED_CHARS_RE = re.compile(f'([{re.escape(ESCAPED_CHARS)}])') module-attribute ¶
UNESCAPED_CHARS_RE = re.compile(f'\\([{re.escape(ESCAPED_CHARS)}])') module-attribute ¶
escape(text: str) -> str ¶
Escape text.
unescape(text: str) -> str ¶
Unescape text.
func ¶
P = ParamSpec('P') module-attribute ¶
T = TypeVar('T') module-attribute ¶
bind(__func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> Union[functools.partial, Callable[..., T]] ¶
Bind arguments to a function and return a new function.
This function is similar to functools.partial, but it allows binding arguments both by name and by position (converting positional arguments to keyword arguments when possible).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
__func | Callable | The function to bind arguments to. | required |
*args | args | Positional arguments to bind to the function. | () |
**kwargs | kwargs | Keyword arguments to bind to the function. | {} |
Returns:
| Type | Description |
|---|---|
partial or Callable | A new function with the arguments bound. |
hashing ¶
Fast cryptographic hash of Python objects, with a special case for fast hashing of numpy arrays.
Pickler = pickle._Pickler module-attribute ¶
T = TypeVar('T') module-attribute ¶
P = ParamSpec('P') module-attribute ¶
Hashable ¶
Bases: Protocol
A class that can be hashed.
Wrapped ¶
hash(*obj, coerce_mmap=False) ¶
Quick calculation of a hash to uniquely identify Python objects containing numpy arrays.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
obj | | The objects to hash | () |
coerce_mmap | | Make no difference between np.memmap and np.ndarray | False |