Skip to content

Reference

octoflow

__version__ = '0.0.56' module-attribute

Experiment

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute

name: str instance-attribute

description: Optional[str] instance-attribute

artifact_uri: Optional[str] instance-attribute

start_run(name: str, description: Optional[str] = None) -> Run

search_runs(**kwargs) -> List[Run]

delete_run(run: Union[Run, int]) -> None

Run

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute

experiment_id: int instance-attribute

name: str instance-attribute

description: Optional[str] instance-attribute

created_at: Optional[dt.datetime] = None class-attribute instance-attribute

tags: MutableMapping[str, JSONType] = field(init=False) class-attribute instance-attribute

__post_init__()

log_param(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value

log_params(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value]

log_metric(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value

log_metrics(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value]

get_values() -> List[Tuple[Variable, Value]]

TrackingClient

store: TrackingStore property

__init__(store: TrackingStore) -> None

create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment

get_experiment_by_name(name: str) -> Optional[Experiment]

get_or_create_experiment(name: str, *, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment

list_experiments()

Value

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute

run_id: int instance-attribute

variable_id: int instance-attribute

value: ValueType instance-attribute

timestamp: Optional[dt.datetime] = None class-attribute instance-attribute

step_id: Optional[int] = None class-attribute instance-attribute

Config

Bases: MutableMapping

omconf = config if isinstance(config, OmegaConf) else OmegaConf.structured(config) if is_dataclass(config) else OmegaConf.create(config) instance-attribute

__new__(config: Union[Type[T], dict[str, Any]]) -> Union[T, Self]

__new__(config: Type[T]) -> T
__new__(config: dict[str, Any]) -> Self

__init__(config: Any) -> None

__getitem__(name: str) -> Any

__setitem__(name: str, value: Any) -> None

__delitem__(name: str) -> None

__iter__() -> Any

__len__() -> int

__getattr__(name: str) -> Any

wraps(wrapped: Union[type, callable, str, None] = None, **kwargs: Any) -> Union[ConfigWrapper, functools.partial]

wraps(wrapped: Union[type, callable], **kwargs) -> ConfigWrapper
wraps(wrapped: Union[str, None], **kwargs) -> functools.partial

load(path: str) -> Self classmethod

from_dotlist(dotlist: str) -> Self classmethod

from_cli(args: list[str]) -> Self classmethod

config

config = Config(OctoFlowConfig) module-attribute

CacheConfig dataclass

path: Path = '${oc.select:resources.path}/cache' class-attribute instance-attribute
__init__(path: Path = '${oc.select:resources.path}/cache') -> None

ResourcesConfig dataclass

path: Path = '~/.octoflow' class-attribute instance-attribute
cache: CacheConfig = field(default_factory=CacheConfig) class-attribute instance-attribute
__init__(path: Path = '~/.octoflow', cache: CacheConfig = CacheConfig()) -> None

LoggingConfig dataclass

level: str = 'INFO' class-attribute instance-attribute
format: str = '%(asctime)s %(levelname)s %(name)s [%(pathname)s:%(lineno)s] %(message)s' class-attribute instance-attribute
__init__(level: str = 'INFO', format: str = '%(asctime)s %(levelname)s %(name)s [%(pathname)s:%(lineno)s] %(message)s') -> None

OctoFlowConfig dataclass

resources: ResourcesConfig = field(default_factory=ResourcesConfig) class-attribute instance-attribute
logging: LoggingConfig = field(default_factory=LoggingConfig) class-attribute instance-attribute
__init__(resources: ResourcesConfig = ResourcesConfig(), logging: LoggingConfig = LoggingConfig()) -> None

constants

MissingType = _MISSING_TYPE module-attribute

DEFAULT = DefaultType() module-attribute

DefaultType

data

Dataset

Bases: BaseDataset

cache_dir = cache_dir instance-attribute
path: Path property

The path to the dataset.

Returns:

Type Description
Path

The path to the dataset.

format: str property

The format of the dataset.

Returns:

Type Description
str

The format of the dataset.

columns: List[str] property

Get the names of the columns in the dataset.

Returns:

Type Description
List[str]

The names of the columns in the dataset.

__init__(data_or_loader: Union[List[dict], Dict[str, list], DataFrame, DatasetLoader, str] = None, format: str = DEFAULT_FORMAT, *, schema: Union[pa.Schema, BaseModel, None] = None, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False)
__init__(data: Union[List[dict], Dict[str, list], DataFrame] = None, format: str = DEFAULT_FORMAT)
__init__(data: Union[List[dict], Dict[str, list], DataFrame], format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None)
__init__(loader: DatasetLoader, format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False)

Parameters:

Name Type Description Default
data_or_loader list of dict, dict of list, DataFrame, BaseDatasetLoader, str

The data to load into the dataset or the (name of) loader to use.

None
format str

The format of the dataset.

DEFAULT_FORMAT
path (str, Path, None)

Load the data to this path.

None
cache_dir (str, Path, None)

The directory to use for caching.

None
loader_args (tuple, None)

The positional arguments to pass to the loader, if a loader is provided as the first argument.

None
loader_kwargs (dict, None)

The keyword arguments to pass to the loader, if a loader is provided as the first argument.

None
count_rows() -> int

Count the number of rows in the dataset.

Returns:

Type Description
int

The number of rows in the dataset.

__len__() -> int

Get the number of rows in the dataset.

Returns:

Type Description
int

The number of rows in the dataset.

head(num_rows: int = 5, columns: Union[str, List[str], None] = None, filter: Expression = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame

Get the first rows of the dataset as a pandas DataFrame.

Parameters:

Name Type Description Default
num_rows int

The number of rows to get.

5
columns str, list of str, None

Names of columns to get. If None, all columns are returned.

None
filter Expression

The filter expression.

None
batch_size int

Number of rows to get at a time.

DEFAULT_BATCH_SIZE

Returns:

Type Description
DataFrame

A pandas DataFrame containing the first rows of the dataset.

__getitem__(indices: Union[int, slice, List[int], ArrayLike]) -> Union[dict, pa.Table]
__getitem__(indices: int) -> Dict[str, Any]
__getitem__(indices: Union[slice, List[int], ArrayLike]) -> pa.Table

Get rows from the dataset.

take(*, indices: Union[int, slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Union[dict, pa.Table]
take(*, indices: Optional[int] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Dict[str, Any]
take(*, indices: Union[slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame

Take rows (and, optionally, a subset of columns) from the dataset.

Parameters:

Name Type Description Default
indices int, slice, list of int, array-like

Indices of rows to take.

None
columns str, list of str, None

Names of columns to take. If None, all columns are taken.

None
batch_size int

Number of rows to take at a time.

DEFAULT_BATCH_SIZE

Returns:

Type Description
(dict, Table)

The taken rows or row.

map(func: Any, batch_size: int = DEFAULT_BATCH_SIZE, batched: bool = False, keep_cols: Union[bool, List[str], None] = True, exclude_cols: Union[List[str], None] = None, verbose: Union[bool, int] = 1) -> Dataset

Map a function over the dataset.

Parameters:

Name Type Description Default
func Any

The function to map over the dataset.

required
batch_size int

Number of rows to map at a time.

DEFAULT_BATCH_SIZE
batched bool

Whether the function is batched.

False
verbose bool | int

Whether to show a progress bar.

1

Returns:

Type Description
Dataset

A new dataset containing the mapped rows.

filter(expression: Expression = None) -> Dataset

Filter the dataset.

Parameters:

Name Type Description Default
expression Expression

The filter expression.

None

Returns:

Type Description
Dataset

A new dataset containing only the rows that match the filter expression.

select(columns: Union[str, List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset

Select columns from the dataset.

Parameters:

Name Type Description Default
columns str, list of str

Names of columns to select.

required

Returns:

Type Description
Dataset

A new dataset containing only the selected columns.

rename(columns: Dict[str, str], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset

Rename columns in the dataset.

Parameters:

Name Type Description Default
columns dict

Mapping of old column names to new column names.

required

Returns:

Type Description
Dataset

A new dataset with the columns renamed.

project(columns: Union[Dict[str, Expression], Dict[str, str], List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset

Project columns in the dataset.

Parameters:

Name Type Description Default
columns dict

Mapping of column names to expressions.

required
batch_size int

Number of rows to project at a time.

DEFAULT_BATCH_SIZE

Returns:

Type Description
Dataset

A new dataset with the columns projected.

load_dataset(path: Union[Path, str], format: str = DEFAULT_FORMAT, cache_dir: Union[Path, str, None] = None) -> Dataset classmethod

Load an existing dataset.

Parameters:

Name Type Description Default
path (str, Path)

The path to the dataset.

required
format str

The format of the dataset.

DEFAULT_FORMAT

Returns:

Type Description
Dataset

The loaded dataset.

to_polars() -> pl.LazyFrame

Convert the dataset to a Polars LazyFrame.

Returns:

Type Description
LazyFrame

The Polars LazyFrame.

Expression

Bases: BaseExpression

A class representing an expression in Octoflow.

__init__(expression: Union[Expression, ds.Expression])

Parameters:

Name Type Description Default
expression Union[Expression, ds.Expression]

The (pyarrow) expression to wrap.

required
__eq__(other: Any) -> Expression

Compare two expressions for equality.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__ne__(other: Any) -> Expression

Compare two expressions for inequality.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__lt__(other: Any) -> Expression

Compare two expressions for less than.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__le__(other: Any) -> Expression

Compare two expressions for less than or equal to.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__gt__(other: Any) -> Expression

Compare two expressions for greater than.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__ge__(other: Any) -> Expression

Compare two expressions for greater than or equal to.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__and__(other: Any) -> Expression

Combine two expressions with a logical and.

Parameters:

Name Type Description Default
other Any

The other expression to combine with.

required

Returns:

Type Description
Expression

The expression representing the result of the combination.

__or__(other: Any) -> Expression

Combine two expressions with a logical or.

Parameters:

Name Type Description Default
other Any

The other expression to combine with.

required

Returns:

Type Description
Expression

The expression representing the result of the combination.

__invert__() -> Expression

Invert an expression.

Returns:

Type Description
Expression

The expression representing the inverted expression.

is_nan() -> Expression

Check if an expression is NaN.

Returns:

Type Description
Expression

The expression representing the result of the check.

is_null(nan_is_null: bool = False)

Check if an expression is null.

Parameters:

Name Type Description Default
nan_is_null bool

Whether to consider NaN values as null, by default False.

False

Returns:

Type Description
Expression

The expression representing the result of the check.

is_valid() -> Expression

Check if an expression is valid.

Returns:

Type Description
Expression

The expression representing the result of the check.

isin(other: Expression) -> Expression

Check if an expression is in a set of values.

Parameters:

Name Type Description Default
other Expression

The set of values to check against.

required

Returns:

Type Description
Expression

The expression representing the result of the check.

equals(other: Expression) -> Expression

Check if an expression is equal to another expression.

Parameters:

Name Type Description Default
other Expression

The other expression to check against.

required

Returns:

Type Description
Expression

The expression representing the result of the check.

__hash__() -> int

Get the hash of the expression.

Returns:

Type Description
int

The hash of the expression.

__repr__() -> str

Get the representation of the expression.

Returns:

Type Description
str

The representation of the expression.

field(*args, **kwargs) -> Field

Create a new field getter.

scalar(value: Any) -> Expression

Create an expression from a scalar.

Parameters:

Name Type Description Default
value Any

The value of the scalar.

required

Returns:

Type Description
Expression

The expression representing the scalar.

dataloader(func: Union[F, str, None] = None, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[..., Any]] = None, path_arg: Optional[str] = None) -> Union[F, Callable[[F], F]]

dataloader(func: F, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> F
dataloader(name: str, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> Callable[[F], F]

Decorator to register a function as a dataset loader.

Parameters:

Name Type Description Default
func Union[Callable[..., Any], str, None]

The function to decorate, by default None.

None
name Optional[str]

The name of the loader, by default None.

None
extensions Optional[list[str]]

The extensions that the loader supports, by default None.

None
wraps Optional[Callable[..., Any]]

The function to wrap, by default None.

None
path_arg Optional[str]

The name of the argument that is the path, by default None.

None

Returns:

Type Description
DatasetLoader

The dataset loader.

load_dataset(__loader: str, __path: Optional[str], __force: bool = False, __dataset_format: str = DEFAULT_FORMAT, __dataset_path: Union[Path, str, None] = None, /, *args, **kwargs) -> Dataset

Load a dataset from a path.

Parameters:

Name Type Description Default
__loader str

The name of the loader.

required
__path Optional[str]

The path to the data (to be passed to the loader).

required
__dataset_format str

The format of the dataset, by default DEFAULT_FORMAT.

DEFAULT_FORMAT
__dataset_path Union[Path, str, None]

The path where the dataset will be stored.

None
*args tuple

The arguments to pass to the loader.

()
**kwargs dict

The keyword arguments to pass to the loader.

{}

Returns:

Type Description
Dataset

The loaded dataset.

base

ArrowType = TypeVar('ArrowType') module-attribute
P = ParamSpec('P') module-attribute
R = TypeVar('R') module-attribute
DEFAULT_BATCH_SIZE: Final[int] = 1048576 module-attribute
DEFAULT_FORMAT: Final[str] = 'arrow' module-attribute
BaseExpression = PyArrowWrapper[ds.Expression] module-attribute
BaseDataset = PyArrowWrapper[ds.Dataset] module-attribute
PyArrowWrapper

Bases: Generic[ArrowType]

__init__(wrapped: ArrowType) -> None
to_pyarrow() -> ArrowType
BaseDatasetLoader

Bases: Generic[P, R]

dataclass

T = TypeVar('T') module-attribute
Field

Bases: Field, Expression

name = name instance-attribute
__init__(name: Optional[str] = None, *, default=dc.MISSING, default_factory=dc.MISSING, init=True, repr=True, hash=None, compare=True, metadata=None, kw_only=dc.MISSING)
__call__(data: Mapping[str, Any]) -> Any

Get the value of the field.

Parameters:

Name Type Description Default
data dict

The data to be accessed.

required
FieldAccessor

Bases: tuple, Generic[T]

__new__(obj: Type[T]) -> FieldAccessor[T]
__getattr__(name: str) -> Field
ModelMeta

Bases: type

__new__(mcs, name, bases, attrs, **kwargs)
update_forward_refs(**kwargs: Any) -> None
BaseModel
__post_init__()
field(*args, **kwargs) -> Field

Create a new field getter.

field_from_dataclass_field(field: dc.Field) -> Field

Create a new field getter.

fields(cls: Type[T]) -> Union[FieldAccessor[T], Type[T]]

dataset

SourceType = Union[str, List[str], Union[Path, List[Path]], 'Dataset', List['Dataset']] module-attribute
Dataset

Bases: BaseDataset

cache_dir = cache_dir instance-attribute
path: Path property

The path to the dataset.

Returns:

Type Description
Path

The path to the dataset.

format: str property

The format of the dataset.

Returns:

Type Description
str

The format of the dataset.

columns: List[str] property

Get the names of the columns in the dataset.

Returns:

Type Description
List[str]

The names of the columns in the dataset.

__init__(data_or_loader: Union[List[dict], Dict[str, list], DataFrame, DatasetLoader, str] = None, format: str = DEFAULT_FORMAT, *, schema: Union[pa.Schema, BaseModel, None] = None, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False)
__init__(data: Union[List[dict], Dict[str, list], DataFrame] = None, format: str = DEFAULT_FORMAT)
__init__(data: Union[List[dict], Dict[str, list], DataFrame], format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None)
__init__(loader: DatasetLoader, format: str = DEFAULT_FORMAT, *, path: Optional[Union[str, Path]] = None, cache_dir: Optional[Union[str, Path]] = None, loader_args: Optional[Tuple[Any, ...]] = None, loader_kwargs: Optional[Dict[str, Any]] = None, force: bool = False)

Parameters:

Name Type Description Default
data_or_loader list of dict, dict of list, DataFrame, BaseDatasetLoader, str

The data to load into the dataset or the (name of) loader to use.

None
format str

The format of the dataset.

DEFAULT_FORMAT
path (str, Path, None)

Load the data to this path.

None
cache_dir (str, Path, None)

The directory to use for caching.

None
loader_args (tuple, None)

The positional arguments to pass to the loader, if a loader is provided as the first argument.

None
loader_kwargs (dict, None)

The keyword arguments to pass to the loader, if a loader is provided as the first argument.

None
count_rows() -> int

Count the number of rows in the dataset.

Returns:

Type Description
int

The number of rows in the dataset.

__len__() -> int

Get the number of rows in the dataset.

Returns:

Type Description
int

The number of rows in the dataset.

head(num_rows: int = 5, columns: Union[str, List[str], None] = None, filter: Expression = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame

Get the first rows of the dataset as a pandas DataFrame.

Parameters:

Name Type Description Default
num_rows int

The number of rows to get.

5
columns str, list of str, None

Names of columns to get. If None, all columns are returned.

None
filter Expression

The filter expression.

None
batch_size int

Number of rows to get at a time.

DEFAULT_BATCH_SIZE

Returns:

Type Description
DataFrame

A pandas DataFrame containing the first rows of the dataset.

__getitem__(indices: Union[int, slice, List[int], ArrayLike]) -> Union[dict, pa.Table]
__getitem__(indices: int) -> Dict[str, Any]
__getitem__(indices: Union[slice, List[int], ArrayLike]) -> pa.Table

Get rows from the dataset.

take(*, indices: Union[int, slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Union[dict, pa.Table]
take(*, indices: Optional[int] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> Dict[str, Any]
take(*, indices: Union[slice, List[int], ArrayLike] = None, columns: Union[str, List[str], None] = None, batch_size: int = DEFAULT_BATCH_SIZE) -> DataFrame

Take rows (and, optionally, a subset of columns) from the dataset.

Parameters:

Name Type Description Default
indices int, slice, list of int, array-like

Indices of rows to take.

None
columns str, list of str, None

Names of columns to take. If None, all columns are taken.

None
batch_size int

Number of rows to take at a time.

DEFAULT_BATCH_SIZE

Returns:

Type Description
(dict, Table)

The taken rows or row.

map(func: Any, batch_size: int = DEFAULT_BATCH_SIZE, batched: bool = False, keep_cols: Union[bool, List[str], None] = True, exclude_cols: Union[List[str], None] = None, verbose: Union[bool, int] = 1) -> Dataset

Map a function over the dataset.

Parameters:

Name Type Description Default
func Any

The function to map over the dataset.

required
batch_size int

Number of rows to map at a time.

DEFAULT_BATCH_SIZE
batched bool

Whether the function is batched.

False
verbose bool | int

Whether to show a progress bar.

1

Returns:

Type Description
Dataset

A new dataset containing the mapped rows.

filter(expression: Expression = None) -> Dataset

Filter the dataset.

Parameters:

Name Type Description Default
expression Expression

The filter expression.

None

Returns:

Type Description
Dataset

A new dataset containing only the rows that match the filter expression.

select(columns: Union[str, List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset

Select columns from the dataset.

Parameters:

Name Type Description Default
columns str, list of str

Names of columns to select.

required

Returns:

Type Description
Dataset

A new dataset containing only the selected columns.

rename(columns: Dict[str, str], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset

Rename columns in the dataset.

Parameters:

Name Type Description Default
columns dict

Mapping of old column names to new column names.

required

Returns:

Type Description
Dataset

A new dataset with the columns renamed.

project(columns: Union[Dict[str, Expression], Dict[str, str], List[str]], batch_size: int = DEFAULT_BATCH_SIZE) -> Dataset

Project columns in the dataset.

Parameters:

Name Type Description Default
columns dict

Mapping of column names to expressions.

required
batch_size int

Number of rows to project at a time.

DEFAULT_BATCH_SIZE

Returns:

Type Description
Dataset

A new dataset with the columns projected.

load_dataset(path: Union[Path, str], format: str = DEFAULT_FORMAT, cache_dir: Union[Path, str, None] = None) -> Dataset classmethod

Load an existing dataset.

Parameters:

Name Type Description Default
path (str, Path)

The path to the dataset.

required
format str

The format of the dataset.

DEFAULT_FORMAT

Returns:

Type Description
Dataset

The loaded dataset.

to_polars() -> pl.LazyFrame

Convert the dataset to a Polars LazyFrame.

Returns:

Type Description
LazyFrame

The Polars LazyFrame.

gen_unique_cached_path(*refs: Any, cache_dir: Union[str, Path, None] = None) -> Path
writable(data: Any, schema: Optional[pa.Schema] = None) -> Union[pa.RecordBatch, pa.Table, pa.RecordBatchReader]
write_dataset(path: Union[str, Path], data: Union[ds.Dataset, pa.Table, pa.RecordBatch, Iterable[pa.RecordBatch], pa.RecordBatchReader, pd.DataFrame, Mapping[str, List[Any]], Sequence[Mapping[str, Any]]], schema: pa.Schema = None, format: Optional[str] = None) -> bool
read_dataset(path: Union[str, Path], format: str) -> ds.dataset
to_batches(data: Union[pa.Table, pa.RecordBatch, Iterable[pa.RecordBatch], Iterable[pa.Table], pa.RecordBatchReader]) -> Generator[pa.RecordBatch, None, None]
create_mapped_table(data: Union[dict, list, pd.DataFrame, pa.RecordBatch, pa.Table], existing: Optional[pa.Table] = None, keep_cols: Union[bool, List[str], None] = True, exclude_cols: Optional[List[str]] = None) -> pa.Table

expression

Expression

Bases: BaseExpression

A class representing an expression in Octoflow.

__init__(expression: Union[Expression, ds.Expression])

Parameters:

Name Type Description Default
expression Union[Expression, ds.Expression]

The (pyarrow) expression to wrap.

required
__eq__(other: Any) -> Expression

Compare two expressions for equality.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__ne__(other: Any) -> Expression

Compare two expressions for inequality.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__lt__(other: Any) -> Expression

Compare two expressions for less than.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__le__(other: Any) -> Expression

Compare two expressions for less than or equal to.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__gt__(other: Any) -> Expression

Compare two expressions for greater than.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__ge__(other: Any) -> Expression

Compare two expressions for greater than or equal to.

Parameters:

Name Type Description Default
other Any

The other expression to compare to.

required

Returns:

Type Description
Expression

The expression representing the result of the comparison.

__and__(other: Any) -> Expression

Combine two expressions with a logical and.

Parameters:

Name Type Description Default
other Any

The other expression to combine with.

required

Returns:

Type Description
Expression

The expression representing the result of the combination.

__or__(other: Any) -> Expression

Combine two expressions with a logical or.

Parameters:

Name Type Description Default
other Any

The other expression to combine with.

required

Returns:

Type Description
Expression

The expression representing the result of the combination.

__invert__() -> Expression

Invert an expression.

Returns:

Type Description
Expression

The expression representing the inverted expression.

is_nan() -> Expression

Check if an expression is NaN.

Returns:

Type Description
Expression

The expression representing the result of the check.

is_null(nan_is_null: bool = False)

Check if an expression is null.

Parameters:

Name Type Description Default
nan_is_null bool

Whether to consider NaN values as null, by default False.

False

Returns:

Type Description
Expression

The expression representing the result of the check.

is_valid() -> Expression

Check if an expression is valid.

Returns:

Type Description
Expression

The expression representing the result of the check.

isin(other: Expression) -> Expression

Check if an expression is in a set of values.

Parameters:

Name Type Description Default
other Expression

The set of values to check against.

required

Returns:

Type Description
Expression

The expression representing the result of the check.

equals(other: Expression) -> Expression

Check if an expression is equal to another expression.

Parameters:

Name Type Description Default
other Expression

The other expression to check against.

required

Returns:

Type Description
Expression

The expression representing the result of the check.

__hash__() -> int

Get the hash of the expression.

Returns:

Type Description
int

The hash of the expression.

__repr__() -> str

Get the representation of the expression.

Returns:

Type Description
str

The representation of the expression.

scalar(value: Any) -> Expression

Create an expression from a scalar.

Parameters:

Name Type Description Default
value Any

The value of the scalar.

required

Returns:

Type Description
Expression

The expression representing the scalar.

loaders

P = ParamSpec('P') module-attribute
R = TypeVar('R') module-attribute
F = TypeVar('F', bound=Callable[..., Any]) module-attribute
loaders: Dict[str, DatasetLoader] = {} module-attribute
DatasetLoader

Bases: BaseDatasetLoader

func = func instance-attribute
name = name or self.func.__name__ instance-attribute
extensions = extensions instance-attribute
path_arg = path_arg instance-attribute
wraps = wraps instance-attribute
__init__(func: Callable[..., Any], name: Optional[str] = None, extensions: Optional[list[str]] = None, path_arg: Optional[str] = None, wraps: Optional[Callable[P, R]] = None)

Parameters:

Name Type Description Default
func Callable[..., Any]

The function to decorate.

required
name Optional[str]

The name of the loader, by default None.

None
extensions Optional[list[str]]

The extensions that the loader supports, by default None.

None
path_arg Optional[str]

The name of the argument that is the path, by default None.

None
wraps Optional[Callable[..., Any]]

The function to wrap, by default None.

None
__call__(*args: P.args, **kwargs: P.kwargs) -> R

Call the loader function.

Parameters:

Name Type Description Default
args tuple

The arguments to pass to the function.

()
kwargs dict

The keyword arguments to pass to the function.

{}

Returns:

Type Description
R

The result of the function.

bind(*args: P.args, **kwargs: P.kwargs) -> Callable[..., R]

Bind arguments to the loader function.

Notes

This method is useful for creating a partial function with pre-filled arguments and keyword arguments. This helps to improve the uniqueness of the fingerprint of the dataset.

Parameters:

Name Type Description Default
args tuple

The arguments to pre-fill.

()
kwargs dict

The keyword arguments to pre-fill.

{}

Returns:

Type Description
Callable[..., R]

The partial function.

dataloader(func: Union[F, str, None] = None, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[..., Any]] = None, path_arg: Optional[str] = None) -> Union[F, Callable[[F], F]]
dataloader(func: F, name: Optional[str] = None, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> F
dataloader(name: str, extensions: Optional[list[str]] = None, wraps: Optional[Callable[P, R]] = None, path_arg: Optional[str] = None) -> Callable[[F], F]

Decorator to register a function as a dataset loader.

Parameters:

Name Type Description Default
func Union[Callable[..., Any], str, None]

The function to decorate, by default None.

None
name Optional[str]

The name of the loader, by default None.

None
extensions Optional[list[str]]

The extensions that the loader supports, by default None.

None
wraps Optional[Callable[..., Any]]

The function to wrap, by default None.

None
path_arg Optional[str]

The name of the argument that is the path, by default None.

None

Returns:

Type Description
DatasetLoader

The dataset loader.

load_json(path: Union[str, Path], encoding: str = 'utf-8') -> Generator[List[Dict], None, None]

Load a dataset from a JSON file.

Parameters:

Name Type Description Default
path (str, Path)

The path to the file.

required
encoding str

The encoding of the file, by default "utf-8".

'utf-8'

Returns:

Type Description
dict

The loaded dataset.

load_jsonl(path: Union[str, Path], encoding: str = 'utf-8') -> Generator[List[Dict], None, None]

Load a dataset from a JSONL file.

Parameters:

Name Type Description Default
path (str, Path)

The path to the file.

required
encoding str

The encoding of the file, by default "utf-8".

'utf-8'

Returns:

Type Description
list[dict]

The loaded dataset.

load_csv(path: Union[str, Path], encoding: str = 'utf-8') -> Generator[List[Dict], None, None]

Load a dataset from a CSV/TSV file.

Parameters:

Name Type Description Default
path (str, Path)

The path to the file.

required
encoding str

The encoding of the file, by default "utf-8".

'utf-8'

Returns:

Type Description
list[dict]

The loaded dataset.

metadata

unify_metadata(left: Any, right: Any) -> Optional[dict]

sampler

Sampler
args = (0, *list(columns.values())) instance-attribute
columns = list(columns.keys()) instance-attribute
__init__(columns: Mapping[str, int])
__call__(lst: Sequence[int])

schema

T = TypeVar('T') module-attribute
unify_schemas(this: pa.Schema, other: Optional[pa.Schema]) -> pa.Schema
infer_schema(data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> Self
validate(schema: pa.Schema, data: dict) -> bool

Validates a dictionary against a PyArrow schema.

Parameters:

Name Type Description Default
schema Schema

The PyArrow schema to validate against.

required
data dict

The dictionary to validate.

required

Raises:

Type Description
ValidationError

If the dictionary does not match the schema.

Examples:

>>> schema = pa.schema([pa.field('id', pa.int64()), pa.field('name', pa.string())])
>>> valid_dict = {'id': 1, 'name': 'Alice'}
>>> validate(schema, valid_dict)
>>> invalid_dict = {'id': '1', 'name': 'Alice'}
>>> validate(schema, invalid_dict)
Traceback (most recent call last):
...
ValidationError: ...
get_schema(data: T) -> Tuple[T, pa.Schema]

Extracts the schema from a PyArrow schema or a generator of PyArrow record batches.

Parameters:

Name Type Description Default
data Any

The PyArrow schema or generator of record batches.

required

Returns:

Type Description
Tuple[Any, Schema]

The data and the schema.

from_dataclass(cls: T) -> pa.Schema

Converts a dataclass to a PyArrow schema.

Parameters:

Name Type Description Default
cls Type[T]

The dataclass to convert.

required

Returns:

Type Description
Schema

The PyArrow schema.

Examples:

>>> import dataclasses
>>> @dataclasses.dataclass
... class Record:
...     id: int
...     name: str
>>> from_dataclass(Record)
pyarrow.Schema([...])
get_schema_from_dataclass(*args, **kwargs) -> pa.Schema

Alias for from_dataclass.

Examples:

>>> import dataclasses
>>> @dataclasses.dataclass
... class Record:
...     id: int
...     name: str
>>> get_schema_from_dataclass(Record)
pyarrow.Schema([...])

types

UNDEFINED = undefined() module-attribute
MonthDayNano

Bases: NamedTuple

months: int instance-attribute
days: int instance-attribute
nanoseconds: int instance-attribute
Undefined

Bases: ExtensionType

__init__()
__arrow_ext_serialize__() -> bytes
__arrow_ext_deserialize__(storage_type, serialized) -> Undefined classmethod
undefined() -> Undefined
is_undefined(obj: pa.DataType) -> bool
from_dataclass(cls: type) -> pa.DataType

Return the PyArrow data type of a dataclass.

Parameters:

Name Type Description Default
cls type

The dataclass.

required

Returns:

Type Description
DataType

The PyArrow data type.

from_typed_dict(cls: _TypedDictMeta) -> pa.DataType

Return the PyArrow data type of a TypedDict.

Parameters:

Name Type Description Default
cls _TypedDictMeta

The TypedDict.

required

Returns:

Type Description
DataType

The PyArrow data type.

from_union(args: tuple[type, ...]) -> pa.DataType
from_dtype(dtype: Union[type, np.dtype, None]) -> pa.DataType

Return the PyArrow data type of a provided native/NumPy data type.

Parameters:

Name Type Description Default
dtype type | dtype | None

The native or NumPy data type.

required

Returns:

Type Description
DataType

The PyArrow data type.

unify_types(left: pa.DataType, right: pa.DataType) -> pa.DataType

Return the PyArrow data type that can represent both left and right.

Parameters:

Name Type Description Default
left DataType

The left PyArrow data type.

required
right DataType

The right PyArrow data type.

required

Returns:

Type Description
DataType

The PyArrow data type.

infer_type(obj: Any) -> pa.DataType

Return the PyArrow data type of an object.

Parameters:

Name Type Description Default
obj Any

The object.

required

Returns:

Type Description
DataType

The PyArrow data type.

exceptions

ValidationError

Bases: Exception

logging

CRITICAL = logging.CRITICAL module-attribute

FATAL = CRITICAL module-attribute

ERROR = logging.ERROR module-attribute

WARNING = logging.WARNING module-attribute

WARN = WARNING module-attribute

INFO = logging.INFO module-attribute

DEBUG = logging.DEBUG module-attribute

NOTSET = logging.NOTSET module-attribute

set_level(level: Union[int, str], logger: Optional[logging.Logger] = None)

Set the logging level of the logger.

Parameters:

Name Type Description Default
logger Logger

Logger instance.

None
level int or str

Logging level.

required

plugin

Package

name = name instance-attribute
modules = modules instance-attribute
__init__(name: str, modules: list)

Parameters:

Name Type Description Default
name str

Name of the package.

required
modules list

List of modules to expose. Each module can be a string or a dict.

If a string, it is the name of the module to import.

If a dict, it has the keys name (the name of the module to import) and package (the name of the package to import the module from). If package is not specified, the module is imported from the current package.

required
import_modules()

Import all modules in the package.

The modules are imported in the order they are defined in the package.

Returns:

Type Description
None

The modules are imported silently. If an error occurs, it is raised.

Raises:

Type Description
TypeError

If a module is not a string or a dict.

ImportError

If a module cannot be imported.

project

Project

name = path.name instance-attribute
base_path = path / '.octoflow' instance-attribute
experiments property
__init__(path: Union[str, Path]) -> None
get_repo() -> Generator[Repo, None, None]
sync(message: Optional[str] = None) -> str

project

ProjectExperiment
get_project = weakref.ref(project) instance-attribute
expr_name = expr_name instance-attribute
project: Project property
__init__(project: Project, expr_name: str) -> None
start_run(force: bool = False, description: Optional[str] = None) -> Run
ProjectExperimentDict

Bases: Mapping[str, ProjectExperiment]

get_project = weakref.ref(project) instance-attribute
project: Project property
experiments_path: Path property
names: Set[str] property
__init__(project: Project) -> None
__iter__()
__getitem__(key: str) -> ProjectExperiment
__contains__(key: str) -> bool
__len__() -> int
__repr__() -> str
first() -> ProjectExperiment
Project
name = path.name instance-attribute
base_path = path / '.octoflow' instance-attribute
experiments property
__init__(path: Union[str, Path]) -> None
get_repo() -> Generator[Repo, None, None]
sync(message: Optional[str] = None) -> str
update_project_gitgnore(path: Path) -> None

tracking

Experiment

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
name: str instance-attribute
description: Optional[str] instance-attribute
artifact_uri: Optional[str] instance-attribute
start_run(name: str, description: Optional[str] = None) -> Run
search_runs(**kwargs) -> List[Run]
delete_run(run: Union[Run, int]) -> None

Run

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
experiment_id: int instance-attribute
name: str instance-attribute
description: Optional[str] instance-attribute
created_at: Optional[dt.datetime] = None class-attribute instance-attribute
tags: MutableMapping[str, JSONType] = field(init=False) class-attribute instance-attribute
__post_init__()
log_param(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value
log_params(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value]
log_metric(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value
log_metrics(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value]
get_values() -> List[Tuple[Variable, Value]]

TrackingClient

store: TrackingStore property
__init__(store: TrackingStore) -> None
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment
get_experiment_by_name(name: str) -> Optional[Experiment]
get_or_create_experiment(name: str, *, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment
list_experiments()

Value

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
run_id: int instance-attribute
variable_id: int instance-attribute
value: ValueType instance-attribute
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute
step_id: Optional[int] = None class-attribute instance-attribute

SQLAlchemyTrackingStore

Bases: TrackingStore

SQLAlchemy tracking store.

This class implements the tracking store interface using SQLAlchemy.

lock: Optional[FileLock] = lockfile instance-attribute
engine = create_engine(url) instance-attribute
__init__(url: Union[str, URL] = 'sqlite:///:memory:')
create_all(checkfirst: bool = True)
session()
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment
list_experiments() -> List[Experiment]
get_experiment(experiment_id: int) -> Experiment
get_experiment_by_name(name: str) -> Experiment
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run
delete_run(experiment_id: int, run_id: int) -> None
search_runs(experiment_id: int, expression: Optional[ColumnExpressionArgument[bool]] = None) -> List[Run]
list_runs(experiment_id: int) -> List[Run]
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags
get_tag(run_id: int, name: str) -> JSONType
get_tags(run_id: int) -> Dict[str, JSONType]
count_tags(run_id: int) -> int
delete_tag(run_id: int, name: str) -> RunTags
log_value(run_id: int, key: str, value: ValueType, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, value_id: Optional[int] = None, is_step: Optional[bool] = None) -> Value
get_values(run_id: int) -> List[Tuple[Variable, Value]]

TrackingStore

Abstract class for tracking stores.

This class is used to define the interface for tracking stores.

__enter__()
__exit__(exc_type, exc_value, traceback)
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment abstractmethod
list_experiments() -> List[Experiment] abstractmethod
get_experiment(experiment_id: int) -> Experiment abstractmethod
get_experiment_by_name(name: str) -> Experiment abstractmethod
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run abstractmethod
delete_run(experiment_id: int, run_id: int) -> None abstractmethod
list_runs(experiment_id: int) -> List[Run] abstractmethod
search_runs(experiment_id: int, **kwargs) -> List[Run] abstractmethod
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags abstractmethod
get_tag(run_id: int, name: str) -> JSONType abstractmethod
get_tags(run_id: int) -> Dict[str, JSONType] abstractmethod
count_tags(run_id: int) -> int abstractmethod
delete_tag(run_id: int, name: str) -> RunTags abstractmethod
log_value(run_id: int, key: str, value: str, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, is_step: Optional[bool] = None) -> Value abstractmethod
log_values(run_id: int, values: List[Union[ValueMapping, ValueTuple, Value]], *, step_id: Optional[int] = None, type: Optional[VariableType] = None) -> List[Value]
get_values(run_id: int) -> List[Tuple[Variable, Value]] abstractmethod
import_store(other: TrackingStore)

artifact

handler
ArtifactMetadata

Bases: MutableDict[str, Any]

handler_ref = weakref.ref(handler) instance-attribute
handler: ArtifactHandler property
__init__(handler: ArtifactHandler) -> None
ArtifactHandlerType

Bases: ABCMeta

name: str property
__new__(*args, **kwargs)
ArtifactHandler
path: Path = Path(path) instance-attribute
metadata = ArtifactMetadata(self) instance-attribute
__init__(path: Union[str, Path]) -> None

Parameters:

Name Type Description Default
path str

The path to the artifact

required
load() -> Any abstractmethod

Load the artifact from the path.

Returns:

Type Description
Any

The loaded artifact.

save(obj: Any, *args, **kwargs) abstractmethod

Save the given artifact to the path.

Parameters:

Name Type Description Default
obj Any

The artifact to save.

required
args tuple

Additional positional arguments.

()
kwargs dict

Additional keyword arguments.

{}

Returns:

Type Description
None
can_handle(obj: object) -> bool abstractmethod classmethod

Return True if this handler can handle the given object type.

Parameters:

Name Type Description Default
obj object

The object to check.

required

Returns:

Type Description
bool

True if this handler can handle the given object type.

exists() -> bool

Return True if the artifact exists.

Returns:

Type Description
bool

True if the artifact exists.

unlink()

Unlink/delete the artifact.

Returns:

Type Description
None

None

get_handler_type(name: str) -> Type[ArtifactHandler]
get_handler_type_by_object(obj: Any) -> Type[ArtifactHandler]
list_handler_types() -> List[str]

models

JSONType = Union[str, int, float, bool, None, Dict[str, Any], List[Any]] module-attribute
TrackingStore

Abstract class for tracking stores.

This class is used to define the interface for tracking stores.

__enter__()
__exit__(exc_type, exc_value, traceback)
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment abstractmethod
list_experiments() -> List[Experiment] abstractmethod
get_experiment(experiment_id: int) -> Experiment abstractmethod
get_experiment_by_name(name: str) -> Experiment abstractmethod
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run abstractmethod
delete_run(experiment_id: int, run_id: int) -> None abstractmethod
list_runs(experiment_id: int) -> List[Run] abstractmethod
search_runs(experiment_id: int, **kwargs) -> List[Run] abstractmethod
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags abstractmethod
get_tag(run_id: int, name: str) -> JSONType abstractmethod
get_tags(run_id: int) -> Dict[str, JSONType] abstractmethod
count_tags(run_id: int) -> int abstractmethod
delete_tag(run_id: int, name: str) -> RunTags abstractmethod
log_value(run_id: int, key: str, value: str, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, is_step: Optional[bool] = None) -> Value abstractmethod
log_values(run_id: int, values: List[Union[ValueMapping, ValueTuple, Value]], *, step_id: Optional[int] = None, type: Optional[VariableType] = None) -> List[Value]
get_values(run_id: int) -> List[Tuple[Variable, Value]] abstractmethod
import_store(other: TrackingStore)
TrackingClient
store: TrackingStore property
__init__(store: TrackingStore) -> None
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment
get_experiment_by_name(name: str) -> Optional[Experiment]
get_or_create_experiment(name: str, *, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment
list_experiments()
Experiment

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
name: str instance-attribute
description: Optional[str] instance-attribute
artifact_uri: Optional[str] instance-attribute
start_run(name: str, description: Optional[str] = None) -> Run
search_runs(**kwargs) -> List[Run]
delete_run(run: Union[Run, int]) -> None
TagsMapping

Bases: MutableMapping[str, JSONType]

data: Dict[str, JSONType] property
__init__(run: Run) -> None
__getitem__(key: str) -> JSONType
__setitem__(key: str, value: JSONType) -> None
__delitem__(key: str) -> None
__iter__() -> Iterator[str]
__len__() -> int
__repr__() -> str
Run

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
experiment_id: int instance-attribute
name: str instance-attribute
description: Optional[str] instance-attribute
created_at: Optional[dt.datetime] = None class-attribute instance-attribute
tags: MutableMapping[str, JSONType] = field(init=False) class-attribute instance-attribute
__post_init__()
log_param(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value
log_params(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value]
log_metric(key: str, value: ValueType, *, step: Union[Value, int, None] = None) -> Value
log_metrics(values: Mapping[str, ValueType], *, step: Optional[Value] = None, prefix: Optional[str] = None) -> List[Value]
get_values() -> List[Tuple[Variable, Value]]
Variable

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
experiment_id: int instance-attribute
key: str instance-attribute
parent_id: Optional[int] instance-attribute
type: Optional[VariableType] = None class-attribute instance-attribute
is_step: Optional[bool] = None class-attribute instance-attribute
Value

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
run_id: int instance-attribute
variable_id: int instance-attribute
value: ValueType instance-attribute
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute
step_id: Optional[int] = None class-attribute instance-attribute
RunTags

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
run_id: int instance-attribute
tag_id: int instance-attribute
value: JSONType = None class-attribute instance-attribute
Tag

Bases: StoredModel

id: int = field(init=False) class-attribute instance-attribute
name: str instance-attribute
TreeNode

Bases: UserDict

is_nested: bool = False class-attribute instance-attribute
from_values(values: List[Tuple[Variable, Value]]) -> Self classmethod
flatten() -> Dict[Tuple, List]

sqlalchemy_store

mapper_registry = registry() module-attribute
variable_constraints = (Index('ix_experiment_id_key', Variable.experiment_id, Variable.key, case((Variable.parent_id.is_(None), '<NULL>'), else_=Variable.parent_id), unique=True),) module-attribute
SQLAlchemyModelMixin
init_on_load()
SQLAlchemyTrackingStore

Bases: TrackingStore

SQLAlchemy tracking store.

This class implements the tracking store interface using SQLAlchemy.

lock: Optional[FileLock] = lockfile instance-attribute
engine = create_engine(url) instance-attribute
__init__(url: Union[str, URL] = 'sqlite:///:memory:')
create_all(checkfirst: bool = True)
session()
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment
list_experiments() -> List[Experiment]
get_experiment(experiment_id: int) -> Experiment
get_experiment_by_name(name: str) -> Experiment
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run
delete_run(experiment_id: int, run_id: int) -> None
search_runs(experiment_id: int, expression: Optional[ColumnExpressionArgument[bool]] = None) -> List[Run]
list_runs(experiment_id: int) -> List[Run]
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags
get_tag(run_id: int, name: str) -> JSONType
get_tags(run_id: int) -> Dict[str, JSONType]
count_tags(run_id: int) -> int
delete_tag(run_id: int, name: str) -> RunTags
log_value(run_id: int, key: str, value: ValueType, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, value_id: Optional[int] = None, is_step: Optional[bool] = None) -> Value
get_values(run_id: int) -> List[Tuple[Variable, Value]]
SQLAlchemyStore

store

store_cv = ContextVar('store', default=None) module-attribute
VariableType = Literal['param', 'metric'] module-attribute
ValueType = Union[str, float, int, bool, None] module-attribute
StoredModel

Bases: BaseModel

store: TrackingStore property
__post_init__()
ValueMapping

Bases: TypedDict

key: str instance-attribute
value: ValueType instance-attribute
type: VariableType instance-attribute
step_id: Optional[int] instance-attribute
timestamp: Optional[dt.datetime] instance-attribute
is_step: Optional[bool] instance-attribute
ValueTuple

Bases: NamedTuple

key: str instance-attribute
value: ValueType instance-attribute
type: VariableType instance-attribute
step_id: Optional[int] = None class-attribute instance-attribute
timestamp: Optional[dt.datetime] = None class-attribute instance-attribute
is_step: Optional[bool] = None class-attribute instance-attribute
TrackingStoreMetaClass

Bases: ABCMeta

__new__(name, bases, attrs: Dict[str, Any], **kwargs)
TrackingStore

Abstract class for tracking stores.

This class is used to define the interface for tracking stores.

__enter__()
__exit__(exc_type, exc_value, traceback)
create_experiment(name: str, description: Optional[str] = None, artifact_uri: Optional[str] = None) -> Experiment abstractmethod
list_experiments() -> List[Experiment] abstractmethod
get_experiment(experiment_id: int) -> Experiment abstractmethod
get_experiment_by_name(name: str) -> Experiment abstractmethod
create_run(experiment_id: int, name: str, description: Optional[str] = None) -> Run abstractmethod
delete_run(experiment_id: int, run_id: int) -> None abstractmethod
list_runs(experiment_id: int) -> List[Run] abstractmethod
search_runs(experiment_id: int, **kwargs) -> List[Run] abstractmethod
set_tag(run_id: int, name: str, value: JSONType = None) -> RunTags abstractmethod
get_tag(run_id: int, name: str) -> JSONType abstractmethod
get_tags(run_id: int) -> Dict[str, JSONType] abstractmethod
count_tags(run_id: int) -> int abstractmethod
delete_tag(run_id: int, name: str) -> RunTags abstractmethod
log_value(run_id: int, key: str, value: str, *, step_id: Optional[int] = None, type: Optional[VariableType] = None, is_step: Optional[bool] = None) -> Value abstractmethod
log_values(run_id: int, values: List[Union[ValueMapping, ValueTuple, Value]], *, step_id: Optional[int] = None, type: Optional[VariableType] = None) -> List[Value]
get_values(run_id: int) -> List[Tuple[Variable, Value]] abstractmethod
import_store(other: TrackingStore)
wrap(method)

typing

T = TypeVar('T') module-attribute

Property

Bases: Generic[T]

__get__(obj, objtype=None) -> T
__set__(obj, value: T) -> None
__delete__(obj)

utils

cache

cache = Cache() module-attribute
Cache
path: Path property
__init__(path: Union[str, Path, None] = None) -> None
cleanup()

collections

EventTarget
__init__(*args, **kwargs)
add_event_listener(type: str, listener: callable)
remove_event_listener(type: str, listener: callable)
dispatch_event(event: str)
MutableCollection

Bases: EventTarget

__init__(*args, **kwargs)
set_parent(parent: MutableCollection) -> MutableCollection
changed()
coerce(value: any)
MutableDict

Bases: MutableCollection, MutableMapping[K, V]

__init__(*args, **kwargs)
__getitem__(key: K) -> V
__setitem__(key: K, value: V)
__delitem__(key: K)
__iter__() -> Generator[K, None, None]
__len__() -> int
__repr__() -> str
MutableList

Bases: MutableCollection, MutableSequence[V]

__init__(*args, **kwargs)
__getitem__(key: int) -> V
__setitem__(key: int, value: V)
insert(index: int, value: V)
__delitem__(key: int)
__len__() -> int
__repr__() -> str
sort(*args, **kwargs) -> None
MutableSet

Bases: MutableCollection, MutableSet[V]

__init__(*args, **kwargs)
add(item: V)
update(*s: Iterable[V]) -> None
discard(item: V)
__contains__(item: V) -> bool
__iter__() -> Generator[V, None, None]
__len__() -> int
__repr__() -> str
flatten(data: Dict[str, Any], *, separator: str = '.', parent_key: Optional[str] = None) -> Dict[Union[str, Tuple[str]], Any]

Flatten a nested dictionary.

Parameters:

Name Type Description Default
data Dict[str, Any]

The nested dictionary to flatten.

required
separator str

The separator, by default "."

'.'
parent_key Optional[str]

The parent key, by default None

None

Returns:

Type Description
Dict[str | tuple[str], Any]

The flattened dictionary.

config

T = TypeVar('T', bound=dataclass) module-attribute
ConfigWrapper
wrapped = wrapped instance-attribute
signature = inspect.signature(wrapped) instance-attribute
filter_keys = filter_keys instance-attribute
name = name instance-attribute
config = config instance-attribute
__init__(wrapped, config: Config, name: Optional[str] = None)
get_params(*args, **kwargs)
__call__(*args, **kwargs)
Config

Bases: MutableMapping

omconf = config if isinstance(config, OmegaConf) else OmegaConf.structured(config) if is_dataclass(config) else OmegaConf.create(config) instance-attribute
__new__(config: Union[Type[T], dict[str, Any]]) -> Union[T, Self]
__new__(config: Type[T]) -> T
__new__(config: dict[str, Any]) -> Self
__init__(config: Any) -> None
__getitem__(name: str) -> Any
__setitem__(name: str, value: Any) -> None
__delitem__(name: str) -> None
__iter__() -> Any
__len__() -> int
__getattr__(name: str) -> Any
wraps(wrapped: Union[type, callable, str, None] = None, **kwargs: Any) -> Union[ConfigWrapper, functools.partial]
wraps(wrapped: Union[type, callable], **kwargs) -> ConfigWrapper
wraps(wrapped: Union[str, None], **kwargs) -> functools.partial
load(path: str) -> Self classmethod
from_dotlist(dotlist: str) -> Self classmethod
from_cli(args: list[str]) -> Self classmethod
configmethod(func: T) -> T

escape

ESCAPED_CHARS = '\\\\`*_{}[]()#+\\-.!' module-attribute
ESCAPED_CHARS_RE = re.compile(f'([{re.escape(ESCAPED_CHARS)}])') module-attribute
UNESCAPED_CHARS_RE = re.compile(f'\\([{re.escape(ESCAPED_CHARS)}])') module-attribute
escape(text: str) -> str

Escape text.

unescape(text: str) -> str

Unescape text.

func

P = ParamSpec('P') module-attribute
T = TypeVar('T') module-attribute
bind(__func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> Union[functools.partial, Callable[..., T]]

Bind arguments to a function and return a new function.

This function is similar to functools.partial, but it allows binding arguments both by name and by position (converting positional arguments to keyword arguments when possible).

Parameters:

Name Type Description Default
__func Callable

The function to bind arguments to.

required
*args args

Positional arguments to bind to the function.

()
**kwargs kwargs

Keyword arguments to bind to the function.

{}

Returns:

Type Description
partial or Callable

A new function with the arguments bound.

hashing

Fast cryptographic hash of Python objects, with a special case for fast hashing of numpy arrays.

Pickler = pickle._Pickler module-attribute
T = TypeVar('T') module-attribute
P = ParamSpec('P') module-attribute
Hashable

Bases: Protocol

A class that can be hashed.

Hasher

Bases: Pickler

A subclass of pickler, to do cryptographic hashing, rather than pickling.

stream = io.BytesIO() instance-attribute
dispatch = Pickler.dispatch.copy() class-attribute instance-attribute
__init__()
hash(obj, return_digest=True)
save(obj)
memoize(obj)
save_global(obj, name=None, pack=struct.pack)
save_set(set_items)
NumpyHasher

Bases: Hasher

Special case the hasher for when numpy is loaded.

coerce_mmap = coerce_mmap instance-attribute
np = np instance-attribute
__init__(coerce_mmap=False)
save(obj)

Subclass the save method to hash ndarray subclasses, rather than pickling them. Of course, this is a total abuse of the Pickler class.

Wrapped

Bases: Generic[T]

cls = cls instance-attribute
__init__(cls: Callable[P, T])
__call__(*args: P.args, **kwargs: P.kwargs) -> T
__getattr__(name: str) -> Any
hash(*obj, coerce_mmap=False)

Quick calculation of a hash to uniquely identify Python objects containing numpy arrays.

Parameters:

Name Type Description Default
obj

The objects to hash

()
coerce_mmap

Make no difference between np.memmap and np.ndarray

False
init_based_hash(cls: Type[T]) -> Wrapped[T]

objects

import_class(cls: str)
create_object(__cls: str, __partial: bool = False, *args, **kwargs)

rsync

RSyncError

Bases: Exception

rsync(src: Path, dest: Path, exclude: Optional[list[str]] = None, ignore_errors: Optional[bool] = False, append_dir: Optional[bool] = False) -> Generator[str, None, None]