doc: content and formatting improvements to API docs

Signed-off-by: Sam Wright <samuel@plaindocs.com>
This commit is contained in:
Sam Wright
2026-03-22 12:13:46 +02:00
committed by Ethan Urbanski
parent 3042f3266d
commit dc9c51b878
7 changed files with 54 additions and 43 deletions
+2
View File
@@ -4,6 +4,7 @@ search:
---
# Writer
## Write to Delta Tables
::: deltalake.write_deltalake
@@ -15,6 +16,7 @@ search:
::: deltalake.WriterProperties
## Convert to Delta Tables
::: deltalake.convert_to_deltalake
+1 -1
View File
@@ -1,6 +1,6 @@
---
search:
boost: 2
boost: 2
---
+1 -1
View File
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
class BaseDeltaStorageHandler:
"""
BaseDeltaStorageHandler is a concrete implementations of a PyArrow FileSystemHandler.
BaseDeltaStorageHandler is an implementation of a PyArrow FileSystemHandler.
"""
def __init__(
+17 -18
View File
@@ -14,25 +14,24 @@ class QueryBuilder:
"""
QueryBuilder is an API which exposes Apache DataFusion SQL to Python users of the deltalake library.
>>> qb = QueryBuilder()
```py
qb = QueryBuilder()
```
"""
def __init__(self) -> None:
self._query_builder = PyQueryBuilder()
def register(self, table_name: str, delta_table: DeltaTable) -> QueryBuilder:
"""
Add a table to the query builder instance by name. The `table_name`
will be how the referenced `DeltaTable` can be referenced in SQL
queries.
"""Add a table to the query builder instance by name. Table `DeltaTable`
is available in SQL queries as `table_name`.
For example:
```python
from deltalake import DeltaTable, QueryBuilder
dt = DeltaTable("my_table")
qb = QueryBuilder().register('test', dt)
```
Example:
```py
from deltalake import DeltaTable, QueryBuilder
dt = DeltaTable("my_table")
qb = QueryBuilder().register('test', dt)
```
"""
self._query_builder.register(
table_name=table_name,
@@ -44,11 +43,11 @@ class QueryBuilder:
"""
Prepares the SQL query to be executed.
For example:
```python
from deltalake import DeltaTable, QueryBuilder
dt = DeltaTable("my_table")
data = QueryBuilder().register('test', dt).execute("select * from test").read_all()
```
Example:
```py
from deltalake import DeltaTable, QueryBuilder
dt = DeltaTable("my_table")
data = QueryBuilder().register('test', dt).execute("select * from test").read_all()
```
"""
return self._query_builder.execute(sql)
+23 -17
View File
@@ -117,7 +117,7 @@ class Metadata:
@property
def created_time(self) -> int:
"""
Return The time when this metadata action is created, in milliseconds since the Unix epoch of the DeltaTable.
Return the time when this metadata instance of the DeltaTable was created, in milliseconds since the Unix epoch.
"""
return self._metadata.created_time
@@ -330,7 +330,10 @@ class DeltaTable:
partition_filters: list[tuple[str, str, Any]] | None = None,
) -> list[dict[str, str]]:
"""
Returns the partitions as a list of dicts. Example: `[{'month': '1', 'year': '2020', 'day': '1'}, ...]`
Returns the partitions as a list of dicts.
Example:
`[{'month': '1', 'year': '2020', 'day': '1'}, ...]`
Args:
partition_filters: The partition filters that will be used for getting the matched partitions, defaults to `None` (no filtering).
@@ -361,14 +364,17 @@ class DeltaTable:
Returns:
list of the .parquet files with an absolute URI referenced for the current version of the DeltaTable
Predicates are expressed in disjunctive normal form (DNF), like [("x", "=", "a"), ...].
Predicates are expressed in disjunctive normal form (DNF), like `[("x", "=", "a"), ...]`.
DNF allows arbitrary boolean logical combinations of single partition predicates.
The innermost tuples each describe a single partition predicate. The list of inner
predicates is interpreted as a conjunction (AND), forming a more selective and
multiple partition predicates. Each tuple has format: (key, op, value) and compares
the key with the value. The supported op are: `=`, `!=`, `in`, and `not in`. If
the op is in or not in, the value must be a collection such as a list, a set or a tuple.
The supported type for value is str. Use empty string `''` for Null partition value.
multiple partition predicates.
Each tuple has format: `(key, op, value)` and compares the key with the value.
The supported operators are: `=`, `!=`, `in`, and `not in`. If the operator is `in` or `not in`,
the value must be a collection such as a list, a set or a tuple.
The supported type for value is `str`. Use empty string `''` for Null partition value.
Example:
```
@@ -888,7 +894,7 @@ class DeltaTable:
commit_properties: CommitProperties | None = None,
) -> dict[str, Any]:
"""
Run the Restore command on the Delta Table: restore table to a given version or datetime.
Restores table to a given version or datetime.
Args:
target: the version to restore to, given as an int version number, a date str or a datetime.
@@ -940,9 +946,9 @@ class DeltaTable:
More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.ParquetReadOptions.html
Example:
``deltalake`` will work with any storage compliant with :class:`pyarrow.fs.FileSystem`, however the root of the filesystem has
``deltalake`` will work with any storage compliant with [pyarrow.fs.FileSystem][pyarrow.fs.FileSystem], however the root of the filesystem has
to be adjusted to point at the root of the Delta table. We can achieve this by wrapping the custom filesystem into
a :class:`pyarrow.fs.SubTreeFileSystem`.
a [pyarrow.fs.SubTreeFileSystem][pyarrow.fs.SubTreeFileSystem].
```
import pyarrow.fs as fs
from deltalake import DeltaTable
@@ -1672,7 +1678,7 @@ class TableMerger:
TableMerger: TableMerger Object
Example:
** Insert all columns **
**Insert all columns**
```python
from deltalake import DeltaTable, write_deltalake
@@ -2143,11 +2149,11 @@ class TableAlterer:
If the column name doesn't exist in the schema - an error is raised.
:param column: name of the column to update metadata for.
:param metadata: the metadata to be added or modified on the column.
:param commit_properties: properties of the transaction commit. If None, default values are used.
:param post_commithook_properties: properties for the post commit hook. If None, default values are used.
:return:
Args:
column: name of the column to update metadata for.
metadata: the metadata to be added or modified on the column.
commit_properties: properties of the transaction commit. If None, default values are used.
post_commithook_properties: properties for the post commit hook. If None, default values are used.
"""
commit_properties, post_commithook_properties = (
deprecate_positional_commit_args(
@@ -2163,7 +2169,7 @@ class TableAlterer:
class TableOptimizer:
"""API for various table optimization commands."""
"""API for table optimization commands."""
def __init__(self, table: DeltaTable) -> None:
self.table = table
+1 -1
View File
@@ -96,7 +96,7 @@ def write_deltalake(
description: User-provided description for this table.
configuration: A map containing configuration options for the metadata action.
schema_mode: If set to "overwrite", allows replacing the schema of the table. Set to "merge" to merge with existing schema.
storage_options: options passed to the native delta filesystem.
storage_options: Options passed to the native delta filesystem.
predicate: When using `Overwrite` mode, replace data that matches a predicate.
target_file_size: Override for target file size for data files written to the delta table. If not passed, it's taken from `delta.targetFileSize`.
writer_properties: Pass writer properties to the Rust parquet writer.
+9 -5
View File
@@ -740,16 +740,20 @@ pub fn schema_to_pyobject(
/// A Delta Lake schema
///
/// Create using a list of :class:`Field`:
/// Create using a list of [deltalake.Field][deltalake.Field]:
///
/// >>> Schema([Field("x", "integer"), Field("y", "string")])
/// ```python
/// Schema([Field("x", "integer"), Field("y", "string")])
/// Schema([Field(x, PrimitiveType("integer"), nullable=True), Field(y, PrimitiveType("string"), nullable=True)])
/// ```
///
/// Or create from a PyArrow schema:
/// Or create from a [pyarrow.Schema][pyarrow.Schema]:
///
/// >>> from arro3.core import DateType, Schema as ArrowSchema
/// >>> Schema.from_pyarrow(ArrowSchema({"x": DateType.int32(), "y": DateType.string()}))
/// ```py
/// from arro3.core import DataType, Schema as ArrowSchema
/// Schema.from_pyarrow(ArrowSchema({"x": DataType.int32(), "y": DataType.string()}))
/// Schema([Field(x, PrimitiveType("integer"), nullable=True), Field(y, PrimitiveType("string"), nullable=True)])
/// ```
#[pyclass(extends = StructType, name = "Schema", module = "deltalake._internal")]
pub struct PySchema;