mirror of
https://github.com/langchain-ai/delta-rs.git
synced 2026-07-01 20:34:35 -04:00
doc: content and formatting improvements to API docs
Signed-off-by: Sam Wright <samuel@plaindocs.com>
This commit is contained in:
committed by
Ethan Urbanski
parent
3042f3266d
commit
dc9c51b878
@@ -4,6 +4,7 @@ search:
|
||||
---
|
||||
|
||||
# Writer
|
||||
|
||||
## Write to Delta Tables
|
||||
|
||||
::: deltalake.write_deltalake
|
||||
@@ -15,6 +16,7 @@ search:
|
||||
::: deltalake.WriterProperties
|
||||
|
||||
## Convert to Delta Tables
|
||||
|
||||
::: deltalake.convert_to_deltalake
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
search:
|
||||
boost: 2
|
||||
boost: 2
|
||||
---
|
||||
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
|
||||
|
||||
class BaseDeltaStorageHandler:
|
||||
"""
|
||||
BaseDeltaStorageHandler is a concrete implementations of a PyArrow FileSystemHandler.
|
||||
BaseDeltaStorageHandler is an implementation of a PyArrow FileSystemHandler.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
||||
+17
-18
@@ -14,25 +14,24 @@ class QueryBuilder:
|
||||
"""
|
||||
QueryBuilder is an API which exposes Apache DataFusion SQL to Python users of the deltalake library.
|
||||
|
||||
>>> qb = QueryBuilder()
|
||||
```py
|
||||
qb = QueryBuilder()
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._query_builder = PyQueryBuilder()
|
||||
|
||||
def register(self, table_name: str, delta_table: DeltaTable) -> QueryBuilder:
|
||||
"""
|
||||
Add a table to the query builder instance by name. The `table_name`
|
||||
will be how the referenced `DeltaTable` can be referenced in SQL
|
||||
queries.
|
||||
"""Add a table to the query builder instance by name. The given `delta_table`
|
||||
is then available in SQL queries under the name `table_name`.
|
||||
|
||||
For example:
|
||||
|
||||
```python
|
||||
from deltalake import DeltaTable, QueryBuilder
|
||||
dt = DeltaTable("my_table")
|
||||
qb = QueryBuilder().register('test', dt)
|
||||
```
|
||||
Example:
|
||||
```py
|
||||
from deltalake import DeltaTable, QueryBuilder
|
||||
dt = DeltaTable("my_table")
|
||||
qb = QueryBuilder().register('test', dt)
|
||||
```
|
||||
"""
|
||||
self._query_builder.register(
|
||||
table_name=table_name,
|
||||
@@ -44,11 +43,11 @@ class QueryBuilder:
|
||||
"""
|
||||
Prepares the SQL query to be executed.
|
||||
|
||||
For example:
|
||||
```python
|
||||
from deltalake import DeltaTable, QueryBuilder
|
||||
dt = DeltaTable("my_table")
|
||||
data = QueryBuilder().register('test', dt).execute("select * from test").read_all()
|
||||
```
|
||||
Example:
|
||||
```py
|
||||
from deltalake import DeltaTable, QueryBuilder
|
||||
dt = DeltaTable("my_table")
|
||||
data = QueryBuilder().register('test', dt).execute("select * from test").read_all()
|
||||
```
|
||||
"""
|
||||
return self._query_builder.execute(sql)
|
||||
|
||||
+23
-17
@@ -117,7 +117,7 @@ class Metadata:
|
||||
@property
|
||||
def created_time(self) -> int:
|
||||
"""
|
||||
Return The time when this metadata action is created, in milliseconds since the Unix epoch of the DeltaTable.
|
||||
Return the time when this metadata instance of the DeltaTable was created, in milliseconds since the Unix epoch.
|
||||
"""
|
||||
return self._metadata.created_time
|
||||
|
||||
@@ -330,7 +330,10 @@ class DeltaTable:
|
||||
partition_filters: list[tuple[str, str, Any]] | None = None,
|
||||
) -> list[dict[str, str]]:
|
||||
"""
|
||||
Returns the partitions as a list of dicts. Example: `[{'month': '1', 'year': '2020', 'day': '1'}, ...]`
|
||||
Returns the partitions as a list of dicts.
|
||||
|
||||
Example:
|
||||
`[{'month': '1', 'year': '2020', 'day': '1'}, ...]`
|
||||
|
||||
Args:
|
||||
partition_filters: The partition filters that will be used for getting the matched partitions, defaults to `None` (no filtering).
|
||||
@@ -361,14 +364,17 @@ class DeltaTable:
|
||||
Returns:
|
||||
list of the .parquet files with an absolute URI referenced for the current version of the DeltaTable
|
||||
|
||||
Predicates are expressed in disjunctive normal form (DNF), like [("x", "=", "a"), ...].
|
||||
Predicates are expressed in disjunctive normal form (DNF), like `[("x", "=", "a"), ...]`.
|
||||
DNF allows arbitrary boolean logical combinations of single partition predicates.
|
||||
The innermost tuples each describe a single partition predicate. The list of inner
|
||||
predicates is interpreted as a conjunction (AND), forming a more selective and
|
||||
multiple partition predicates. Each tuple has format: (key, op, value) and compares
|
||||
the key with the value. The supported op are: `=`, `!=`, `in`, and `not in`. If
|
||||
the op is in or not in, the value must be a collection such as a list, a set or a tuple.
|
||||
The supported type for value is str. Use empty string `''` for Null partition value.
|
||||
multiple partition predicates.
|
||||
|
||||
Each tuple has format: `(key, op, value)` and compares the key with the value.
|
||||
|
||||
The supported values of `op` are: `=`, `!=`, `in`, and `not in`. If the op is `in` or `not in`,
|
||||
the value must be a collection such as a list, a set or a tuple.
|
||||
The supported type for value is `str`. Use empty string `''` for Null partition value.
|
||||
|
||||
Example:
|
||||
```
|
||||
@@ -888,7 +894,7 @@ class DeltaTable:
|
||||
commit_properties: CommitProperties | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Run the Restore command on the Delta Table: restore table to a given version or datetime.
|
||||
Restores table to a given version or datetime.
|
||||
|
||||
Args:
|
||||
target: the version to restore to, given as an int (version number), a date string, or a datetime.
|
||||
@@ -940,9 +946,9 @@ class DeltaTable:
|
||||
More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.ParquetReadOptions.html
|
||||
|
||||
Example:
|
||||
``deltalake`` will work with any storage compliant with :class:`pyarrow.fs.FileSystem`, however the root of the filesystem has
|
||||
``deltalake`` will work with any storage compliant with [pyarrow.fs.FileSystem][pyarrow.fs.FileSystem], however the root of the filesystem has
|
||||
to be adjusted to point at the root of the Delta table. We can achieve this by wrapping the custom filesystem into
|
||||
a :class:`pyarrow.fs.SubTreeFileSystem`.
|
||||
a [pyarrow.fs.SubTreeFileSystem][pyarrow.fs.SubTreeFileSystem].
|
||||
```
|
||||
import pyarrow.fs as fs
|
||||
from deltalake import DeltaTable
|
||||
@@ -1672,7 +1678,7 @@ class TableMerger:
|
||||
TableMerger: TableMerger Object
|
||||
|
||||
Example:
|
||||
** Insert all columns **
|
||||
**Insert all columns**
|
||||
|
||||
```python
|
||||
from deltalake import DeltaTable, write_deltalake
|
||||
@@ -2143,11 +2149,11 @@ class TableAlterer:
|
||||
|
||||
If the column name doesn't exist in the schema - an error is raised.
|
||||
|
||||
:param column: name of the column to update metadata for.
|
||||
:param metadata: the metadata to be added or modified on the column.
|
||||
:param commit_properties: properties of the transaction commit. If None, default values are used.
|
||||
:param post_commithook_properties: properties for the post commit hook. If None, default values are used.
|
||||
:return:
|
||||
Args:
|
||||
column: name of the column to update metadata for.
|
||||
metadata: the metadata to be added or modified on the column.
|
||||
commit_properties: properties of the transaction commit. If None, default values are used.
|
||||
post_commithook_properties: properties for the post commit hook. If None, default values are used.
|
||||
"""
|
||||
commit_properties, post_commithook_properties = (
|
||||
deprecate_positional_commit_args(
|
||||
@@ -2163,7 +2169,7 @@ class TableAlterer:
|
||||
|
||||
|
||||
class TableOptimizer:
|
||||
"""API for various table optimization commands."""
|
||||
"""API for table optimization commands."""
|
||||
|
||||
def __init__(self, table: DeltaTable) -> None:
|
||||
self.table = table
|
||||
|
||||
@@ -96,7 +96,7 @@ def write_deltalake(
|
||||
description: User-provided description for this table.
|
||||
configuration: A map containing configuration options for the metadata action.
|
||||
schema_mode: If set to "overwrite", allows replacing the schema of the table. Set to "merge" to merge with existing schema.
|
||||
storage_options: options passed to the native delta filesystem.
|
||||
storage_options: Options passed to the native delta filesystem.
|
||||
predicate: When using `Overwrite` mode, replace data that matches a predicate.
|
||||
target_file_size: Override for target file size for data files written to the delta table. If not passed, it's taken from `delta.targetFileSize`.
|
||||
writer_properties: Pass writer properties to the Rust parquet writer.
|
||||
|
||||
@@ -740,16 +740,20 @@ pub fn schema_to_pyobject(
|
||||
|
||||
/// A Delta Lake schema
|
||||
///
|
||||
/// Create using a list of :class:`Field`:
|
||||
/// Create using a list of [deltalake.Field][deltalake.Field]:
|
||||
///
|
||||
/// >>> Schema([Field("x", "integer"), Field("y", "string")])
|
||||
/// ```python
|
||||
/// Schema([Field("x", "integer"), Field("y", "string")])
|
||||
/// Schema([Field(x, PrimitiveType("integer"), nullable=True), Field(y, PrimitiveType("string"), nullable=True)])
|
||||
/// ```
|
||||
///
|
||||
/// Or create from a PyArrow schema:
|
||||
/// Or create from a [pyarrow.Schema][pyarrow.Schema]:
|
||||
///
|
||||
/// >>> from arro3.core import DateType, Schema as ArrowSchema
|
||||
/// >>> Schema.from_pyarrow(ArrowSchema({"x": DateType.int32(), "y": DateType.string()}))
|
||||
/// ```py
|
||||
/// from arro3.core import DataType, Schema as ArrowSchema
|
||||
/// Schema.from_pyarrow(ArrowSchema({"x": DataType.int32(), "y": DataType.string()}))
|
||||
/// Schema([Field(x, PrimitiveType("integer"), nullable=True), Field(y, PrimitiveType("string"), nullable=True)])
|
||||
/// ```
|
||||
#[pyclass(extends = StructType, name = "Schema", module = "deltalake._internal")]
|
||||
pub struct PySchema;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user