some api improvements + remove manual changelog (#12)

This commit is contained in:
Matthew Cramerus
2025-01-03 09:47:07 -06:00
committed by GitHub
parent da8ed33b5b
commit a739bc5301
3 changed files with 16 additions and 35 deletions
-10
View File
@@ -1,10 +0,0 @@
# Changelog
## 0.1.0
### Added
* v0.1.0 `FileMetadata` table, used for tracking object storage metadata.
* v0.1.0 `RowMetadataRegistry` and `RowMetadataSource`, an abstraction layer for metadata used in incremental view maintenance.
* v0.1.0 `ListingTableLike` and `Materialized` traits, an API for describing Hive-partitioned tables in object storage.
* v0.1.0 `mv_dependencies` and `stale_files` UDTFs, the core features for incremental view maintenance.
+4 -22
View File
@@ -842,13 +842,12 @@ mod test {
use datafusion_common::{Column, Result, ScalarValue};
use datafusion_expr::{Expr, JoinType, LogicalPlan, TableType};
use datafusion_physical_plan::ExecutionPlan;
use datafusion_sql::TableReference;
use itertools::Itertools;
use crate::materialized::{
dependencies::pushdown_projection_inexact,
register_materialized,
row_metadata::{ObjectStoreRowMetadataSource, RowMetadataRegistry, RowMetadataSource},
row_metadata::{ObjectStoreRowMetadataSource, RowMetadataRegistry},
ListingTableLike, Materialized,
};
@@ -1005,31 +1004,14 @@ mod test {
.collect()
.await?;
let row_metadata_registry = Arc::new(RowMetadataRegistry::default());
let t1_ref = TableReference::parse_str("t1").resolve(
&ctx.state().config_options().catalog.default_catalog,
&ctx.state().config_options().catalog.default_schema,
);
let t2_ref = TableReference::parse_str("t2").resolve(
&ctx.state().config_options().catalog.default_catalog,
&ctx.state().config_options().catalog.default_schema,
);
let t3_ref = TableReference::parse_str("t3").resolve(
&ctx.state().config_options().catalog.default_catalog,
&ctx.state().config_options().catalog.default_schema,
);
let metadata_table = ctx.table_provider("file_metadata").await?;
let object_store_metadata_source = Arc::new(
ObjectStoreRowMetadataSource::with_file_metadata(Arc::clone(&metadata_table)),
);
for r in [t1_ref, t2_ref, t3_ref] {
row_metadata_registry.register_source(
&r,
Arc::clone(&object_store_metadata_source) as Arc<dyn RowMetadataSource>,
);
}
let row_metadata_registry = Arc::new(RowMetadataRegistry::new_with_default_source(
object_store_metadata_source,
));
ctx.register_udtf(
"mv_dependencies",
+12 -3
View File
@@ -27,7 +27,6 @@ use super::{file_metadata::FileMetadata, hive_partition::hive_partition, META_CO
/// Registry that manages metadata sources for different tables.
/// Provides a centralized way to register and retrieve metadata sources
/// that can be used to obtain row-level metadata for tables.
#[derive(Default)]
pub struct RowMetadataRegistry {
metadata_sources: DashMap<String, Arc<dyn RowMetadataSource>>,
default_source: Option<Arc<dyn RowMetadataSource>>,
@@ -49,14 +48,24 @@ impl std::fmt::Debug for RowMetadataRegistry {
}
impl RowMetadataRegistry {
/// Initializes this `RowMetadataRegistry` with a default [`RowMetadataSource`]
/// to be used if a table has not been explicitly registered with a specific source.
///
/// Typically the [`FileMetadata`] source should be used as the default.
pub fn new_with_default_source(default_source: Arc<dyn RowMetadataSource>) -> Self {
    // Both fields are spelled out explicitly, so no `..Default::default()` base is
    // needed; this keeps the constructor independent of any `Default` impl on `Self`.
    Self {
        metadata_sources: Default::default(),
        default_source: Some(default_source),
    }
}
/// Creates a `RowMetadataRegistry` whose source map starts out at its default
/// value and which has no default [`RowMetadataSource`] configured.
///
/// In most cases [`RowMetadataRegistry::new_with_default_source`] is the
/// constructor callers want.
pub fn new_empty() -> Self {
    let metadata_sources = Default::default();
    let default_source = None;
    Self {
        metadata_sources,
        default_source,
    }
}