mirror of
https://github.com/langchain-ai/datafusion.git
synced 2026-07-01 21:24:06 -04:00
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes https://github.com/apache/datafusion/issues/20251. ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
This commit is contained in:
@@ -18,9 +18,8 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use datafusion::dataframe::DataFrameWriteOptions;
|
||||
use datafusion::error::Result;
|
||||
use datafusion::error::{DataFusionError, Result};
|
||||
use datafusion::prelude::{CsvReadOptions, SessionContext};
|
||||
use datafusion_common::DataFusionError;
|
||||
use tempfile::TempDir;
|
||||
use tokio::fs::create_dir_all;
|
||||
|
||||
|
||||
@@ -18,8 +18,7 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use arrow_schema::SchemaRef;
|
||||
use datafusion::error::Result;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datafusion::error::{DataFusionError, Result};
|
||||
|
||||
pub mod cars;
|
||||
pub mod regex;
|
||||
@@ -50,10 +49,11 @@ impl ExampleDataset {
|
||||
}
|
||||
|
||||
pub fn path_str(&self) -> Result<String> {
|
||||
self.path().to_str().map(String::from).ok_or_else(|| {
|
||||
let path = self.path();
|
||||
path.to_str().map(String::from).ok_or_else(|| {
|
||||
DataFusionError::Execution(format!(
|
||||
"CSV directory path is not valid UTF-8: {}",
|
||||
self.path().display()
|
||||
path.display()
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -20,10 +20,12 @@
|
||||
//! An example group is defined as a directory containing a `main.rs` file
|
||||
//! under the examples root. This module is intentionally filesystem-focused
|
||||
//! and does not perform any parsing or rendering.
|
||||
//! Discovery fails if no valid example groups are found.
|
||||
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use datafusion::common::exec_err;
|
||||
use datafusion::error::Result;
|
||||
|
||||
/// Discovers all example group directories under the given root.
|
||||
@@ -35,10 +37,15 @@ pub fn discover_example_groups(root: &Path) -> Result<Vec<PathBuf>> {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() && path.join("main.rs").exists() {
|
||||
if path.is_dir() && path.join("main.rs").is_file() {
|
||||
groups.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
if groups.is_empty() {
|
||||
return exec_err!("No example groups found under: {}", root.display());
|
||||
}
|
||||
|
||||
groups.sort();
|
||||
Ok(groups)
|
||||
}
|
||||
@@ -47,6 +54,8 @@ pub fn discover_example_groups(root: &Path) -> Result<Vec<PathBuf>> {
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::utils::example_metadata::test_utils::assert_exec_err_contains;
|
||||
|
||||
use std::fs::{self, File};
|
||||
|
||||
use tempfile::TempDir;
|
||||
@@ -66,10 +75,29 @@ mod tests {
|
||||
fs::create_dir(&group2)?;
|
||||
|
||||
let groups = discover_example_groups(root)?;
|
||||
|
||||
assert_eq!(groups.len(), 1);
|
||||
assert_eq!(groups[0], group1);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn discover_example_groups_errors_if_main_rs_is_a_directory() -> Result<()> {
|
||||
let tmp = TempDir::new()?;
|
||||
let root = tmp.path();
|
||||
let group = root.join("group");
|
||||
fs::create_dir(&group)?;
|
||||
fs::create_dir(group.join("main.rs"))?;
|
||||
|
||||
let err = discover_example_groups(root).unwrap_err();
|
||||
assert_exec_err_contains(err, "No example groups found");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn discover_example_groups_errors_if_none_found() -> Result<()> {
|
||||
let tmp = TempDir::new()?;
|
||||
let err = discover_example_groups(tmp.path()).unwrap_err();
|
||||
assert_exec_err_contains(err, "No example groups found");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,16 @@ use std::path::Path;
|
||||
|
||||
use datafusion::error::{DataFusionError, Result};
|
||||
|
||||
use crate::utils::example_metadata::{parse_main_rs_docs, render::ABBREVIATIONS};
|
||||
use crate::utils::example_metadata::parse_main_rs_docs;
|
||||
|
||||
/// Well-known abbreviations used to preserve correct capitalization
|
||||
/// when generating human-readable documentation titles.
|
||||
const ABBREVIATIONS: &[(&str, &str)] = &[
|
||||
("dataframe", "DataFrame"),
|
||||
("io", "IO"),
|
||||
("sql", "SQL"),
|
||||
("udf", "UDF"),
|
||||
];
|
||||
|
||||
/// A group of related examples (e.g. `builtin_functions`, `udf`).
|
||||
///
|
||||
|
||||
@@ -21,15 +21,16 @@
|
||||
//! and their associated metadata (file name and description), enforcing
|
||||
//! a strict ordering and structure to avoid ambiguous documentation.
|
||||
|
||||
use std::path::Path;
|
||||
use std::{collections::HashSet, fs};
|
||||
use std::{collections::HashSet, fs, path::Path};
|
||||
|
||||
use datafusion_common::{DataFusionError, Result};
|
||||
use datafusion::common::exec_err;
|
||||
use datafusion::error::Result;
|
||||
use nom::{
|
||||
IResult, Parser,
|
||||
Err, IResult, Parser,
|
||||
bytes::complete::{tag, take_until, take_while},
|
||||
character::complete::multispace0,
|
||||
combinator::all_consuming,
|
||||
error::{Error, ErrorKind},
|
||||
sequence::{delimited, preceded},
|
||||
};
|
||||
|
||||
@@ -77,19 +78,13 @@ fn parse_metadata_line(input: &str) -> IResult<&str, (&str, &str)> {
|
||||
let content = payload
|
||||
.strip_prefix("(")
|
||||
.and_then(|s| s.strip_suffix(")"))
|
||||
.ok_or_else(|| {
|
||||
nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag))
|
||||
})?;
|
||||
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?;
|
||||
|
||||
let (file, desc) = content
|
||||
.strip_prefix("file:")
|
||||
.ok_or_else(|| {
|
||||
nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag))
|
||||
})?
|
||||
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?
|
||||
.split_once(", desc:")
|
||||
.ok_or_else(|| {
|
||||
nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag))
|
||||
})?;
|
||||
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?;
|
||||
|
||||
Ok((rest, (file.trim(), desc.trim())))
|
||||
}
|
||||
@@ -119,18 +114,16 @@ pub fn parse_main_rs_docs(path: &Path) -> Result<Vec<ExampleEntry>> {
|
||||
let subcommand = match state {
|
||||
ParserState::SeenSubcommand(s) => s,
|
||||
ParserState::Idle => {
|
||||
return Err(DataFusionError::Execution(format!(
|
||||
return exec_err!(
|
||||
"Metadata without preceding subcommand at {}:{}",
|
||||
path.display(),
|
||||
line_no + 1
|
||||
)));
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
if !seen_subcommands.insert(subcommand) {
|
||||
return Err(DataFusionError::Execution(format!(
|
||||
"Duplicate metadata for subcommand `{subcommand}`"
|
||||
)));
|
||||
return exec_err!("Duplicate metadata for subcommand `{subcommand}`");
|
||||
}
|
||||
|
||||
entries.push(ExampleEntry {
|
||||
|
||||
@@ -85,15 +85,6 @@ cargo run --example dataframe -- dataframe
|
||||
```
|
||||
"#;
|
||||
|
||||
/// Well-known abbreviations used to preserve correct capitalization
|
||||
/// when generating human-readable documentation titles.
|
||||
pub const ABBREVIATIONS: &[(&str, &str)] = &[
|
||||
("dataframe", "DataFrame"),
|
||||
("io", "IO"),
|
||||
("sql", "SQL"),
|
||||
("udf", "UDF"),
|
||||
];
|
||||
|
||||
/// Generates Markdown documentation for DataFusion examples.
|
||||
///
|
||||
/// If `group` is `None`, documentation is generated for all example groups.
|
||||
|
||||
Reference in New Issue
Block a user