mirror of
https://github.com/langchain-ai/arrow-rs.git
synced 2026-07-01 21:34:01 -04:00
Define an "arrow-pyarrow" crate to implement the "pyarrow" feature. (#7694)
This follows the pattern of other parts of the arrow-rs codebase: arrow-array, arrow-schema, etc. With this change, polyglot codebases can use pyarrow without making all their crates that use arrow pull in pyarrow (& pyo3). It also allows interfacing with PyArrow without pulling in the top-level `arrow` crate. # Which issue does this PR close? Closes https://github.com/apache/arrow-rs/issues/7668. # Rationale for this change One part of a codebase can use pyarrow without the `arrow` crate pulling pyo3 into the rest of the codebase. # Are there any user-facing changes? Nope.
This commit is contained in:
@@ -41,6 +41,7 @@ on:
|
||||
- arrow-avro/**
|
||||
- arrow-ord/**
|
||||
- arrow-pyarrow-integration-testing/**
|
||||
- arrow-pyarrow/**
|
||||
- arrow-schema/**
|
||||
- arrow-select/**
|
||||
- arrow-sort/**
|
||||
@@ -164,7 +165,7 @@ jobs:
|
||||
- name: Run Rust tests
|
||||
run: |
|
||||
source venv/bin/activate
|
||||
cargo test -p arrow --test pyarrow --features pyarrow
|
||||
cargo test -p arrow-pyarrow
|
||||
- name: Run tests
|
||||
run: |
|
||||
source venv/bin/activate
|
||||
|
||||
@@ -51,7 +51,8 @@ jobs:
|
||||
run: |
|
||||
# do not produce debug symbols to keep memory usage down
|
||||
export RUSTFLAGS="-C debuginfo=0"
|
||||
cargo test
|
||||
# PyArrow tests happen in integration.yml.
|
||||
cargo test --workspace --exclude arrow-pyarrow
|
||||
|
||||
|
||||
# Check workspace wide compile and test with default features for
|
||||
@@ -83,7 +84,8 @@ jobs:
|
||||
# do not produce debug symbols to keep memory usage down
|
||||
export RUSTFLAGS="-C debuginfo=0"
|
||||
export PATH=$PATH:/d/protoc/bin
|
||||
cargo test
|
||||
# PyArrow tests happen in integration.yml.
|
||||
cargo test --workspace --exclude arrow-pyarrow
|
||||
|
||||
|
||||
# Run cargo fmt for all crates
|
||||
|
||||
@@ -33,6 +33,7 @@ members = [
|
||||
"arrow-ipc",
|
||||
"arrow-json",
|
||||
"arrow-ord",
|
||||
"arrow-pyarrow",
|
||||
"arrow-row",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -88,6 +89,7 @@ arrow-data = { version = "55.1.0", path = "./arrow-data" }
|
||||
arrow-ipc = { version = "55.1.0", path = "./arrow-ipc" }
|
||||
arrow-json = { version = "55.1.0", path = "./arrow-json" }
|
||||
arrow-ord = { version = "55.1.0", path = "./arrow-ord" }
|
||||
arrow-pyarrow = { version = "55.1.0", path = "./arrow-pyarrow" }
|
||||
arrow-row = { version = "55.1.0", path = "./arrow-row" }
|
||||
arrow-schema = { version = "55.1.0", path = "./arrow-schema" }
|
||||
arrow-select = { version = "55.1.0", path = "./arrow-select" }
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
[package]
|
||||
name = "arrow-pyarrow"
|
||||
version = { workspace = true }
|
||||
description = "Pyarrow bindings"
|
||||
homepage = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
authors = { workspace = true }
|
||||
license = { workspace = true }
|
||||
keywords = { workspace = true }
|
||||
include = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
|
||||
[lib]
|
||||
name = "arrow_pyarrow"
|
||||
bench = false
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
all-features = true
|
||||
|
||||
[dependencies]
|
||||
arrow-array = { workspace = true, features = ["ffi"] }
|
||||
arrow-data = { workspace = true }
|
||||
arrow-schema = { workspace = true }
|
||||
pyo3 = { version = "0.24.1", default-features = false }
|
||||
@@ -60,7 +60,15 @@ use std::convert::{From, TryFrom};
|
||||
use std::ptr::{addr_of, addr_of_mut};
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{RecordBatchIterator, RecordBatchOptions, RecordBatchReader, StructArray};
|
||||
use arrow_array::ffi;
|
||||
use arrow_array::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
|
||||
use arrow_array::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
|
||||
use arrow_array::{
|
||||
make_array, RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader,
|
||||
StructArray,
|
||||
};
|
||||
use arrow_data::ArrayData;
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||
use pyo3::exceptions::{PyTypeError, PyValueError};
|
||||
use pyo3::ffi::Py_uintptr_t;
|
||||
use pyo3::import_exception;
|
||||
@@ -68,14 +76,6 @@ use pyo3::prelude::*;
|
||||
use pyo3::pybacked::PyBackedStr;
|
||||
use pyo3::types::{PyCapsule, PyList, PyTuple};
|
||||
|
||||
use crate::array::{make_array, ArrayData};
|
||||
use crate::datatypes::{DataType, Field, Schema};
|
||||
use crate::error::ArrowError;
|
||||
use crate::ffi;
|
||||
use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
|
||||
use crate::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
|
||||
use crate::record_batch::RecordBatch;
|
||||
|
||||
import_exception!(pyarrow, ArrowException);
|
||||
/// Represents an exception raised by PyArrow.
|
||||
pub type PyArrowException = ArrowException;
|
||||
@@ -15,11 +15,11 @@
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
use arrow::array::{ArrayRef, Int32Array, StringArray};
|
||||
use arrow::pyarrow::{FromPyArrow, ToPyArrow};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_array::builder::{BinaryViewBuilder, StringViewBuilder};
|
||||
use arrow_array::{Array, BinaryViewArray, StringViewArray};
|
||||
use arrow_array::{
|
||||
Array, ArrayRef, BinaryViewArray, Int32Array, RecordBatch, StringArray, StringViewArray,
|
||||
};
|
||||
use arrow_pyarrow::{FromPyArrow, ToPyArrow};
|
||||
use pyo3::Python;
|
||||
use std::sync::Arc;
|
||||
|
||||
+2
-6
@@ -48,13 +48,13 @@ arrow-data = { workspace = true }
|
||||
arrow-ipc = { workspace = true, optional = true }
|
||||
arrow-json = { workspace = true, optional = true }
|
||||
arrow-ord = { workspace = true }
|
||||
arrow-pyarrow = { workspace = true, optional = true }
|
||||
arrow-row = { workspace = true }
|
||||
arrow-schema = { workspace = true }
|
||||
arrow-select = { workspace = true }
|
||||
arrow-string = { workspace = true }
|
||||
|
||||
rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true }
|
||||
pyo3 = { version = "0.24.1", default-features = false, optional = true }
|
||||
half = { version = "2.1", default-features = false, optional = true }
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
@@ -72,7 +72,7 @@ prettyprint = ["arrow-cast/prettyprint"]
|
||||
# an optional dependency for supporting compile to wasm32-unknown-unknown
|
||||
# target without assuming an environment containing JavaScript.
|
||||
test_utils = ["dep:rand", "dep:half"]
|
||||
pyarrow = ["pyo3", "ffi"]
|
||||
pyarrow = ["ffi", "dep:arrow-pyarrow"]
|
||||
# force_validate runs full data validation for all arrays that are created
|
||||
# this is not enabled by default as it is too computationally expensive
|
||||
# but is run as part of our CI checks
|
||||
@@ -299,10 +299,6 @@ required-features = ["test_utils"]
|
||||
name = "csv"
|
||||
required-features = ["csv", "chrono-tz"]
|
||||
|
||||
[[test]]
|
||||
name = "pyarrow"
|
||||
required-features = ["pyarrow"]
|
||||
|
||||
[[test]]
|
||||
name = "array_cast"
|
||||
required-features = ["chrono-tz", "prettyprint"]
|
||||
|
||||
+1
-1
@@ -397,7 +397,7 @@ pub use arrow_ipc as ipc;
|
||||
#[cfg(feature = "json")]
|
||||
pub use arrow_json as json;
|
||||
#[cfg(feature = "pyarrow")]
|
||||
pub mod pyarrow;
|
||||
pub use arrow_pyarrow as pyarrow;
|
||||
|
||||
/// Contains the `RecordBatch` type and associated traits
|
||||
pub mod record_batch {
|
||||
|
||||
Reference in New Issue
Block a user