Define an "arrow-pyarrow" crate to implement the "pyarrow" feature. (#7694)

This follows the pattern of other parts of the arrow-rs codebase:
arrow-array, arrow-schema, etc.

With this change, polyglot codebases can use pyarrow without making all
their crates that use arrow pull in pyarrow (& pyo3).

It also allows interfacing with PyArrow without pulling in the full `arrow` crate.

# Which issue does this PR close?

Closes https://github.com/apache/arrow-rs/issues/7668.

# Rationale for this change

One part of a codebase can use pyarrow without `arrow` pulling pyo3 into
every other crate in the codebase.

# Are there any user-facing changes?

Nope.
This commit is contained in:
Bruno
2025-06-20 21:55:10 +02:00
committed by GitHub
parent fbaf7cea2d
commit 469c7ee177
8 changed files with 66 additions and 23 deletions
+2 -1
View File
@@ -41,6 +41,7 @@ on:
- arrow-avro/**
- arrow-ord/**
- arrow-pyarrow-integration-testing/**
- arrow-pyarrow/**
- arrow-schema/**
- arrow-select/**
- arrow-sort/**
@@ -164,7 +165,7 @@ jobs:
- name: Run Rust tests
run: |
source venv/bin/activate
cargo test -p arrow --test pyarrow --features pyarrow
cargo test -p arrow-pyarrow
- name: Run tests
run: |
source venv/bin/activate
+4 -2
View File
@@ -51,7 +51,8 @@ jobs:
run: |
# do not produce debug symbols to keep memory usage down
export RUSTFLAGS="-C debuginfo=0"
cargo test
# PyArrow tests happen in integration.yml.
cargo test --workspace --exclude arrow-pyarrow
# Check workspace wide compile and test with default features for
@@ -83,7 +84,8 @@ jobs:
# do not produce debug symbols to keep memory usage down
export RUSTFLAGS="-C debuginfo=0"
export PATH=$PATH:/d/protoc/bin
cargo test
# PyArrow tests happen in integration.yml.
cargo test --workspace --exclude arrow-pyarrow
# Run cargo fmt for all crates
+2
View File
@@ -33,6 +33,7 @@ members = [
"arrow-ipc",
"arrow-json",
"arrow-ord",
"arrow-pyarrow",
"arrow-row",
"arrow-schema",
"arrow-select",
@@ -88,6 +89,7 @@ arrow-data = { version = "55.1.0", path = "./arrow-data" }
arrow-ipc = { version = "55.1.0", path = "./arrow-ipc" }
arrow-json = { version = "55.1.0", path = "./arrow-json" }
arrow-ord = { version = "55.1.0", path = "./arrow-ord" }
arrow-pyarrow = { version = "55.1.0", path = "./arrow-pyarrow" }
arrow-row = { version = "55.1.0", path = "./arrow-row" }
arrow-schema = { version = "55.1.0", path = "./arrow-schema" }
arrow-select = { version = "55.1.0", path = "./arrow-select" }
+42
View File
@@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
[package]
name = "arrow-pyarrow"
version = { workspace = true }
description = "Pyarrow bindings"
homepage = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
include = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
[lib]
name = "arrow_pyarrow"
bench = false
[package.metadata.docs.rs]
all-features = true
[dependencies]
arrow-array = { workspace = true, features = ["ffi"] }
arrow-data = { workspace = true }
arrow-schema = { workspace = true }
pyo3 = { version = "0.24.1", default-features = false }
@@ -60,7 +60,15 @@ use std::convert::{From, TryFrom};
use std::ptr::{addr_of, addr_of_mut};
use std::sync::Arc;
use arrow_array::{RecordBatchIterator, RecordBatchOptions, RecordBatchReader, StructArray};
use arrow_array::ffi;
use arrow_array::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use arrow_array::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
use arrow_array::{
make_array, RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader,
StructArray,
};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field, Schema};
use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::ffi::Py_uintptr_t;
use pyo3::import_exception;
@@ -68,14 +76,6 @@ use pyo3::prelude::*;
use pyo3::pybacked::PyBackedStr;
use pyo3::types::{PyCapsule, PyList, PyTuple};
use crate::array::{make_array, ArrayData};
use crate::datatypes::{DataType, Field, Schema};
use crate::error::ArrowError;
use crate::ffi;
use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use crate::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
use crate::record_batch::RecordBatch;
import_exception!(pyarrow, ArrowException);
/// Represents an exception raised by PyArrow.
pub type PyArrowException = ArrowException;
@@ -15,11 +15,11 @@
// specific language governing permissions and limitations
// under the License.
use arrow::array::{ArrayRef, Int32Array, StringArray};
use arrow::pyarrow::{FromPyArrow, ToPyArrow};
use arrow::record_batch::RecordBatch;
use arrow_array::builder::{BinaryViewBuilder, StringViewBuilder};
use arrow_array::{Array, BinaryViewArray, StringViewArray};
use arrow_array::{
Array, ArrayRef, BinaryViewArray, Int32Array, RecordBatch, StringArray, StringViewArray,
};
use arrow_pyarrow::{FromPyArrow, ToPyArrow};
use pyo3::Python;
use std::sync::Arc;
+2 -6
View File
@@ -48,13 +48,13 @@ arrow-data = { workspace = true }
arrow-ipc = { workspace = true, optional = true }
arrow-json = { workspace = true, optional = true }
arrow-ord = { workspace = true }
arrow-pyarrow = { workspace = true, optional = true }
arrow-row = { workspace = true }
arrow-schema = { workspace = true }
arrow-select = { workspace = true }
arrow-string = { workspace = true }
rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true }
pyo3 = { version = "0.24.1", default-features = false, optional = true }
half = { version = "2.1", default-features = false, optional = true }
[package.metadata.docs.rs]
@@ -72,7 +72,7 @@ prettyprint = ["arrow-cast/prettyprint"]
# an optional dependency for supporting compile to wasm32-unknown-unknown
# target without assuming an environment containing JavaScript.
test_utils = ["dep:rand", "dep:half"]
pyarrow = ["pyo3", "ffi"]
pyarrow = ["ffi", "dep:arrow-pyarrow"]
# force_validate runs full data validation for all arrays that are created
# this is not enabled by default as it is too computationally expensive
# but is run as part of our CI checks
@@ -299,10 +299,6 @@ required-features = ["test_utils"]
name = "csv"
required-features = ["csv", "chrono-tz"]
[[test]]
name = "pyarrow"
required-features = ["pyarrow"]
[[test]]
name = "array_cast"
required-features = ["chrono-tz", "prettyprint"]
+1 -1
View File
@@ -397,7 +397,7 @@ pub use arrow_ipc as ipc;
#[cfg(feature = "json")]
pub use arrow_json as json;
#[cfg(feature = "pyarrow")]
pub mod pyarrow;
pub use arrow_pyarrow as pyarrow;
/// Contains the `RecordBatch` type and associated traits
pub mod record_batch {