mirror of
https://github.com/openharmony/third_party_rust_os_str_bytes.git
synced 2026-06-30 22:08:37 -04:00
Initial commit
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
/target
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
||||
@@ -0,0 +1,5 @@
|
||||
Copyright (c) 2019 Dylan Iuzzolino
|
||||
|
||||
Licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or the MIT
|
||||
license <LICENSE-MIT>, at your option. All files in this project may not be
|
||||
copied, modified, or distributed except according to those terms.
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "os_str_bytes"
|
||||
version = "0.1.0"
|
||||
authors = ["dylni"]
|
||||
edition = "2018"
|
||||
exclude = ["/.git*", "/target"]
|
||||
description = """
|
||||
Traits for converting between byte sequences and platform-native strings.
|
||||
"""
|
||||
repository = "https://github.com/dylni/os_str_bytes"
|
||||
readme = "README.md"
|
||||
keywords = ["osstr", "os_str", "osstring", "os_string", "bytes"]
|
||||
categories = ["command-line-interface", "development-tools::ffi", "os"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[dev-dependencies]
|
||||
getrandom = "0.1.13"
|
||||
+201
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Dylan Iuzzolino
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,35 @@
|
||||
# OsStr Bytes
|
||||
|
||||
Traits for converting between byte sequences and platform-native strings.
|
||||
|
||||
This crate allows interacting with the bytes stored internally by [`OsStr`] and
|
||||
[`OsString`], without resorting to panics or data corruption for invalid UTF-8.
|
||||
Thus, methods can be used that are already defined on [`[u8]`][slice] and
|
||||
[`Vec<u8>`].
|
||||
|
||||
Typically, the only way to losslessly construct [`OsStr`] or [`OsString`] from
|
||||
a byte sequence is to use `OsString::from(String::from_utf8(bytes).unwrap())`, which
|
||||
requires the bytes to be valid in UTF-8. However, since this crate makes
|
||||
conversions directly between the platform encoding and raw bytes, even some
|
||||
strings invalid in UTF-8 can be converted.
|
||||
|
||||
## Usage
|
||||
|
||||
Add the following lines to your "Cargo.toml" file:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
os_str_bytes = "0.1"
|
||||
```
|
||||
|
||||
See the [documentation] for available functionality and examples.
|
||||
|
||||
## Rust version support
|
||||
|
||||
The minimum supported Rust toolchain version is currently Rust 1.32.0.
|
||||
|
||||
[documentation]: https://docs.rs/os_str_bytes
|
||||
[slice]: https://doc.rust-lang.org/std/primitive.slice.html
|
||||
[`OsStr`]: https://doc.rust-lang.org/std/ffi/struct.OsStr.html
|
||||
[`OsString`]: https://doc.rust-lang.org/std/ffi/struct.OsString.html
|
||||
[`Vec<u8>`]: https://doc.rust-lang.org/std/vec/struct.Vec.html
|
||||
+435
@@ -0,0 +1,435 @@
|
||||
//! Traits for converting between byte sequences and platform-native strings.
|
||||
//!
|
||||
//! This crate allows interacting with the bytes stored internally by [`OsStr`]
|
||||
//! and [`OsString`], without resorting to panics or data corruption for
|
||||
//! invalid UTF-8. Thus, methods can be used that are already defined on
|
||||
//! [`[u8]`][slice] and [`Vec<u8>`].
|
||||
//!
|
||||
//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`]
|
||||
//! from a byte sequence is to use `OsString::from(String::from_utf8(bytes)?)`,
|
||||
//! which requires the bytes to be valid in UTF-8. However, since this crate
|
||||
//! makes conversions directly between the platform encoding and raw bytes,
|
||||
//! even some strings invalid in UTF-8 can be converted.
|
||||
//!
|
||||
//! # Implementation
|
||||
//!
|
||||
//! All traits are [sealed], meaning that they can only be implemented by this
|
||||
//! crate. Otherwise, backwards compatibility would be more difficult to
|
||||
//! maintain for new features.
|
||||
//!
|
||||
//! # Complexity
|
||||
//!
|
||||
//! The time complexities of methods will vary based on what functionality is
|
||||
//! available for the platform. The most efficient implementation will be used,
|
||||
//! but it is important to use the most applicable method. For example,
|
||||
//! [`OsStringBytes::from_vec`] will be at least as efficient as
|
||||
//! [`OsStringBytes::from_bytes`], but the latter should be used when only a
|
||||
//! slice is available.
|
||||
//!
|
||||
//! # Safety
|
||||
//!
|
||||
//! Some unsafe assumptions are made, with the most egregious being that
|
||||
//! [`str::from_utf8_unchecked`] returns a partially usable string for invalid
|
||||
//! UTF-8. The alternative would be to encode and decode strings manually,
|
||||
//! which would be more dangerous, as it would create a reliance on how the
|
||||
//! standard library encodes invalid UTF-8 strings.
|
||||
//!
|
||||
//! To make this implementation less problematic, it is best to not make any
|
||||
//! assumptions about the representation of invalid UTF-8 bytes. However, given
|
||||
//! the purpose of this crate, every measure will be taken to ensure that it
|
||||
//! matches the raw byte sequence, meaning this is usually not a concern. Tests
|
||||
//! exist to validate that the conversions are sound.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```
|
||||
//! use std::env::temp_dir;
|
||||
//! use std::ffi::OsStr;
|
||||
//! use std::fs::read_to_string;
|
||||
//! use std::fs::write;
|
||||
//! # use std::io::Result;
|
||||
//!
|
||||
//! use os_str_bytes::OsStrBytes;
|
||||
//!
|
||||
//! # fn main() -> Result<()> {
|
||||
//! let string = "hello world";
|
||||
//! let file_name = b"\xC3\xA9os_str\xED\xA0\xBDbytes\xF0\x9F\x92\xA9.txt";
|
||||
//!
|
||||
//! let mut file = temp_dir();
|
||||
//! // In this example, conversion always succeeds, so `unwrap()` can be used.
|
||||
//! file.push(OsStr::from_bytes(file_name).unwrap());
|
||||
//!
|
||||
//! write(&file, string)?;
|
||||
//! assert_eq!(string, read_to_string(file)?);
|
||||
//! #
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed
|
||||
//! [slice]: https://doc.rust-lang.org/std/primitive.slice.html
|
||||
//! [`OsStr`]: https://doc.rust-lang.org/std/ffi/struct.OsStr.html
|
||||
//! [`OsString`]: https://doc.rust-lang.org/std/ffi/struct.OsString.html
|
||||
//! [`OsStringBytes::from_bytes`]: trait.OsStringBytes.html#tymethod.from_bytes
|
||||
//! [`OsStringBytes::from_vec`]: trait.OsStringBytes.html#tymethod.from_vec
|
||||
//! [`str::from_utf8_unchecked`]: https://doc.rust-lang.org/std/str/fn.from_utf8_unchecked.html
|
||||
//! [`Vec<u8>`]: https://doc.rust-lang.org/std/vec/struct.Vec.html
|
||||
|
||||
#![doc(html_root_url = "https://docs.rs/os_str_bytes/0.1.0")]
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::error::Error;
|
||||
use std::fmt::Display;
|
||||
use std::fmt::Formatter;
|
||||
use std::fmt::Result as FmtResult;
|
||||
|
||||
#[cfg(unix)]
|
||||
#[path = "unix.rs"]
|
||||
mod imp;
|
||||
#[cfg(windows)]
|
||||
#[path = "windows.rs"]
|
||||
mod imp;
|
||||
|
||||
/// Error reported when bytes cannot be represented in the encoding used by
/// the current platform.
///
/// This error is not expected to be produced on Unix. When that must be
/// guaranteed rather than expected, use [`OsStrExt`] or [`OsStringExt`]
/// directly instead of this crate.
///
/// The wrapped unit field is private, so values of this type cannot be
/// constructed outside of this crate.
///
/// [`OsStrExt`]: https://doc.rust-lang.org/std/os/unix/ffi/trait.OsStrExt.html
/// [`OsStringExt`]: https://doc.rust-lang.org/std/os/unix/ffi/trait.OsStringExt.html
#[derive(Debug, PartialEq, Eq)]
pub struct EncodingError(());
|
||||
|
||||
impl Display for EncodingError {
|
||||
fn fmt(&self, formatter: &mut Formatter<'_>) -> FmtResult {
|
||||
"byte sequence is not representable in the platform encoding"
|
||||
.fmt(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
// No methods are overridden: every `Error` method keeps its default
// implementation, and the message comes from the `Display` implementation.
impl Error for EncodingError {}
|
||||
|
||||
/// A platform agnostic variant of [`OsStrExt`].
|
||||
///
|
||||
/// For more information, see [the module-level documentation][module].
|
||||
///
|
||||
/// [module]: index.html
|
||||
/// [`OsStrExt`]: https://doc.rust-lang.org/std/os/unix/ffi/trait.OsStrExt.html
|
||||
pub trait OsStrBytes: private::Sealed + ToOwned {
|
||||
/// Converts a byte slice into an equivalent platform-native string
|
||||
/// reference.
|
||||
///
|
||||
/// This method returns [`Cow<Self>`] to account for platform differences.
|
||||
/// However, no guarantee is made that the same variant of that enum will
|
||||
/// always be returned for the same platform. Whichever can be constructed
|
||||
/// most efficiently will be returned.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use std::ffi::OsStr;
|
||||
/// #
|
||||
/// # use os_str_bytes::EncodingError;
|
||||
/// use os_str_bytes::OsStrBytes;
|
||||
///
|
||||
/// # fn main() -> Result<(), EncodingError> {
|
||||
/// let string = b"foo\xED\xA0\xBDbar";
|
||||
/// assert_eq!(string.len(), OsStr::from_bytes(string)?.len());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [`Cow<Self>`]: https://doc.rust-lang.org/std/borrow/enum.Cow.html
|
||||
fn from_bytes(string: &[u8]) -> Result<Cow<'_, Self>, EncodingError>;
|
||||
|
||||
/// The unsafe equivalent of [`from_bytes`].
|
||||
///
|
||||
/// More information is given in that method's documentation.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This method is unsafe, because it does not check that the bytes passed
|
||||
/// are representable in the platform encoding. If this constraint is
|
||||
/// violated, it may cause memory unsafety issues with future uses of this
|
||||
/// string, as the rest of the standard library assumes that [`OsStr`] and
|
||||
/// [`OsString`] will be usable for the platform. However, the most likely
|
||||
/// issue is that the data gets corrupted.
|
||||
///
|
||||
/// [`from_bytes`]: #tymethod.from_bytes
|
||||
/// [`OsStr`]: https://doc.rust-lang.org/std/ffi/struct.OsStr.html
|
||||
/// [`OsString`]: https://doc.rust-lang.org/std/ffi/struct.OsString.html
|
||||
unsafe fn from_bytes_unchecked(string: &[u8]) -> Cow<'_, Self>;
|
||||
|
||||
/// Converts the internal byte representation into a byte slice.
|
||||
///
|
||||
/// For more information, see [`from_bytes`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use std::ffi::OsStr;
|
||||
/// #
|
||||
/// # use os_str_bytes::EncodingError;
|
||||
/// use os_str_bytes::OsStrBytes;
|
||||
///
|
||||
/// # fn main() -> Result<(), EncodingError> {
|
||||
/// let string = b"foo\xED\xA0\xBDbar";
|
||||
/// let os_string = OsStr::from_bytes(string)?.into_owned();
|
||||
/// assert_eq!(string, os_string.to_bytes().as_ref());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [`from_bytes`]: #tymethod.from_bytes
|
||||
fn to_bytes(&self) -> Cow<'_, [u8]>;
|
||||
}
|
||||
|
||||
/// A platform agnostic variant of [`OsStringExt`].
|
||||
///
|
||||
/// For more information, see [the module-level documentation][module].
|
||||
///
|
||||
/// [module]: index.html
|
||||
/// [`OsStringExt`]: https://doc.rust-lang.org/std/os/unix/ffi/trait.OsStringExt.html
|
||||
pub trait OsStringBytes: private::Sealed + Sized {
|
||||
/// Copies a byte slice into a new equivalent platform-native string.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use std::ffi::OsString;
|
||||
/// #
|
||||
/// # use os_str_bytes::EncodingError;
|
||||
/// use os_str_bytes::OsStringBytes;
|
||||
///
|
||||
/// # fn main() -> Result<(), EncodingError> {
|
||||
/// let string = b"foo\xED\xA0\xBDbar";
|
||||
/// assert_eq!(string.len(), OsString::from_bytes(string)?.len());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
fn from_bytes<TString>(string: TString) -> Result<Self, EncodingError>
|
||||
where
|
||||
TString: AsRef<[u8]>;
|
||||
|
||||
/// The unsafe equivalent of [`from_bytes`].
|
||||
///
|
||||
/// More information is given in that method's documentation.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This method is unsafe for the same reason as
|
||||
/// [`OsStrBytes::from_bytes_unchecked`].
|
||||
///
|
||||
/// [`from_bytes`]: #tymethod.from_bytes
|
||||
/// [`OsStrBytes::from_bytes_unchecked`]: trait.OsStrBytes.html#tymethod.from_bytes_unchecked
|
||||
unsafe fn from_bytes_unchecked<TString>(string: TString) -> Self
|
||||
where
|
||||
TString: AsRef<[u8]>;
|
||||
|
||||
/// Converts a byte vector into an equivalent platform-native string.
|
||||
///
|
||||
/// Whenever possible, the conversion will be performed without copying.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use std::ffi::OsString;
|
||||
/// #
|
||||
/// # use os_str_bytes::EncodingError;
|
||||
/// use os_str_bytes::OsStringBytes;
|
||||
///
|
||||
/// # fn main() -> Result<(), EncodingError> {
|
||||
/// let string = b"foo\xED\xA0\xBDbar".to_vec();
|
||||
/// assert_eq!(string.len(), OsString::from_vec(string)?.len());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
fn from_vec(string: Vec<u8>) -> Result<Self, EncodingError>;
|
||||
|
||||
/// The unsafe equivalent of [`from_vec`].
|
||||
///
|
||||
/// More information is given in that method's documentation.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This method is unsafe for the same reason as
|
||||
/// [`OsStrBytes::from_bytes_unchecked`].
|
||||
///
|
||||
/// [`from_vec`]: #tymethod.from_vec
|
||||
/// [`OsStrBytes::from_bytes_unchecked`]: trait.OsStrBytes.html#tymethod.from_bytes_unchecked
|
||||
unsafe fn from_vec_unchecked(string: Vec<u8>) -> Self;
|
||||
|
||||
/// Converts the internal byte representation into a byte vector.
|
||||
///
|
||||
/// Whenever possible, the conversion will be performed without copying.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use std::ffi::OsString;
|
||||
/// #
|
||||
/// # use os_str_bytes::EncodingError;
|
||||
/// use os_str_bytes::OsStringBytes;
|
||||
///
|
||||
/// # fn main() -> Result<(), EncodingError> {
|
||||
/// let string = b"foo\xED\xA0\xBDbar".to_vec();
|
||||
/// let os_string = OsString::from_vec(string.clone())?;
|
||||
/// assert_eq!(string, os_string.into_vec());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
fn into_vec(self) -> Vec<u8>;
|
||||
}
|
||||
|
||||
/// Holds the marker trait used to seal the public traits of this crate.
///
/// The module itself is private, so `Sealed` cannot be named, and therefore
/// cannot be implemented, from outside of the crate.
mod private {
    /// Marker implemented only for the two standard platform string types.
    pub trait Sealed {}

    impl Sealed for ::std::ffi::OsStr {}

    impl Sealed for ::std::ffi::OsString {}
}
|
||||
|
||||
/// Platform-independent tests, plus helpers and fixtures that are shared with
/// the tests of the platform-specific implementation modules.
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;
    use std::ffi::OsString;
    use std::str;

    use getrandom::getrandom;
    use getrandom::Error as GetRandomError;

    use crate::EncodingError;
    use crate::OsStrBytes;
    use crate::OsStringBytes;

    /// A string that is valid UTF-8.
    const UTF8_STRING: &str = "string";

    /// Bytes that are invalid UTF-8 because they contain `\xED\xA0\xBD`, the
    /// three-byte encoding of the surrogate U+D83D.
    const WTF8_STRING: &[u8] = b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar";

    /// Bytes that are invalid UTF-8 because each `\xF1` lead byte is followed
    /// by fewer continuation bytes than a four-byte sequence requires.
    pub(crate) const INVALID_STRING: &[u8] =
        b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";

    /// The number of elements in the buffer filled by [`random_os_string`].
    const RANDOM_BYTES_LENGTH: usize = 100;

    /// Asserts that `right` is `Ok` and equal to `left` when both are viewed
    /// as `OsStr`.
    #[inline]
    fn assert_os_eq<TRight>(left: &OsStr, right: Result<TRight, EncodingError>)
    where
        TRight: AsRef<OsStr>,
    {
        assert_eq!(Ok(left), right.as_ref().map(TRight::as_ref));
    }

    /// Creates a platform string from `buffer_length` random elements: bytes
    /// on Unix and 16-bit code units on Windows.
    fn random_os_string(
        buffer_length: usize,
    ) -> Result<OsString, GetRandomError> {
        let mut buffer = vec![0; buffer_length];
        #[cfg(unix)]
        {
            getrandom(&mut buffer)?;
            Ok(::std::os::unix::ffi::OsStringExt::from_vec(buffer))
        }
        #[cfg(windows)]
        {
            /// Views a buffer of 16-bit code units as its underlying bytes.
            fn as_mut_bytes(buffer: &mut [u16]) -> &mut [u8] {
                // The byte length must be scaled by the element size.
                // Transmuting the slice reference would keep the element
                // count as the length, leaving half of the buffer zeroed, and
                // would rely on the unspecified layout of slice references.
                let length = buffer.len() * ::std::mem::size_of::<u16>();
                // SAFETY: The pointer and length describe exactly the memory
                // owned by `buffer`, which stays mutably borrowed for the
                // lifetime of the returned slice. `u8` has an alignment of 1
                // and every bit pattern is valid for both `u8` and `u16`.
                unsafe {
                    ::std::slice::from_raw_parts_mut(
                        buffer.as_mut_ptr() as *mut u8,
                        length,
                    )
                }
            }

            getrandom(as_mut_bytes(&mut buffer))?;
            Ok(::std::os::windows::ffi::OsStringExt::from_wide(&buffer))
        }
    }

    #[test]
    fn test_empty_bytes() {
        assert_os_eq(&OsString::new(), OsStr::from_bytes(&[]));
        assert_os_eq(&OsString::new(), OsString::from_bytes([]));
        assert_eq!(
            // Assist type inference.
            &[b'\0'; 0],
            OsString::new().as_os_str().to_bytes().as_ref(),
        );
    }

    #[test]
    fn test_empty_vec() -> Result<(), EncodingError> {
        assert_eq!(0, OsString::from_vec(Vec::new())?.len());
        assert_eq!(Vec::<u8>::new(), OsString::new().into_vec());
        Ok(())
    }

    #[test]
    fn test_utf8_bytes() {
        let os_str = OsString::from(UTF8_STRING);
        let os_str = os_str.as_os_str();
        assert_os_eq(&os_str, OsStr::from_bytes(UTF8_STRING.as_bytes()));
        assert_os_eq(&os_str, OsString::from_bytes(UTF8_STRING));
        assert_eq!(UTF8_STRING.as_bytes(), os_str.to_bytes().as_ref());
    }

    #[test]
    fn test_utf8_vec() {
        let os_string = OsString::from(UTF8_STRING);
        assert_os_eq(
            &os_string,
            OsString::from_vec(UTF8_STRING.to_string().into_bytes()),
        );
        assert_eq!(UTF8_STRING.to_string().into_bytes(), os_string.into_vec());
    }

    /// Asserts that `string` is rejected by UTF-8 validation, confirming that
    /// a fixture exercises the non-UTF-8 code paths.
    fn test_string_is_invalid_utf8(string: &[u8]) {
        assert!(str::from_utf8(string).is_err());
    }

    /// Checks that `string` survives a round trip through the slice-based
    /// conversions with its length and contents unchanged.
    pub(crate) fn test_bytes(string: &[u8]) -> Result<(), EncodingError> {
        let os_string = OsStr::from_bytes(string)?;
        assert_eq!(string.len(), os_string.len());
        assert_os_eq(&os_string, OsString::from_bytes(string));
        assert_eq!(string, os_string.to_bytes().as_ref());
        Ok(())
    }

    /// Checks that `string` survives a round trip through the vector-based
    /// conversions with its length and contents unchanged.
    pub(crate) fn test_vec(string: &[u8]) -> Result<(), EncodingError> {
        let os_string = OsString::from_vec(string.to_vec())?;
        assert_eq!(string.len(), os_string.len());
        assert_eq!(string, os_string.into_vec().as_slice());
        Ok(())
    }

    #[test]
    fn test_invalid_string_is_invalid() {
        test_string_is_invalid_utf8(INVALID_STRING);
    }

    #[test]
    fn test_wtf8_string_is_invalid_utf8() {
        test_string_is_invalid_utf8(WTF8_STRING);
    }

    #[test]
    fn test_wtf8_bytes() -> Result<(), EncodingError> {
        test_bytes(WTF8_STRING)
    }

    #[test]
    fn test_wtf8_vec() -> Result<(), EncodingError> {
        test_vec(WTF8_STRING)
    }

    #[test]
    fn test_random_bytes() {
        let os_string = random_os_string(RANDOM_BYTES_LENGTH).unwrap();
        let string = os_string.to_bytes();
        assert_eq!(os_string.len(), string.len());
        assert_os_eq(&os_string, OsStr::from_bytes(&string));
        assert_os_eq(&os_string, OsString::from_bytes(string));
    }

    #[test]
    fn test_random_vec() {
        let os_string = random_os_string(RANDOM_BYTES_LENGTH).unwrap();
        let string = os_string.clone().into_vec();
        assert_eq!(os_string.len(), string.len());
        assert_os_eq(&os_string, OsString::from_vec(string));
    }
}
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
use std::ffi::OsString;
|
||||
|
||||
use crate::EncodingError;
|
||||
use crate::OsStrBytes;
|
||||
use crate::OsStringBytes;
|
||||
|
||||
/// Reinterprets `string` as a borrowed platform string.
///
/// This delegates to the standard library's Unix extension trait and never
/// fails, so the result is always `Cow::Borrowed`.
#[inline]
fn from_bytes(string: &[u8]) -> Cow<'_, OsStr> {
    use std::os::unix::ffi::OsStrExt;

    let os_str = <OsStr as OsStrExt>::from_bytes(string);
    Cow::Borrowed(os_str)
}
|
||||
|
||||
/// Wraps `string` in an owned platform string.
///
/// This delegates to the standard library's Unix extension trait and never
/// fails.
#[inline]
fn from_vec(string: Vec<u8>) -> OsString {
    use std::os::unix::ffi::OsStringExt;

    <OsString as OsStringExt>::from_vec(string)
}
|
||||
|
||||
impl OsStrBytes for OsStr {
|
||||
#[inline]
|
||||
fn from_bytes(string: &[u8]) -> Result<Cow<'_, Self>, EncodingError> {
|
||||
Ok(from_bytes(string))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn from_bytes_unchecked(string: &[u8]) -> Cow<'_, Self> {
|
||||
from_bytes(string)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_bytes(&self) -> Cow<'_, [u8]> {
|
||||
Cow::Borrowed(::std::os::unix::ffi::OsStrExt::as_bytes(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl OsStringBytes for OsString {
|
||||
#[inline]
|
||||
fn from_bytes<TString>(string: TString) -> Result<Self, EncodingError>
|
||||
where
|
||||
TString: AsRef<[u8]>,
|
||||
{
|
||||
Ok(from_bytes(string.as_ref()).into_owned())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn from_bytes_unchecked<TString>(string: TString) -> Self
|
||||
where
|
||||
TString: AsRef<[u8]>,
|
||||
{
|
||||
from_bytes(string.as_ref()).into_owned()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_vec(string: Vec<u8>) -> Result<Self, EncodingError> {
|
||||
Ok(from_vec(string))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn from_vec_unchecked(string: Vec<u8>) -> Self {
|
||||
from_vec(string)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn into_vec(self) -> Vec<u8> {
|
||||
::std::os::unix::ffi::OsStringExt::into_vec(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::tests::*;
    use crate::EncodingError;

    /// The borrowed-bytes round trip is expected to succeed for the
    /// `INVALID_STRING` fixture with this implementation.
    #[test]
    fn test_invalid_bytes() -> Result<(), EncodingError> {
        test_bytes(INVALID_STRING)?;
        Ok(())
    }

    /// The owned-vector round trip is expected to succeed for the
    /// `INVALID_STRING` fixture with this implementation.
    #[test]
    fn test_invalid_vec() -> Result<(), EncodingError> {
        test_vec(INVALID_STRING)?;
        Ok(())
    }
}
|
||||
+138
@@ -0,0 +1,138 @@
|
||||
// These methods are necessarily inefficient, because they must revert encoding
|
||||
// conversions performed by the standard library. However, there is currently
|
||||
// no better alternative.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::char;
|
||||
use std::ffi::OsStr;
|
||||
use std::ffi::OsString;
|
||||
use std::mem::size_of;
|
||||
use std::str;
|
||||
|
||||
use crate::EncodingError;
|
||||
use crate::OsStrBytes;
|
||||
use crate::OsStringBytes;
|
||||
|
||||
fn from_bytes_unchecked(string: &[u8]) -> OsString {
|
||||
// https://github.com/rust-lang/rust/blob/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/sys_common/wtf8.rs#L813-L831
|
||||
|
||||
// SAFETY: This conversion technically causes undefined behavior when
|
||||
// [string] is not representable as UTF-8. However,
|
||||
// [str::next_code_point()] is not exposed; it is only available
|
||||
// through [Chars::next()]. This string will be dropped at the end of
|
||||
// this method.
|
||||
// https://github.com/rust-lang/rust/blob/4560ea788cb760f0a34127156c78e2552949f734/src/libcore/str/mod.rs#L500-L528
|
||||
let unchecked_string = unsafe {
|
||||
str::from_utf8_unchecked(string)
|
||||
};
|
||||
let mut encoded_chars = Vec::new();
|
||||
let mut buffer = [0; 2];
|
||||
for unchecked_char in unchecked_string.chars() {
|
||||
encoded_chars.extend(&*unchecked_char.encode_utf16(&mut buffer));
|
||||
}
|
||||
::std::os::windows::ffi::OsStringExt::from_wide(&encoded_chars)
|
||||
}
|
||||
|
||||
impl OsStrBytes for OsStr {
|
||||
#[inline]
|
||||
fn from_bytes(string: &[u8]) -> Result<Cow<'_, Self>, EncodingError> {
|
||||
Ok(Cow::Owned(OsString::from_bytes(string)?))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn from_bytes_unchecked(string: &[u8]) -> Cow<'_, Self> {
|
||||
Cow::Owned(OsString::from_bytes_unchecked(string))
|
||||
}
|
||||
|
||||
fn to_bytes(&self) -> Cow<'_, [u8]> {
|
||||
// https://github.com/rust-lang/rust/blob/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/sys_common/wtf8.rs#L183-L201
|
||||
|
||||
let mut string = Vec::with_capacity(self.len());
|
||||
let mut buffer = [0; size_of::<char>()];
|
||||
for ch in char::decode_utf16(
|
||||
::std::os::windows::ffi::OsStrExt::encode_wide(self),
|
||||
) {
|
||||
let unchecked_char = match ch {
|
||||
Ok(ch) => ch,
|
||||
Err(surrogate) => {
|
||||
let surrogate = surrogate.unpaired_surrogate().into();
|
||||
// SAFETY: This conversion creates an invalid [char] value.
|
||||
// However, there is otherwise no way to encode a [u32]
|
||||
// value as invalid UTF-8, which is why the standard
|
||||
// library uses the same approach:
|
||||
// https://github.com/rust-lang/rust/blob/4560ea788cb760f0a34127156c78e2552949f734/src/libstd/sys_common/wtf8.rs#L206-L208
|
||||
unsafe {
|
||||
char::from_u32_unchecked(surrogate)
|
||||
}
|
||||
},
|
||||
};
|
||||
string.extend_from_slice(
|
||||
unchecked_char.encode_utf8(&mut buffer).as_bytes(),
|
||||
);
|
||||
}
|
||||
Cow::Owned(string)
|
||||
}
|
||||
}
|
||||
|
||||
impl OsStringBytes for OsString {
|
||||
fn from_bytes<TString>(string: TString) -> Result<Self, EncodingError>
|
||||
where
|
||||
TString: AsRef<[u8]>,
|
||||
{
|
||||
let string = string.as_ref();
|
||||
let os_string = from_bytes_unchecked(string);
|
||||
if os_string.to_bytes() == string { Ok(os_string) }
|
||||
else { Err(EncodingError(())) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn from_bytes_unchecked<TString>(string: TString) -> Self
|
||||
where
|
||||
TString: AsRef<[u8]>,
|
||||
{
|
||||
from_bytes_unchecked(string.as_ref())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_vec(string: Vec<u8>) -> Result<Self, EncodingError> {
|
||||
Self::from_bytes(string)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn from_vec_unchecked(string: Vec<u8>) -> Self {
|
||||
Self::from_bytes_unchecked(string)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn into_vec(self) -> Vec<u8> {
|
||||
self.as_os_str().to_bytes().into_owned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;
    use std::ffi::OsString;

    use crate::tests::*;
    use crate::EncodingError;
    use crate::OsStrBytes;
    use crate::OsStringBytes;

    /// Both byte-slice conversions are expected to reject the
    /// `INVALID_STRING` fixture with this implementation.
    #[test]
    fn test_invalid_bytes() {
        let borrowed = OsStr::from_bytes(INVALID_STRING);
        assert_eq!(Err(EncodingError(())), borrowed);

        let owned = OsString::from_bytes(INVALID_STRING);
        assert_eq!(Err(EncodingError(())), owned);
    }

    /// The vector conversion is expected to reject the `INVALID_STRING`
    /// fixture with this implementation.
    #[test]
    fn test_invalid_vec() {
        let owned = OsString::from_vec(INVALID_STRING.to_vec());
        assert_eq!(Err(EncodingError(())), owned);
    }
}
|
||||
Reference in New Issue
Block a user