From da12b748dca6f8e0a83078d557a92c0240351913 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 11 Oct 2021 15:03:55 +0200 Subject: [PATCH] 'alloc'-feature and owning tar archive type (v0.1.5) --- .github/workflows/rust.yml | 12 ++-- Cargo.toml | 7 ++- README.md | 12 +++- build.sh | 11 ++++ examples/alloc_feature.rs | 41 +++++++++++++ examples/minimal.rs | 6 +- src/archive.rs | 117 ++++++++++++++++++++++++++++++------- src/lib.rs | 7 ++- 8 files changed, 178 insertions(+), 35 deletions(-) create mode 100755 build.sh create mode 100644 examples/alloc_feature.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 651495d..92a18ae 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -21,14 +21,14 @@ jobs: steps: - uses: actions/checkout@v2 - name: Build - run: cargo build --verbose - # use some no_std target + run: cargo build --all-targets --verbose --features all + # use some arbitrary no_std target - name: Install no_std target thumbv7em-none-eabihf run: rustup target add thumbv7em-none-eabihf - name: Build (no_std) - run: cargo build --verbose --target thumbv7em-none-eabihf + run: cargo build --verbose --target thumbv7em-none-eabihf --features all - name: Run tests - run: cargo test --verbose + run: cargo test --verbose --features all style_checks: runs-on: ubuntu-latest @@ -43,6 +43,6 @@ jobs: - name: Rustfmt run: cargo fmt -- --check - name: Clippy - run: cargo clippy + run: cargo clippy --features all - name: Rustdoc - run: cargo doc + run: cargo doc --features all diff --git a/Cargo.toml b/Cargo.toml index 374f292..b72fc18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ as GNU Longname. The maximum supported file name length is 100 characters includ The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat collections of files. 
""" -version = "0.1.4" +version = "0.1.5" edition = "2018" keywords = ["tar", "tarball", "archive"] categories = ["data-structures", "no-std", "parser-implementations"] @@ -20,6 +20,11 @@ documentation = "https://docs.rs/tar-no-std" # required because "env_logger" uses "log" but with dependency to std.. resolver = "2" +[features] +default = [] +alloc = [] +all = ["alloc"] + [dependencies] bitflags = "1.3" arrayvec = { version = "0.7", default-features = false } diff --git a/README.md b/README.md index b07390a..3fde65f 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,10 @@ GNU Extensions such as sparse files, incremental archives, and long filename ext [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible archive formats and their limitations. -## Example +## Example (without `alloc`-feature) ```rust +use tar_no_std::TarArchiveRef; + fn main() { // log: not mandatory std::env::set_var("RUST_LOG", "trace"); @@ -28,7 +30,7 @@ fn main() { // also works in no_std environment (except the println!, of course) let archive = include_bytes!("../tests/gnu_tar_default.tar"); - let archive = TarArchive::new(archive); + let archive = TarArchiveRef::new(archive); // Vec needs an allocator of course, but the library itself doesn't need one let entries = archive.entries().collect::>(); println!("{:#?}", entries); @@ -37,7 +39,11 @@ fn main() { } ``` -## Compression +## Alloc Feature +This crate allows the additional Cargo build time feature `alloc`. When this is used, the crate +also provides the type `TarArchive`, which owns the data on the heap. + +## Compression (`tar.gz`) If your tar file is compressed, e.g. by `.tar.gz`/`gzip`, you need to uncompress the bytes first (e.g. by a *gzip* library). Afterwards, this crate can read and write the Tar archive format from the bytes. 
diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..c0e0b6d --- /dev/null +++ b/build.sh @@ -0,0 +1,11 @@ +#!/usr/bin/bash + +cargo build --all-targets --verbose --features all +# use some random no_std target +rustup target add thumbv7em-none-eabihf +cargo build --verbose --target thumbv7em-none-eabihf --features all +cargo test --verbose --features all + +cargo fmt -- --check +cargo clippy --features all +cargo doc --features all diff --git a/examples/alloc_feature.rs b/examples/alloc_feature.rs new file mode 100644 index 0000000..5cefb4a --- /dev/null +++ b/examples/alloc_feature.rs @@ -0,0 +1,41 @@ +/* +MIT License + +Copyright (c) 2021 Philipp Schuster + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ +use tar_no_std::TarArchive; + +/// This example needs the `alloc` feature. 
+fn main() { + // log: not mandatory + std::env::set_var("RUST_LOG", "trace"); + env_logger::init(); + + // also works in no_std environment (except the println!, of course) + let archive = include_bytes!("../tests/gnu_tar_default.tar"); + let archive_heap_owned = archive.to_vec().into_boxed_slice(); + let archive = TarArchive::new(archive_heap_owned); + // Vec needs an allocator of course, but the library itself doesn't need one + let entries = archive.entries().collect::<Vec<_>>(); + println!("{:#?}", entries); + println!("content of last file:"); + println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8")); +} diff --git a/examples/minimal.rs b/examples/minimal.rs index d77fc0c..40e2547 100644 --- a/examples/minimal.rs +++ b/examples/minimal.rs @@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -use tar_no_std::TarArchive; +use tar_no_std::TarArchiveRef; fn main() { // log: not mandatory @@ -30,10 +30,10 @@ fn main() { // also works in no_std environment (except the println!, of course) let archive = include_bytes!("../tests/gnu_tar_default.tar"); - let archive = TarArchive::new(archive); + let archive = TarArchiveRef::new(archive); // Vec needs an allocator of course, but the library itself doesn't need one let entries = archive.entries().collect::<Vec<_>>(); println!("{:#?}", entries); println!("content of last file:"); - println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8") ); + println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8")); } diff --git a/src/archive.rs b/src/archive.rs index e288bca..1fccb2d 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -21,10 +21,13 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -//! Module for [`TarArchive`]. +//! Module for [`TarArchiveRef`].
If the `alloc`-feature is enabled, this crate +//! also exports `TarArchive`, which owns data on the heap. use crate::header::PosixHeader; use crate::{TypeFlag, BLOCKSIZE}; +#[cfg(feature = "alloc")] +use alloc::boxed::Box; use arrayvec::ArrayString; use core::fmt::{Debug, Formatter}; use core::str::{FromStr, Utf8Error}; @@ -78,15 +81,72 @@ impl<'a> Debug for ArchiveEntry<'a> { } } -/// Wrapper type around the bytes, which represents an archive. +/// Type that owns bytes on the heap, that represents a Tar archive. +/// Unlike [`TarArchiveRef`], this type is useful, if you need to own the +/// data as long as you need the archive, but not longer. +/// +/// This is only available with the `alloc` feature of this crate. +#[cfg(feature = "alloc")] #[derive(Debug)] -pub struct TarArchive<'a> { +pub struct TarArchive { + data: Box<[u8]>, +} + +#[cfg(feature = "alloc")] +impl TarArchive { + /// Creates a new archive type, that owns the data on the heap. The provided byte array is + /// interpreted as bytes in Tar archive format. + pub fn new(data: Box<[u8]>) -> Self { + assert_eq!( + data.len() % BLOCKSIZE, + 0, + "data must be a multiple of BLOCKSIZE={}, len is {}", + BLOCKSIZE, + data.len(), + ); + Self { data } + } + + /// Iterates over all entries of the Tar archive. + /// Returns items of type [`ArchiveEntry`]. + /// See also [`ArchiveIterator`]. + pub fn entries(&self) -> ArchiveIterator { + ArchiveIterator::new(self.data.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl From<Box<[u8]>> for TarArchive { + fn from(data: Box<[u8]>) -> Self { + Self::new(data) + } +} + +/*#[cfg(feature = "alloc")] +impl Into<Box<[u8]>> for TarArchive { + fn into(self) -> Box<[u8]> { + self.data + } +}*/ + +#[cfg(feature = "alloc")] +impl From<TarArchive> for Box<[u8]> { + fn from(ar: TarArchive) -> Self { + ar.data + } +} + +/// Wrapper type around bytes, which represents a Tar archive. +/// Unlike `TarArchive`, this uses only a reference to data.
+#[derive(Debug)] +pub struct TarArchiveRef<'a> { data: &'a [u8], } #[allow(unused)] -impl<'a> TarArchive<'a> { - /// Interprets the provided byte array as Tar archive. +impl<'a> TarArchiveRef<'a> { + /// Creates a new archive wrapper type. The provided byte array is interpreted as + /// bytes in Tar archive format. pub fn new(data: &'a [u8]) -> Self { assert_eq!( data.len() % BLOCKSIZE, @@ -97,32 +157,33 @@ impl<'a> TarArchive<'a> { Self { data } } - /// Iterates over all entries of the TAR Archive. + /// Iterates over all entries of the Tar archive. /// Returns items of type [`ArchiveEntry`]. + /// See also [`ArchiveIterator`]. pub const fn entries(&self) -> ArchiveIterator { - ArchiveIterator::new(self) + ArchiveIterator::new(self.data) } } -/// Iterator over the files. Each iteration step starts +/// Iterator over the files of the archive. Each iteration starts /// at the next Tar header entry. #[derive(Debug)] pub struct ArchiveIterator<'a> { - archive: &'a TarArchive<'a>, + archive_data: &'a [u8], block_index: usize, } impl<'a> ArchiveIterator<'a> { - pub const fn new(archive: &'a TarArchive<'a>) -> Self { + pub const fn new(archive: &'a [u8]) -> Self { Self { - archive, + archive_data: archive, block_index: 0, } } /// Returns a reference to the next Header. 
fn next_hdr(&self, block_index: usize) -> &'a PosixHeader { - let hdr_ptr = &self.archive.data[block_index * BLOCKSIZE]; + let hdr_ptr = &self.archive_data[block_index * BLOCKSIZE]; unsafe { (hdr_ptr as *const u8).cast::<PosixHeader>().as_ref() }.unwrap() } } @@ -131,7 +192,7 @@ impl<'a> Iterator for ArchiveIterator<'a> { type Item = ArchiveEntry<'a>; fn next(&mut self) -> Option<Self::Item> { - if self.block_index * BLOCKSIZE >= self.archive.data.len() { + if self.block_index * BLOCKSIZE >= self.archive_data.len() { log::warn!("Reached end of Tar archive data without finding zero/end blocks!"); return None; } @@ -170,7 +231,7 @@ impl<'a> Iterator for ArchiveIterator<'a> { let i_begin = (self.block_index + 1) * BLOCKSIZE; // i_end is the exclusive byte end index of the data of the current file let i_end = i_begin + data_block_count * BLOCKSIZE; - let file_block_bytes = &self.archive.data[i_begin..i_end]; + let file_block_bytes = &self.archive_data[i_begin..i_end]; // because each block is 512 bytes long, the file is not necessarily a multiple of 512 bytes let file_bytes = &file_block_bytes[0..hdr.size.val()]; @@ -192,22 +253,38 @@ mod tests { #[test] fn test_archive_list() { - let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar")); + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")); let entries = archive.entries().collect::<Vec<_>>(); println!("{:#?}", entries); } #[test] fn test_archive_entries() { - let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar")); + #[cfg(feature = "alloc")] + { + let data = include_bytes!("../tests/gnu_tar_default.tar") + .to_vec() + .into_boxed_slice(); + let archive = TarArchive::new(data.clone()); + let entries = archive.entries().collect::<Vec<_>>(); + assert_archive_content(&entries); + + let archive = TarArchive::from(data.clone()); + let entries = archive.entries().collect::<Vec<_>>(); + assert_archive_content(&entries); + + assert_eq!(data, archive.into()); + } + + let archive =
TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar")); let entries = archive.entries().collect::<Vec<_>>(); assert_archive_content(&entries); - let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_gnu.tar")); + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_gnu.tar")); let entries = archive.entries().collect::<Vec<_>>(); assert_archive_content(&entries); - let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_oldgnu.tar")); + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_oldgnu.tar")); let entries = archive.entries().collect::<Vec<_>>(); assert_archive_content(&entries); @@ -221,11 +298,11 @@ mod tests { let entries = archive.entries().collect::<Vec<_>>(); assert_archive_content(&entries);*/ - let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_ustar.tar")); + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar.tar")); let entries = archive.entries().collect::<Vec<_>>(); assert_archive_content(&entries); - let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_v7.tar")); + let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_v7.tar")); let entries = archive.entries().collect::<Vec<_>>(); assert_archive_content(&entries); } diff --git a/src/lib.rs b/src/lib.rs index 327e2d7..dabe32d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,7 +41,7 @@ SOFTWARE. //! //! # Example //! ```rust -//! use tar_no_std::TarArchive; +//! use tar_no_std::TarArchiveRef; //! //! // log: not mandatory //! std::env::set_var("RUST_LOG", "trace"); @@ -49,7 +49,7 @@ SOFTWARE. //! //! // also works in no_std environment (except the println!, of course) //! let archive = include_bytes!("../tests/gnu_tar_default.tar"); -//! let archive = TarArchive::new(archive); +//! let archive = TarArchiveRef::new(archive); //! // Vec needs an allocator of course, but the library itself doesn't need one //! let entries = archive.entries().collect::<Vec<_>>(); //! println!("{:#?}", entries); @@ -69,6 +69,9 @@ SOFTWARE.
#[cfg(test)] extern crate std; +#[cfg(feature = "alloc")] +extern crate alloc; + /// Each Archive Entry (either Header or Data Block) is a block of 512 bytes. const BLOCKSIZE: usize = 512;