prepare v0.2.0 with code improvements

This commit is contained in:
Philipp Schuster 2023-04-11 10:47:06 +02:00
parent 547429a3dd
commit 1412663d2c
11 changed files with 158 additions and 120 deletions

View File

@ -9,6 +9,7 @@ insert_final_newline = true
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
max_line_length = 80
[*.yml]
indent_size = 2

View File

@ -1,23 +1,22 @@
name: Build
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
on: [push, pull_request]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
runs-on: ${{ matrix.runs-on }}
strategy:
matrix:
runs-on:
- windows-latest
- ubuntu-latest
rust:
- stable
- nightly
- 1.52.1 # MSVR
- 1.60.0 # MSRV
steps:
- uses: actions/checkout@v2
# Important preparation step: override the latest default Rust version in GitHub CI
@ -28,21 +27,21 @@ jobs:
toolchain: ${{ matrix.rust }}
override: true
- name: Build
run: cargo build --all-targets --verbose --features all
run: cargo build --all-targets --verbose --features alloc
# use some arbitrary no_std target
- name: Install no_std target thumbv7em-none-eabihf
run: rustup target add thumbv7em-none-eabihf
- name: Build (no_std)
run: cargo build --verbose --target thumbv7em-none-eabihf --features all
run: cargo build --verbose --target thumbv7em-none-eabihf --features alloc
- name: Run tests
run: cargo test --verbose --features all
run: cargo test --verbose --features alloc
style_checks:
runs-on: ubuntu-latest
strategy:
matrix:
rust:
- stable
- 1.60.0
steps:
- uses: actions/checkout@v2
# Important preparation step: override the latest default Rust version in GitHub CI
@ -55,6 +54,6 @@ jobs:
- name: Rustfmt
run: cargo fmt -- --check
- name: Clippy
run: cargo clippy --features all
run: cargo clippy --features alloc
- name: Rustdoc
run: cargo doc --features all
run: cargo doc --no-deps --document-private-items --features alloc

6
CHANGELOG.md Normal file
View File

@ -0,0 +1,6 @@
# v0.2.0 (2023-04-11)
- MSRV is 1.60.0
- bitflags bump: 1.x -> 2.x
- a few internal code improvements (fewer possible panics)
- `Mode::to_flags` now returns a Result
- Feature `all` was removed. Use `alloc` instead.

View File

@ -7,8 +7,8 @@ as GNU Longname. The maximum supported file name length is 100 characters includ
The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
collections of files.
"""
version = "0.1.8"
edition = "2018"
version = "0.2.0"
edition = "2021"
keywords = ["tar", "tarball", "archive"]
categories = ["data-structures", "no-std", "parser-implementations"]
readme = "README.md"
@ -23,12 +23,11 @@ resolver = "2"
[features]
default = []
alloc = []
all = ["alloc"]
[dependencies]
arrayvec = { version = "0.7", default-features = false }
log = { version = "0.4", default-features = false }
bitflags = "2.0"
log = { version = "0.4", default-features = false }
[dev-dependencies]
env_logger = "0.10"

View File

@ -39,7 +39,7 @@ fn main() {
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8") );
println!("{:#?}", entries[2].data_as_str().expect("Should be valid UTF-8"));
}
```

View File

@ -1,11 +1,11 @@
#!/usr/bin/bash
#!/usr/bin/env bash
cargo build --all-targets --verbose --features all
cargo build --all-targets --verbose --features alloc
# use some random no_std target
rustup target add thumbv7em-none-eabihf
cargo build --verbose --target thumbv7em-none-eabihf --features all
cargo test --verbose --features all
cargo build --verbose --target thumbv7em-none-eabihf --features alloc
cargo test --verbose --features alloc
cargo fmt -- --check
cargo clippy --features all
cargo doc --features all
cargo +1.60.0 clippy --features alloc
cargo +1.60.0 doc --no-deps --document-private-items --features alloc

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -37,5 +37,8 @@ fn main() {
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8"));
println!(
"{:#?}",
entries[2].data_as_str().expect("Should be valid UTF-8")
);
}

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -35,5 +35,8 @@ fn main() {
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8"));
println!(
"{:#?}",
entries[2].data_as_str().expect("Should be valid UTF-8")
);
}

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -25,24 +25,25 @@ SOFTWARE.
//! also exports `TarArchive`, which owns data on the heap.
use crate::header::PosixHeader;
use crate::{TypeFlag, BLOCKSIZE};
use crate::{TypeFlag, BLOCKSIZE, FILENAME_MAX_LEN};
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use arrayvec::ArrayString;
use core::fmt::{Debug, Formatter};
use core::str::{FromStr, Utf8Error};
use log::warn;
/// Describes an entry in an archive.
/// Currently only supports files but no directories.
pub struct ArchiveEntry<'a> {
filename: ArrayString<100>,
filename: ArrayString<FILENAME_MAX_LEN>,
data: &'a [u8],
size: usize,
}
#[allow(unused)]
impl<'a> ArchiveEntry<'a> {
pub const fn new(filename: ArrayString<100>, data: &'a [u8]) -> Self {
const fn new(filename: ArrayString<FILENAME_MAX_LEN>, data: &'a [u8]) -> Self {
ArchiveEntry {
filename,
data,
@ -50,8 +51,9 @@ impl<'a> ArchiveEntry<'a> {
}
}
/// Filename of the entry. Max 99 characters.
pub const fn filename(&self) -> ArrayString<100> {
/// Filename of the entry with a maximum of 100 characters (including the
/// terminating NULL-byte).
pub const fn filename(&self) -> ArrayString<{ FILENAME_MAX_LEN }> {
self.filename
}
@ -122,13 +124,6 @@ impl From<Box<[u8]>> for TarArchive {
}
}
/*#[cfg(feature = "alloc")]
impl Into<Box<[u8]>> for TarArchive {
fn into(self) -> Box<[u8]> {
self.data
}
}*/
#[cfg(feature = "alloc")]
impl From<TarArchive> for Box<[u8]> {
fn from(ar: TarArchive) -> Self {
@ -137,7 +132,7 @@ impl From<TarArchive> for Box<[u8]> {
}
/// Wrapper type around bytes, which represents a Tar archive.
/// Unlike [`TarArchive`], this uses only a reference to data.
/// Unlike [`TarArchive`], this uses only a reference to the data.
#[derive(Debug)]
pub struct TarArchiveRef<'a> {
data: &'a [u8],
@ -193,7 +188,7 @@ impl<'a> Iterator for ArchiveIterator<'a> {
fn next(&mut self) -> Option<Self::Item> {
if self.block_index * BLOCKSIZE >= self.archive_data.len() {
log::warn!("Reached end of Tar archive data without finding zero/end blocks!");
warn!("Reached end of Tar archive data without finding zero/end blocks!");
return None;
}
@ -221,28 +216,40 @@ impl<'a> Iterator for ArchiveIterator<'a> {
}
if hdr.name.is_empty() {
log::warn!("Found empty file name",);
warn!("Found empty file name",);
}
// fetch data of file from next block(s)
let data_block_count = hdr.payload_block_count();
let hdr_size = hdr.size.val();
if let Err(e) = hdr_size {
warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}");
return None;
}
let hdr_size = hdr_size.unwrap();
// Fetch data of file from next block(s).
// .unwrap() is fine as we checked that hdr.size().val() is valid
// above
let data_block_count = hdr.payload_block_count().unwrap();
// +1: skip hdr block itself and start at data!
// i_begin is the byte begin index of this file in the array of the whole archive
let i_begin = (self.block_index + 1) * BLOCKSIZE;
// i_end is the exclusive byte end index of the data of the current file
let i_end = i_begin + data_block_count * BLOCKSIZE;
let file_block_bytes = &self.archive_data[i_begin..i_end];
// because each block is 512 bytes long, the file is not necessarily a multiple of 512 bytes
let file_bytes = &file_block_bytes[0..hdr.size.val()];
// Each block is 512 bytes long, but the file size is not necessarily a
// multiple of 512.
let file_bytes = &file_block_bytes[0..hdr_size];
// in next iteration: start at next Archive entry header
// +1 for current hdr block itself + all data blocks
self.block_index += data_block_count + 1;
Some(ArchiveEntry::new(
ArrayString::from_str(hdr.name.as_string().as_str()).unwrap(),
file_bytes,
))
let filename = ArrayString::from_str(hdr.name.as_string().as_str());
// .unwrap() is fine as the capacity MUST be sufficient.
let filename = filename.unwrap();
Some(ArchiveEntry::new(filename, file_bytes))
}
}
@ -258,24 +265,9 @@ mod tests {
println!("{:#?}", entries);
}
/// Tests to read the entries from existing archives in various Tar flavors.
#[test]
fn test_archive_entries() {
#[cfg(feature = "alloc")]
{
let data = include_bytes!("../tests/gnu_tar_default.tar")
.to_vec()
.into_boxed_slice();
let archive = TarArchive::new(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
let archive = TarArchive::from(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
assert_eq!(data, archive.into());
}
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
@ -307,30 +299,52 @@ mod tests {
assert_archive_content(&entries);
}
/// Like [`test_archive_entries`] but with additional `alloc` functionality.
#[cfg(feature = "alloc")]
#[test]
fn test_archive_entries_alloc() {
let data = include_bytes!("../tests/gnu_tar_default.tar")
.to_vec()
.into_boxed_slice();
let archive = TarArchive::new(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
// Test that the archive can be transformed into owned heap data.
assert_eq!(data, archive.into());
}
/// Tests that the parsed archive matches the expected order. The tarballs
/// in the tests directory were created once by me with files in the order
/// specified in this test.
fn assert_archive_content(entries: &[ArchiveEntry]) {
assert_eq!(entries.len(), 3);
// order in which I stored the files in the archive
assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt");
assert_eq!(entries[0].size(), 513);
assert_eq!(entries[0].data().len(), 513);
assert_eq!(
entries[0].data_as_str().expect("Invalid UTF-8"),
include_str!("../tests/bye_world_513b.txt")
entries[0].data_as_str().expect("Should be valid UTF-8"),
// .replace: Ensure that the test also works on Windows
include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
);
// Test that an entry that needs two 512 byte data blocks is read
// properly.
assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt");
assert_eq!(entries[1].size(), 513);
assert_eq!(entries[1].data().len(), 513);
assert_eq!(
entries[1].data_as_str().expect("Invalid UTF-8"),
include_str!("../tests/hello_world_513b.txt")
entries[1].data_as_str().expect("Should be valid UTF-8"),
// .replace: Ensure that the test also works on Windows
include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
);
assert_eq!(entries[2].filename().as_str(), "hello_world.txt");
assert_eq!(entries[2].size(), 12);
assert_eq!(entries[2].data().len(), 12);
assert_eq!(
entries[2].data_as_str().expect("Invalid UTF-8"),
entries[2].data_as_str().expect("Should be valid UTF-8"),
"Hello World\n",
"file content must match"
);

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -30,9 +30,10 @@ SOFTWARE.
#![allow(non_upper_case_globals)]
use crate::BLOCKSIZE;
use crate::{BLOCKSIZE, FILENAME_MAX_LEN};
use arrayvec::ArrayString;
use core::fmt::{Debug, Formatter};
use core::num::ParseIntError;
/// The file size is encoded as octal ASCII number inside a Tar header.
#[derive(Copy, Clone)]
@ -41,8 +42,8 @@ pub struct Size(StaticCString<12>);
impl Size {
/// Returns the octal ASCII number as actual size in bytes.
pub fn val(&self) -> usize {
usize::from_str_radix(self.0.as_string().as_str(), 8).unwrap()
pub fn val(&self) -> Result<usize, ParseIntError> {
usize::from_str_radix(self.0.as_string().as_str(), 8)
}
}
@ -54,16 +55,24 @@ impl Debug for Size {
}
}
/// Errors that can occur when parsing [`ModeFlags`] from a [`Mode`].
#[derive(Debug)]
pub enum ModeError {
/// The mode string could not be parsed as an octal number.
ParseInt(ParseIntError),
/// The parsed number was rejected by `ModeFlags::from_bits`, i.e., it is
/// not a valid combination of known mode flags.
IllegalMode,
}
/// Wrapper around the UNIX file permissions given in octal ASCII.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Mode(StaticCString<8>);
impl Mode {
pub fn to_flags(self) -> ModeFlags {
/// Parses the [`ModeFlags`] from the mode string.
pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
let octal_number_str = self.0.as_string();
let bits = u64::from_str_radix(octal_number_str.as_str(), 8).unwrap();
ModeFlags::from_bits(bits).unwrap()
let bits =
u64::from_str_radix(octal_number_str.as_str(), 8).map_err(ModeError::ParseInt)?;
ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
}
}
@ -75,8 +84,11 @@ impl Debug for Mode {
}
}
/// A C-String that is stored in a static array. All unused
/// chars must be a NULL-byte.
/// A C-String that is stored in a static array. There is always a terminating
/// NULL-byte.
///
/// The content is likely to be UTF-8/ASCII, but that is not verified by this
/// type.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct StaticCString<const N: usize>([u8; N]);
@ -99,19 +111,16 @@ impl<const N: usize> StaticCString<N> {
self.len() == 0
}
/// Returns a string without null bytes.
/// Returns a string that includes all characters until the first null.
pub fn as_string(&self) -> ArrayString<N> {
let mut string = ArrayString::new();
// copy all bytes (=ASCII) into string
self.0
.clone()
.iter()
// remove all zero bytes; there is always one
// zero byte at the end. Furtherore, the other
// unused bytes are also zero, but not part of the
// string.
.filter(|x| **x != 0)
.for_each(|b| string.push(*b as char));
.copied()
// Take all chars until the terminating null.
.take_while(|byte| *byte != 0)
.for_each(|byte| string.push(byte as char));
string
}
}
@ -145,7 +154,7 @@ impl<const N: usize> Debug for StaticCString<N> {
pub struct PosixHeader {
/// Name. There is always a null byte, therefore
/// the max len is 99.
pub name: StaticCString<100>,
pub name: StaticCString<{ FILENAME_MAX_LEN }>,
pub mode: Mode,
pub uid: [u8; 8],
pub gid: [u8; 8],
@ -156,7 +165,7 @@ pub struct PosixHeader {
pub typeflag: TypeFlag,
/// Name. There is always a null byte, therefore
/// the max len is 99.
pub linkname: StaticCString<100>,
pub linkname: StaticCString<{ FILENAME_MAX_LEN }>,
pub magic: StaticCString<6>,
pub version: StaticCString<2>,
/// Username. There is always a null byte, therefore
@ -175,16 +184,14 @@ pub struct PosixHeader {
}
impl PosixHeader {
/// Returns the number of blocks that are required to
/// read the whole file content.
pub fn payload_block_count(&self) -> usize {
let div = self.size.val() / BLOCKSIZE;
let modulo = self.size.val() % BLOCKSIZE;
if modulo > 0 {
(div + 1) as usize
} else {
div as usize
}
/// Returns the number of blocks that are required to read the whole file
/// content. Returns an error, if the file size can't be parsed from the
/// header.
pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
let div = self.size.val()? / BLOCKSIZE;
let modulo = self.size.val()? % BLOCKSIZE;
let block_count = if modulo > 0 { div + 1 } else { div };
Ok(block_count)
}
/// A Tar archive is terminated, if an end-of-archive entry, which consists of two 512-byte blocks
@ -199,7 +206,7 @@ impl PosixHeader {
/// Describes the kind of payload, that follows after a
/// [`PosixHeader`]. The properties of this payload are
/// described inside the header.
#[derive(Debug, Copy, Clone, PartialEq)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
#[allow(unused)]
pub enum TypeFlag {
@ -252,7 +259,7 @@ pub enum TypeFlag {
}
bitflags::bitflags! {
/// UNIX file permissions on octal format.
/// UNIX file permissions in octal format.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModeFlags: u64 {

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -21,23 +21,27 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
//! If you have a standard environment and need full feature support, I recommend the use of
//! <https://crates.io/crates/tar> instead.
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero
//! allocations. If you have a standard environment and need full feature
//! support, I recommend the use of <https://crates.io/crates/tar> instead.
//!
//! The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
//! as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte.
//! The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
//! collections of files.
//! The crate is simple and only supports reading of "basic" archives, therefore
//! no extensions, such as GNU Longname. The maximum supported file name length
//! is 100 characters including the NULL-byte. The maximum supported file size
//! is 8 GiB. Also, directories are not supported yet but only flat collections
//! of files.
//!
//! This library is useful, if you write a kernel or a similar low-level application, which needs
//! "a bunch of files" from an archive ("init ram disk"). The Tar file could for example come
//! as a Multiboot2 boot module provided by the bootloader.
//! This library is useful, if you write a kernel or a similar low-level
//! application, which needs "a bunch of files" from an archive ("init ram
//! disk"). The Tar file could for example come as a Multiboot2 boot module
//! provided by the bootloader.
//!
//! This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**.
//! GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet.
//! [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
//! archive formats and their limitations.
//! This crate focuses on extracting files from uncompressed Tar archives
//! created with default options by **GNU Tar**. GNU Extensions such as sparse
//! files, incremental archives, and long filename extension are not supported
//! yet. [gnu.org](https://www.gnu.org/software/tar/manual/html_section/Formats.html)
//! provides a good overview of possible archive formats and their
//! limitations.
//!
//! # Example
//! ```rust
@ -85,6 +89,8 @@ extern crate alloc;
/// Each Archive Entry (either Header or Data Block) is a block of 512 bytes.
const BLOCKSIZE: usize = 512;
/// Maximum filename length of the base Tar format including the terminating NULL-byte.
const FILENAME_MAX_LEN: usize = 100;
mod archive;
mod header;