Compare commits

..

1 Commits

Author SHA1 Message Date
2ab6dc58e5 remove rustdoc from lib 2023-10-24 16:05:06 +08:00
12 changed files with 128 additions and 181 deletions

View File

@ -9,7 +9,6 @@ insert_final_newline = true
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
max_line_length = 80
[*.yml]
indent_size = 2

View File

@ -1,12 +0,0 @@
name: QA
on: [ push, pull_request ]
jobs:
spellcheck:
name: Spellcheck
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
# Executes "typos ."
- uses: crate-ci/typos@v1.13.20

View File

@ -1,22 +1,23 @@
name: Build
on: [push, pull_request]
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ${{ matrix.runs-on }}
runs-on: ubuntu-latest
strategy:
matrix:
runs-on:
- windows-latest
- ubuntu-latest
rust:
- stable
- nightly
- 1.60.0 # MSRV
- 1.52.1 # MSRV
steps:
- uses: actions/checkout@v2
# Important preparation step: override the latest default Rust version in GitHub CI
@ -27,21 +28,21 @@ jobs:
toolchain: ${{ matrix.rust }}
override: true
- name: Build
run: cargo build --all-targets --verbose --features alloc
run: cargo build --all-targets --verbose --features all
# use some arbitrary no_std target
- name: Install no_std target thumbv7em-none-eabihf
run: rustup target add thumbv7em-none-eabihf
- name: Build (no_std)
run: cargo build --verbose --target thumbv7em-none-eabihf --features alloc
run: cargo build --verbose --target thumbv7em-none-eabihf --features all
- name: Run tests
run: cargo test --verbose --features alloc
run: cargo test --verbose --features all
style_checks:
runs-on: ubuntu-latest
strategy:
matrix:
rust:
- 1.60.0
- stable
steps:
- uses: actions/checkout@v2
# Important preparation step: override the latest default Rust version in GitHub CI
@ -54,6 +55,6 @@ jobs:
- name: Rustfmt
run: cargo fmt -- --check
- name: Clippy
run: cargo clippy --features alloc
run: cargo clippy --features all
- name: Rustdoc
run: cargo doc --no-deps --document-private-items --features alloc
run: cargo doc --features all

View File

@ -1,6 +0,0 @@
# v0.2.0 (2023-04-11)
- MSRV is 1.60.0
- bitflags bump: 1.x -> 2.x
- a few internal code improvements (fewer possible panics)
- `Mode::to_flags` now returns a Result
- Feature `all` was removed. Use `alloc` instead.

View File

@ -7,8 +7,8 @@ as GNU Longname. The maximum supported file name length is 100 characters includ
The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
collections of files.
"""
version = "0.2.0"
edition = "2021"
version = "0.1.8"
edition = "2018"
keywords = ["tar", "tarball", "archive"]
categories = ["data-structures", "no-std", "parser-implementations"]
readme = "README.md"
@ -23,11 +23,12 @@ resolver = "2"
[features]
default = []
alloc = []
all = ["alloc"]
[dependencies]
bitflags = "1.3"
arrayvec = { version = "0.7", default-features = false }
bitflags = "2.0"
log = { version = "0.4", default-features = false }
[dev-dependencies]
env_logger = "0.10"
env_logger = "0.9"

View File

@ -39,7 +39,7 @@ fn main() {
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
println!("{:#?}", entries[2].data_as_str().expect("Should be valid UTF-8"));
println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8") );
}
```

View File

@ -1,11 +1,11 @@
#!/usr/bin/env bash
#!/usr/bin/bash
cargo build --all-targets --verbose --features alloc
cargo build --all-targets --verbose --features all
# use some random no_std target
rustup target add thumbv7em-none-eabihf
cargo build --verbose --target thumbv7em-none-eabihf --features alloc
cargo test --verbose --features alloc
cargo build --verbose --target thumbv7em-none-eabihf --features all
cargo test --verbose --features all
cargo fmt -- --check
cargo +1.60.0 clippy --features alloc
cargo +1.60.0 doc --no-deps --document-private-items --features alloc
cargo clippy --features all
cargo doc --features all

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2023 Philipp Schuster
Copyright (c) 2021 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -37,8 +37,5 @@ fn main() {
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
println!(
"{:#?}",
entries[2].data_as_str().expect("Should be valid UTF-8")
);
println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8"));
}

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2023 Philipp Schuster
Copyright (c) 2021 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -35,8 +35,5 @@ fn main() {
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
println!(
"{:#?}",
entries[2].data_as_str().expect("Should be valid UTF-8")
);
println!("{:#?}", entries[2].data_as_str().expect("Invalid UTF-8"));
}

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2023 Philipp Schuster
Copyright (c) 2021 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -25,25 +25,24 @@ SOFTWARE.
//! also exports `TarArchive`, which owns data on the heap.
use crate::header::PosixHeader;
use crate::{TypeFlag, BLOCKSIZE, FILENAME_MAX_LEN};
use crate::{TypeFlag, BLOCKSIZE};
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use arrayvec::ArrayString;
use core::fmt::{Debug, Formatter};
use core::str::{FromStr, Utf8Error};
use log::warn;
/// Describes an entry in an archive.
/// Currently only supports files but no directories.
pub struct ArchiveEntry<'a> {
filename: ArrayString<FILENAME_MAX_LEN>,
filename: ArrayString<100>,
data: &'a [u8],
size: usize,
}
#[allow(unused)]
impl<'a> ArchiveEntry<'a> {
const fn new(filename: ArrayString<FILENAME_MAX_LEN>, data: &'a [u8]) -> Self {
pub const fn new(filename: ArrayString<100>, data: &'a [u8]) -> Self {
ArchiveEntry {
filename,
data,
@ -51,9 +50,8 @@ impl<'a> ArchiveEntry<'a> {
}
}
/// Filename of the entry with a maximum of 100 characters (including the
/// terminating NULL-byte).
pub const fn filename(&self) -> ArrayString<{ FILENAME_MAX_LEN }> {
/// Filename of the entry. Max 99 characters.
pub const fn filename(&self) -> ArrayString<100> {
self.filename
}
@ -124,6 +122,13 @@ impl From<Box<[u8]>> for TarArchive {
}
}
/*#[cfg(feature = "alloc")]
impl Into<Box<[u8]>> for TarArchive {
fn into(self) -> Box<[u8]> {
self.data
}
}*/
#[cfg(feature = "alloc")]
impl From<TarArchive> for Box<[u8]> {
fn from(ar: TarArchive) -> Self {
@ -132,7 +137,7 @@ impl From<TarArchive> for Box<[u8]> {
}
/// Wrapper type around bytes, which represents a Tar archive.
/// Unlike [`TarArchive`], this uses only a reference to the data.
/// Unlike [`TarArchive`], this uses only a reference to data.
#[derive(Debug)]
pub struct TarArchiveRef<'a> {
data: &'a [u8],
@ -188,7 +193,7 @@ impl<'a> Iterator for ArchiveIterator<'a> {
fn next(&mut self) -> Option<Self::Item> {
if self.block_index * BLOCKSIZE >= self.archive_data.len() {
warn!("Reached end of Tar archive data without finding zero/end blocks!");
log::warn!("Reached end of Tar archive data without finding zero/end blocks!");
return None;
}
@ -216,40 +221,28 @@ impl<'a> Iterator for ArchiveIterator<'a> {
}
if hdr.name.is_empty() {
warn!("Found empty file name",);
log::warn!("Found empty file name",);
}
let hdr_size = hdr.size.val();
if let Err(e) = hdr_size {
warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}");
return None;
}
let hdr_size = hdr_size.unwrap();
// Fetch data of file from next block(s).
// .unwrap() is fine as we checked that hdr.size().val() is valid
// above
let data_block_count = hdr.payload_block_count().unwrap();
// fetch data of file from next block(s)
let data_block_count = hdr.payload_block_count();
// +1: skip hdr block itself and start at data!
// i_begin is the byte begin index of this file in the array of the whole archive
let i_begin = (self.block_index + 1) * BLOCKSIZE;
// i_end is the exclusive byte end index of the data of the current file
let i_end = i_begin + data_block_count * BLOCKSIZE;
let file_block_bytes = &self.archive_data[i_begin..i_end];
// Each block is 512 bytes long, but the file size is not necessarily a
// multiple of 512.
let file_bytes = &file_block_bytes[0..hdr_size];
// because each block is 512 bytes long, the file is not necessarily a multiple of 512 bytes
let file_bytes = &file_block_bytes[0..hdr.size.val()];
// in next iteration: start at next Archive entry header
// +1 for current hdr block itself + all data blocks
self.block_index += data_block_count + 1;
let filename = ArrayString::from_str(hdr.name.as_string().as_str());
// .unwrap is fine as the capacity MUST be ok.
let filename = filename.unwrap();
Some(ArchiveEntry::new(filename, file_bytes))
Some(ArchiveEntry::new(
ArrayString::from_str(hdr.name.as_string().as_str()).unwrap(),
file_bytes,
))
}
}
@ -265,9 +258,24 @@ mod tests {
println!("{:#?}", entries);
}
/// Tests to read the entries from existing archives in various Tar flavors.
#[test]
fn test_archive_entries() {
#[cfg(feature = "alloc")]
{
let data = include_bytes!("../tests/gnu_tar_default.tar")
.to_vec()
.into_boxed_slice();
let archive = TarArchive::new(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
let archive = TarArchive::from(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
assert_eq!(data, archive.into());
}
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
@ -299,52 +307,30 @@ mod tests {
assert_archive_content(&entries);
}
/// Like [`test_archive_entries`] but with additional `alloc` functionality.
#[cfg(feature = "alloc")]
#[test]
fn test_archive_entries_alloc() {
let data = include_bytes!("../tests/gnu_tar_default.tar")
.to_vec()
.into_boxed_slice();
let archive = TarArchive::new(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
// Test that the archive can be transformed into owned heap data.
assert_eq!(data, archive.into());
}
/// Tests that the parsed archive matches the expected order. The tarballs in
/// the tests directory were created once by me with files in the order
/// specified in this test.
fn assert_archive_content(entries: &[ArchiveEntry]) {
assert_eq!(entries.len(), 3);
// order in which I stored the files in the archive
assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt");
assert_eq!(entries[0].size(), 513);
assert_eq!(entries[0].data().len(), 513);
assert_eq!(
entries[0].data_as_str().expect("Should be valid UTF-8"),
// .replace: Ensure that the test also works on Windows
include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
entries[0].data_as_str().expect("Invalid UTF-8"),
include_str!("../tests/bye_world_513b.txt")
);
// Test that an entry that needs two 512 byte data blocks is read
// properly.
assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt");
assert_eq!(entries[1].size(), 513);
assert_eq!(entries[1].data().len(), 513);
assert_eq!(
entries[1].data_as_str().expect("Should be valid UTF-8"),
// .replace: Ensure that the test also works on Windows
include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
entries[1].data_as_str().expect("Invalid UTF-8"),
include_str!("../tests/hello_world_513b.txt")
);
assert_eq!(entries[2].filename().as_str(), "hello_world.txt");
assert_eq!(entries[2].size(), 12);
assert_eq!(entries[2].data().len(), 12);
assert_eq!(
entries[2].data_as_str().expect("Should be valid UTF-8"),
entries[2].data_as_str().expect("Invalid UTF-8"),
"Hello World\n",
"file content must match"
);

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2023 Philipp Schuster
Copyright (c) 2021 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -30,10 +30,9 @@ SOFTWARE.
#![allow(non_upper_case_globals)]
use crate::{BLOCKSIZE, FILENAME_MAX_LEN};
use crate::BLOCKSIZE;
use arrayvec::ArrayString;
use core::fmt::{Debug, Formatter};
use core::num::ParseIntError;
/// The file size is encoded as octal ASCII number inside a Tar header.
#[derive(Copy, Clone)]
@ -42,8 +41,8 @@ pub struct Size(StaticCString<12>);
impl Size {
/// Returns the octal ASCII number as actual size in bytes.
pub fn val(&self) -> Result<usize, ParseIntError> {
usize::from_str_radix(self.0.as_string().as_str(), 8)
pub fn val(&self) -> usize {
usize::from_str_radix(self.0.as_string().as_str(), 8).unwrap()
}
}
@ -55,24 +54,16 @@ impl Debug for Size {
}
}
#[derive(Debug)]
pub enum ModeError {
ParseInt(ParseIntError),
IllegalMode,
}
/// Wrapper around the UNIX file permissions given in octal ASCII.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Mode(StaticCString<8>);
impl Mode {
/// Parses the [`ModeFlags`] from the mode string.
pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
pub fn to_flags(self) -> ModeFlags {
let octal_number_str = self.0.as_string();
let bits =
u64::from_str_radix(octal_number_str.as_str(), 8).map_err(ModeError::ParseInt)?;
ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
let bits = u64::from_str_radix(octal_number_str.as_str(), 8).unwrap();
ModeFlags::from_bits(bits).unwrap()
}
}
@ -84,11 +75,8 @@ impl Debug for Mode {
}
}
/// A C-String that is stored in a static array. There is always a terminating
/// NULL-byte.
///
/// The content is likely to be UTF-8/ASCII, but that is not verified by this
/// type.
/// A C-String that is stored in a static array. All unused
/// chars must be a NULL-byte.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct StaticCString<const N: usize>([u8; N]);
@ -111,16 +99,19 @@ impl<const N: usize> StaticCString<N> {
self.len() == 0
}
/// Returns a string that includes all characters until the first null.
/// Returns a string without null bytes.
pub fn as_string(&self) -> ArrayString<N> {
let mut string = ArrayString::new();
// copy all bytes (=ASCII) into string
self.0
.clone()
.iter()
.copied()
// Take all chars until the terminating null.
.take_while(|byte| *byte != 0)
.for_each(|byte| string.push(byte as char));
// remove all zero bytes; there is always one
// zero byte at the end. Furthermore, the other
// unused bytes are also zero, but not part of the
// string.
.filter(|x| **x != 0)
.for_each(|b| string.push(*b as char));
string
}
}
@ -154,7 +145,7 @@ impl<const N: usize> Debug for StaticCString<N> {
pub struct PosixHeader {
/// Name. There is always a null byte, therefore
/// the max len is 99.
pub name: StaticCString<{ FILENAME_MAX_LEN }>,
pub name: StaticCString<100>,
pub mode: Mode,
pub uid: [u8; 8],
pub gid: [u8; 8],
@ -165,7 +156,7 @@ pub struct PosixHeader {
pub typeflag: TypeFlag,
/// Name. There is always a null byte, therefore
/// the max len is 99.
pub linkname: StaticCString<{ FILENAME_MAX_LEN }>,
pub linkname: StaticCString<100>,
pub magic: StaticCString<6>,
pub version: StaticCString<2>,
/// Username. There is always a null byte, therefore
@ -184,14 +175,16 @@ pub struct PosixHeader {
}
impl PosixHeader {
/// Returns the number of blocks that are required to read the whole file
/// content. Returns an error, if the file size can't be parsed from the
/// header.
pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
let div = self.size.val()? / BLOCKSIZE;
let modulo = self.size.val()? % BLOCKSIZE;
let block_count = if modulo > 0 { div + 1 } else { div };
Ok(block_count)
/// Returns the number of blocks that are required to
/// read the whole file content.
pub fn payload_block_count(&self) -> usize {
let div = self.size.val() / BLOCKSIZE;
let modulo = self.size.val() % BLOCKSIZE;
if modulo > 0 {
(div + 1) as usize
} else {
div as usize
}
}
/// A Tar archive is terminated, if an end-of-archive entry, which consists of two 512 blocks
@ -206,7 +199,7 @@ impl PosixHeader {
/// Describes the kind of payload, that follows after a
/// [`PosixHeader`]. The properties of this payload are
/// described inside the header.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(u8)]
#[allow(unused)]
pub enum TypeFlag {
@ -223,35 +216,35 @@ pub enum TypeFlag {
/// This flag represents a file linked to another file, of any type, previously archived. Such
/// files are identified in Unix by each file having the same device and inode number. The
/// linked-to name is specified in the linkname field with a trailing null.
LINK = b'1',
LINK = 1,
/// This represents a symbolic link to another file. The linked-to name is specified in the
/// linkname field with a trailing null.
SYMTYPE = b'2',
SYMTYPE = 2,
/// Represents character special files and block special files respectively. In this case the
/// devmajor and devminor fields will contain the major and minor device numbers respectively.
/// Operating systems may map the device specifications to their own local specification, or
/// may ignore the entry.
CHRTYPE = b'3',
CHRTYPE = 3,
/// Represents character special files and block special files respectively. In this case the
/// devmajor and devminor fields will contain the major and minor device numbers respectively.
/// Operating systems may map the device specifications to their own local specification, or
/// may ignore the entry.
BLKTYPE = b'4',
BLKTYPE = 4,
/// This flag specifies a directory or sub-directory. The directory name in the name field
/// should end with a slash. On systems where disk allocation is performed on a directory
/// basis, the size field will contain the maximum number of bytes (which may be rounded to
/// the nearest disk block allocation unit) which the directory may hold. A size field of zero
/// indicates no such limiting. Systems which do not support limiting in this manner should
/// ignore the size field.
DIRTYPE = b'5',
DIRTYPE = 5,
/// This specifies a FIFO special file. Note that the archiving of a FIFO file archives the
/// existence of this file and not its contents.
FIFOTYPE = b'6',
FIFOTYPE = 6,
/// This specifies a contiguous file, which is the same as a normal file except that, in
/// operating systems which support it, all its space is allocated contiguously on the disk.
/// Operating systems which do not allow contiguous allocation should silently treat this type
/// as a normal file.
CONTTYPE = b'7',
CONTTYPE = 7,
/// Extended header referring to the next file in the archive
XHDTYPE = b'x',
/// Global extended header
@ -259,9 +252,7 @@ pub enum TypeFlag {
}
bitflags::bitflags! {
/// UNIX file permissions in octal format.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// UNIX file permissions in octal format.
pub struct ModeFlags: u64 {
/// Set UID on execution.
const SetUID = 0o4000;

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2023 Philipp Schuster
Copyright (c) 2021 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -21,27 +21,23 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero
//! allocations. If you have a standard environment and need full feature
//! support, I recommend the use of <https://crates.io/crates/tar> instead.
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
//! If you have a standard environment and need full feature support, I recommend the use of
//! <https://crates.io/crates/tar> instead.
//!
//! The crate is simple and only supports reading of "basic" archives, therefore
//! no extensions, such as GNU Longname. The maximum supported file name length
//! is 100 characters including the NULL-byte. The maximum supported file size
//! is 8 GiB. Also, directories are not supported yet but only flat collections
//! of files.
//! The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
//! as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte.
//! The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
//! collections of files.
//!
//! This library is useful, if you write a kernel or a similar low-level
//! application, which needs "a bunch of files" from an archive ("init ram
//! disk"). The Tar file could for example come as a Multiboot2 boot module
//! provided by the bootloader.
//! This library is useful, if you write a kernel or a similar low-level application, which needs
//! "a bunch of files" from an archive ("init ram disk"). The Tar file could for example come
//! as a Multiboot2 boot module provided by the bootloader.
//!
//! This crate focuses on extracting files from uncompressed Tar archives
//! created with default options by **GNU Tar**. GNU Extensions such as sparse
//! files, incremental archives, and long filename extension are not supported
//! yet. [gnu.org](https://www.gnu.org/software/tar/manual/html_section/Formats.html)
//! provides a good overview over possible archive formats and their
//! limitations.
//! This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**.
//! GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet.
//! [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
//! archive formats and their limitations.
//!
//! # Example
//! ```rust
@ -78,7 +74,6 @@ SOFTWARE.
clippy::fallible_impl_from
)]
#![deny(missing_debug_implementations)]
#![deny(rustdoc::all)]
#[cfg_attr(test, macro_use)]
#[cfg(test)]
@ -89,8 +84,6 @@ extern crate alloc;
/// Each Archive Entry (either Header or Data Block) is a block of 512 bytes.
const BLOCKSIZE: usize = 512;
/// Maximum filename length of the base Tar format including the terminating NULL-byte.
const FILENAME_MAX_LEN: usize = 100;
mod archive;
mod header;