Compare commits

...

21 Commits
v0.1.2 ... main

Author SHA1 Message Date
Philipp Schuster 57a0f950ef
Merge pull request #7 from schnoberts1/bugfix-entry-type-enum
Fix the values in typeflag so they match https://www.gnu.org/software
2023-08-30 17:54:57 +02:00
Andy Schneider f598cb3f9f Fix the values in typeflag so they match https://www.gnu.org/software/tar/manual/html_node/Standard.html. i.e DIRTYPE is b'5' not 5. 2023-08-27 12:55:21 +01:00
Philipp Schuster 382c10c217
Merge pull request #5 from phip1611/dev
v0.2.0
2023-04-11 13:08:27 +02:00
Philipp Schuster 3eccc3c199 ci: add typos check 2023-04-11 12:33:28 +02:00
Philipp Schuster 1412663d2c prepare v0.2.0 with code improvements 2023-04-11 12:09:15 +02:00
Philipp Schuster 547429a3dd
Merge pull request #4 from semiviral/main
update bitflags dependency `1.3` -> `2.0`
2023-04-11 10:39:45 +02:00
semiviral e417692b46 undo version bump 2023-04-02 08:27:47 -05:00
semiviral c8c12e7c0b `#[repr(transparent)]` for `ModeFlags` 2023-04-02 06:15:42 -05:00
semiviral 48bec03dfe update bitflags `1.3` -> `2.0` 2023-04-02 06:06:21 -05:00
Philipp Schuster 530d058154 v0.1.8 2022-05-02 22:00:18 +02:00
Philipp Schuster f70030eb38 MSRV fix 2022-05-02 21:59:51 +02:00
Philipp Schuster b0442bffa8 CI fix 2022-05-02 21:41:13 +02:00
Philipp Schuster 30dcdb395a stricter clippy rules 2022-01-19 10:44:20 +01:00
Philipp Schuster a01ea0f26f README update (v0.1.7) 2022-01-03 10:43:51 +01:00
Philipp Schuster a92e2ec39d doc fixes (v0.1.6) 2021-10-11 15:37:01 +02:00
Philipp Schuster 89dbfc6acd
Merge pull request #3 from phip1611/dev
'alloc'-feature and owning tar archive type (v0.1.5)
2021-10-11 15:32:16 +02:00
Philipp Schuster da12b748dc 'alloc'-feature and owning tar archive type (v0.1.5) 2021-10-11 15:30:24 +02:00
Philipp Schuster b35f7a5179 convenient "data_as_str" getter (v0.1.4) 2021-10-09 16:37:10 +02:00
Philipp Schuster a8c44fb83d small memory usage improvement (v0.1.3) 2021-10-05 12:01:52 +02:00
Philipp Schuster 40e6da5e97 clippy fix 2021-10-04 14:40:15 +02:00
Philipp Schuster 6cb4d04cef typo 2021-10-04 13:46:03 +02:00
12 changed files with 410 additions and 184 deletions

View File

@ -9,3 +9,7 @@ insert_final_newline = true
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
max_line_length = 80
[*.yml]
indent_size = 2

12
.github/workflows/qa.yml vendored Normal file
View File

@ -0,0 +1,12 @@
name: QA
on: [ push, pull_request ]
jobs:
spellcheck:
name: Spellcheck
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
# Executes "typos ."
- uses: crate-ci/typos@v1.13.20

View File

@ -1,48 +1,59 @@
name: Build
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
on: [push, pull_request]
env:
CARGO_TERM_COLOR: always
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
rust:
- stable
- nightly
- 1.51.0 # MSVR
steps:
- uses: actions/checkout@v2
- name: Build
run: cargo build --verbose
# use some no_std target
- name: Install no_std target thumbv7em-none-eabihf
run: rustup target add thumbv7em-none-eabihf
- name: Build (no_std)
run: cargo build --verbose --target thumbv7em-none-eabihf
- name: Run tests
run: cargo test --verbose
build:
runs-on: ${{ matrix.runs-on }}
strategy:
matrix:
runs-on:
- windows-latest
- ubuntu-latest
rust:
- stable
- nightly
- 1.60.0 # MSRV
steps:
- uses: actions/checkout@v2
# Important preparation step: override the latest default Rust version in GitHub CI
# with the current value of the iteration in the "strategy.matrix.rust"-array.
- uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: ${{ matrix.rust }}
override: true
- name: Build
run: cargo build --all-targets --verbose --features alloc
# use some arbitrary no_std target
- name: Install no_std target thumbv7em-none-eabihf
run: rustup target add thumbv7em-none-eabihf
- name: Build (no_std)
run: cargo build --verbose --target thumbv7em-none-eabihf --features alloc
- name: Run tests
run: cargo test --verbose --features alloc
style_checks:
runs-on: ubuntu-latest
strategy:
matrix:
rust:
- stable
- nightly
- 1.51.0 # MSVR
steps:
- uses: actions/checkout@v2
- name: Rustfmt
run: cargo fmt -- --check
- name: Clippy
run: cargo clippy
- name: Rustdoc
run: cargo doc
style_checks:
runs-on: ubuntu-latest
strategy:
matrix:
rust:
- 1.60.0
steps:
- uses: actions/checkout@v2
# Important preparation step: override the latest default Rust version in GitHub CI
# with the current value of the iteration in the "strategy.matrix.rust"-array.
- uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: ${{ matrix.rust }}
override: true
- name: Rustfmt
run: cargo fmt -- --check
- name: Clippy
run: cargo clippy --features alloc
- name: Rustdoc
run: cargo doc --no-deps --document-private-items --features alloc

6
CHANGELOG.md Normal file
View File

@ -0,0 +1,6 @@
# v0.2.0 (2023-04-11)
- MSRV is 1.60.0
- bitflags bump: 1.x -> 2.x
- a few internal code improvements (fewer possible panics)
- `Mode::to_flags` now returns a Result
- Feature `all` was removed. Use `alloc` instead.

View File

@ -7,8 +7,8 @@ as GNU Longname. The maximum supported file name length is 100 characters includ
The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
collections of files.
"""
version = "0.1.2"
edition = "2018"
version = "0.2.0"
edition = "2021"
keywords = ["tar", "tarball", "archive"]
categories = ["data-structures", "no-std", "parser-implementations"]
readme = "README.md"
@ -20,10 +20,14 @@ documentation = "https://docs.rs/tar-no-std"
# required because "env_logger" uses "log" but with dependency to std..
resolver = "2"
[features]
default = []
alloc = []
[dependencies]
bitflags = "1.3"
arrayvec = { version = "0.7", default-features = false }
bitflags = "2.0"
log = { version = "0.4", default-features = false }
[dev-dependencies]
env_logger = "0.9"
env_logger = "0.10"

View File

@ -5,11 +5,15 @@ but have some subtle differences that often make them incompatible with each oth
Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. If you have a standard
environment and need full feature support, I recommend the use of <https://crates.io/crates/tar> instead.
## Limitations
The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
as *GNU Longname*. The maximum supported file name length is 100 characters including the NULL-byte.
The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
collections of files.
## Use Case
This library is useful, if you write a kernel or a similar low-level application, which needs
"a bunch of files" from an archive ("init ramdisk"). The Tar file could for example come
as a Multiboot2 boot module provided by the bootloader.
@ -19,8 +23,10 @@ GNU Extensions such as sparse files, incremental archives, and long filename ext
[This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
archive formats and their limitations.
## Example
## Example (without `alloc`-feature)
```rust
use tar_no_std::TarArchiveRef;
fn main() {
// log: not mandatory
std::env::set_var("RUST_LOG", "trace");
@ -28,22 +34,26 @@ fn main() {
// also works in no_std environment (except the println!, of course)
let archive = include_bytes!("../tests/gnu_tar_default.tar");
let archive = TarArchive::new(archive);
let archive = TarArchiveRef::new(archive);
// Vec needs an allocator of course, but the library itself doesn't need one
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
println!("{:#?}", last_file_content);
println!("{:#?}", entries[2].data_as_str().expect("Should be valid UTF-8"));
}
```
## Compression
If your tar file is compressed, e.g. bei `.tar.gz`/`gzip`, you need to uncompress the bytes first
(e.g. by a *gzip* library). Afterwards, this crate can read and write the Tar archive format from the bytes.
## Alloc Feature
This crate allows the usage of the additional Cargo build time feature `alloc`. When this is used,
the crate also provides the type `TarArchive`, which owns the data on the heap.
## Compression (`tar.gz`)
If your tar file is compressed, e.g. by `.tar.gz`/`gzip`, you need to uncompress the bytes first
(e.g. by a *gzip* library). Afterwards, this crate can read the Tar archive format from the uncompressed
bytes.
## MSRV
The MSRV is 1.51.0 stable.
The MSRV is 1.60.0 stable.
[0]: https://www.gnu.org/software/tar/manual/html_section/Formats.html

11
build.sh Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Runs the build, test, and style checks locally with the same cargo
# invocations that the CI workflow uses.

# Abort on the first failing command, on use of an unset variable, and on a
# failing pipeline stage. Without this, a failed build or test would be
# ignored and only the exit status of the last command would be reported.
set -euo pipefail

cargo build --all-targets --verbose --features alloc
# use some random no_std target
rustup target add thumbv7em-none-eabihf
cargo build --verbose --target thumbv7em-none-eabihf --features alloc
cargo test --verbose --features alloc
cargo fmt -- --check
# Clippy and rustdoc are pinned to the 1.60.0 toolchain.
cargo +1.60.0 clippy --features alloc
cargo +1.60.0 doc --no-deps --document-private-items --features alloc

44
examples/alloc_feature.rs Normal file
View File

@ -0,0 +1,44 @@
/*
MIT License
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
use tar_no_std::TarArchive;
/// This example needs the `alloc` feature.
///
/// It embeds a test archive at compile time, hands a heap-owned copy of it to
/// [`TarArchive`], prints all entries, and finally prints the content of the
/// last file as a string.
fn main() {
// log: not mandatory
std::env::set_var("RUST_LOG", "trace");
env_logger::init();
// also works in no_std environment (except the println!, of course)
let archive = include_bytes!("../tests/gnu_tar_default.tar");
// `TarArchive` takes ownership of a `Box<[u8]>`, so copy the embedded bytes
// onto the heap first.
let archive_heap_owned = archive.to_vec().into_boxed_slice();
let archive = TarArchive::new(archive_heap_owned);
// Collecting the entries into a Vec needs an allocator as well.
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
// Index 2 is the last entry; this panics if the archive holds fewer than
// three entries or if the file content is not valid UTF-8.
println!(
"{:#?}",
entries[2].data_as_str().expect("Should be valid UTF-8")
);
}

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
use tar_no_std::TarArchive;
use tar_no_std::TarArchiveRef;
fn main() {
// log: not mandatory
@ -30,11 +30,13 @@ fn main() {
// also works in no_std environment (except the println!, of course)
let archive = include_bytes!("../tests/gnu_tar_default.tar");
let archive = TarArchive::new(archive);
let archive = TarArchiveRef::new(archive);
// Vec needs an allocator of course, but the library itself doesn't need one
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
println!("content of last file:");
let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
println!("{:#?}", last_file_content);
println!(
"{:#?}",
entries[2].data_as_str().expect("Should be valid UTF-8")
);
}

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -21,26 +21,29 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
//! Module for [`TarArchive`].
//! Module for [`TarArchiveRef`]. If the `alloc`-feature is enabled, this crate
//! also exports `TarArchive`, which owns data on the heap.
use crate::header::PosixHeader;
use crate::{TypeFlag, BLOCKSIZE};
use crate::{TypeFlag, BLOCKSIZE, FILENAME_MAX_LEN};
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use arrayvec::ArrayString;
use core::fmt::{Debug, Formatter};
use core::ptr;
use core::str::FromStr;
use core::str::{FromStr, Utf8Error};
use log::warn;
/// Describes an entry in an archive.
/// Currently only supports files but no directories.
pub struct ArchiveEntry<'a> {
filename: ArrayString<100>,
filename: ArrayString<FILENAME_MAX_LEN>,
data: &'a [u8],
size: usize,
}
#[allow(unused)]
impl<'a> ArchiveEntry<'a> {
pub const fn new(filename: ArrayString<100>, data: &'a [u8]) -> Self {
const fn new(filename: ArrayString<FILENAME_MAX_LEN>, data: &'a [u8]) -> Self {
ArchiveEntry {
filename,
data,
@ -48,8 +51,9 @@ impl<'a> ArchiveEntry<'a> {
}
}
/// Filename of the entry. Max 99 characters.
pub const fn filename(&self) -> ArrayString<100> {
/// Filename of the entry with a maximum of 100 characters (including the
/// terminating NULL-byte).
pub const fn filename(&self) -> ArrayString<{ FILENAME_MAX_LEN }> {
self.filename
}
@ -58,6 +62,11 @@ impl<'a> ArchiveEntry<'a> {
self.data
}
/// Data of the file as string slice, if data is valid UTF-8.
///
/// The returned slice borrows from the archive data (lifetime `'a`); nothing
/// is copied.
///
/// # Errors
/// Returns the [`Utf8Error`] reported by [`core::str::from_utf8`] if the file
/// data is not valid UTF-8.
pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> {
core::str::from_utf8(self.data)
}
/// Filesize in bytes.
pub const fn size(&self) -> usize {
self.size
@ -74,15 +83,65 @@ impl<'a> Debug for ArchiveEntry<'a> {
}
}
/// Wrapper type around the bytes, which represents an archive.
/// Type that owns bytes on the heap, that represents a Tar archive.
/// Unlike [`TarArchiveRef`], this type is useful, if you need to own the
/// data as long as you need the archive, but not longer.
///
/// This is only available with the `alloc` feature of this crate.
#[cfg(feature = "alloc")]
#[derive(Debug)]
pub struct TarArchive<'a> {
pub struct TarArchive {
data: Box<[u8]>,
}
#[cfg(feature = "alloc")]
impl TarArchive {
/// Creates a new archive type, that owns the data on the heap. The provided byte array is
/// interpreted as bytes in Tar archive format.
///
/// # Panics
/// Panics if the length of `data` is not a multiple of `BLOCKSIZE`.
pub fn new(data: Box<[u8]>) -> Self {
// The iterator addresses the data in units of BLOCKSIZE, so a length
// that is not a whole number of blocks is rejected right away.
assert_eq!(
data.len() % BLOCKSIZE,
0,
"data must be a multiple of BLOCKSIZE={}, len is {}",
BLOCKSIZE,
data.len(),
);
Self { data }
}
/// Iterates over all entries of the Tar archive.
/// Returns items of type [`ArchiveEntry`].
/// See also [`ArchiveIterator`].
///
/// The returned iterator borrows the bytes owned by this archive.
pub fn entries(&self) -> ArchiveIterator {
ArchiveIterator::new(self.data.as_ref())
}
}
/// Converts heap-owned bytes into a [`TarArchive`].
///
/// This delegates to [`TarArchive::new`] and therefore panics under the same
/// condition (length of the data is not a multiple of `BLOCKSIZE`).
#[cfg(feature = "alloc")]
impl From<Box<[u8]>> for TarArchive {
fn from(data: Box<[u8]>) -> Self {
Self::new(data)
}
}
/// Consumes the [`TarArchive`] and hands back the heap-owned bytes it wraps,
/// without copying them.
#[cfg(feature = "alloc")]
impl From<TarArchive> for Box<[u8]> {
fn from(ar: TarArchive) -> Self {
ar.data
}
}
/// Wrapper type around bytes, which represents a Tar archive.
/// Unlike [`TarArchive`], this uses only a reference to the data.
#[derive(Debug)]
pub struct TarArchiveRef<'a> {
data: &'a [u8],
}
#[allow(unused)]
impl<'a> TarArchive<'a> {
/// Interprets the provided byte array as Tar archive.
impl<'a> TarArchiveRef<'a> {
/// Creates a new archive wrapper type. The provided byte array is interpreted as
/// bytes in Tar archive format.
pub fn new(data: &'a [u8]) -> Self {
assert_eq!(
data.len() % BLOCKSIZE,
@ -93,34 +152,34 @@ impl<'a> TarArchive<'a> {
Self { data }
}
/// Iterates over all entries of the TAR Archive.
/// Iterates over all entries of the Tar archive.
/// Returns items of type [`ArchiveEntry`].
/// See also [`ArchiveIterator`].
pub const fn entries(&self) -> ArchiveIterator {
ArchiveIterator::new(self)
ArchiveIterator::new(self.data)
}
}
/// Iterator over the files. Each iteration step starts
/// Iterator over the files of the archive. Each iteration starts
/// at the next Tar header entry.
#[derive(Debug)]
pub struct ArchiveIterator<'a> {
archive: &'a TarArchive<'a>,
archive_data: &'a [u8],
block_index: usize,
}
impl<'a> ArchiveIterator<'a> {
pub const fn new(archive: &'a TarArchive<'a>) -> Self {
pub const fn new(archive: &'a [u8]) -> Self {
Self {
archive,
archive_data: archive,
block_index: 0,
}
}
/// Returns a pointer to the next Header.
const fn next_hdr(&self, block_index: usize) -> *const PosixHeader {
let hdr_ptr = &self.archive.data[block_index * BLOCKSIZE];
let hdr_ptr = hdr_ptr as *const u8;
hdr_ptr as *const PosixHeader
/// Returns a reference to the header that starts at block `block_index`,
/// i.e. at byte offset `block_index * BLOCKSIZE` of the archive data.
///
/// # Panics
/// Panics if `block_index * BLOCKSIZE` is not a valid index into the archive
/// data.
///
/// NOTE(review): the iterator appears to call this with `block_index + 1`
/// after it found a zero block, without a separate bounds check. An archive
/// whose last block is a single zero block would then panic here. Confirm
/// and consider returning an `Option` instead.
fn next_hdr(&self, block_index: usize) -> &'a PosixHeader {
// Bounds-checked access to the first byte of the requested block.
let hdr_ptr = &self.archive_data[block_index * BLOCKSIZE];
// The pointer is derived from a reference and therefore never null, so the
// `.unwrap()` on the `Option` returned by `as_ref()` cannot fail.
//
// SAFETY (NOTE(review), assumptions to confirm):
// - assumes `PosixHeader` has an alignment of 1, so any byte offset is fine;
// - assumes at least `size_of::<PosixHeader>()` bytes follow `hdr_ptr`; only
//   the first byte is bounds-checked above, and the public iterator
//   constructor accepts slices of arbitrary length;
// - assumes every byte pattern is a valid `PosixHeader`. The header has a
//   `typeflag: TypeFlag` enum field, so a flag byte that is not a declared
//   variant would presumably be an invalid value.
unsafe { (hdr_ptr as *const u8).cast::<PosixHeader>().as_ref() }.unwrap()
}
}
@ -128,17 +187,16 @@ impl<'a> Iterator for ArchiveIterator<'a> {
type Item = ArchiveEntry<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.block_index * BLOCKSIZE >= self.archive.data.len() {
log::warn!("Reached end of Tar archive data without finding zero/end blocks!");
if self.block_index * BLOCKSIZE >= self.archive_data.len() {
warn!("Reached end of Tar archive data without finding zero/end blocks!");
return None;
}
let hdr = self.next_hdr(self.block_index);
let hdr = unsafe { ptr::read(hdr) };
// check if we found end of archive
if hdr.is_zero_block() {
let next_hdr = unsafe { ptr::read(self.next_hdr(self.block_index + 1)) };
let next_hdr = self.next_hdr(self.block_index + 1);
if next_hdr.is_zero_block() {
// gracefully terminated Archive
log::debug!("End of Tar archive with two zero blocks!");
@ -158,55 +216,67 @@ impl<'a> Iterator for ArchiveIterator<'a> {
}
if hdr.name.is_empty() {
log::warn!("Found empty file name",);
warn!("Found empty file name",);
}
// fetch data of file from next block(s)
let data_block_count = hdr.payload_block_count();
let hdr_size = hdr.size.val();
if let Err(e) = hdr_size {
warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}");
return None;
}
let hdr_size = hdr_size.unwrap();
// Fetch data of file from next block(s).
// .unwrap() is fine as we checked that hdr.size().val() is valid
// above
let data_block_count = hdr.payload_block_count().unwrap();
// +1: skip hdr block itself and start at data!
// i_begin is the byte begin index of this file in the array of the whole archive
let i_begin = (self.block_index + 1) * BLOCKSIZE;
// i_end is the exclusive byte end index of the data of the current file
let i_end = i_begin + data_block_count * BLOCKSIZE;
let file_block_bytes = &self.archive.data[i_begin..i_end];
// because each block is 512 bytes long, the file is not necessarily a multiple of 512 bytes
let file_bytes = &file_block_bytes[0..hdr.size.val()];
let file_block_bytes = &self.archive_data[i_begin..i_end];
// Each block is 512 bytes long, but the file size is not necessarily a
// multiple of 512.
let file_bytes = &file_block_bytes[0..hdr_size];
// in next iteration: start at next Archive entry header
// +1 for current hdr block itself + all data blocks
self.block_index += data_block_count + 1;
Some(ArchiveEntry::new(
ArrayString::from_str(hdr.name.as_string().as_str()).unwrap(),
file_bytes,
))
let filename = ArrayString::from_str(hdr.name.as_string().as_str());
// .unwrap() is fine as the capacity must be sufficient.
let filename = filename.unwrap();
Some(ArchiveEntry::new(filename, file_bytes))
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::str;
use std::vec::Vec;
#[test]
fn test_archive_list() {
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar"));
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar"));
let entries = archive.entries().collect::<Vec<_>>();
println!("{:#?}", entries);
}
/// Tests to read the entries from existing archives in various Tar flavors.
#[test]
fn test_archive_entries() {
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar"));
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_gnu.tar"));
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_gnu.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_oldgnu.tar"));
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_oldgnu.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
@ -220,39 +290,61 @@ mod tests {
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);*/
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_ustar.tar"));
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_v7.tar"));
let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_v7.tar"));
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
}
/// Like [`test_archive_entries`] but with additional `alloc` functionality.
#[cfg(feature = "alloc")]
#[test]
fn test_archive_entries_alloc() {
let data = include_bytes!("../tests/gnu_tar_default.tar")
.to_vec()
.into_boxed_slice();
let archive = TarArchive::new(data.clone());
let entries = archive.entries().collect::<Vec<_>>();
assert_archive_content(&entries);
// Test that the archive can be transformed into owned heap data.
assert_eq!(data, archive.into());
}
/// Tests that the parsed archive matches the expected order. The tarballs
/// in the tests directory were created once by me with files in the order
/// specified in this test.
fn assert_archive_content(entries: &[ArchiveEntry]) {
assert_eq!(entries.len(), 3);
// order in which I stored the files in the archive
assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt");
assert_eq!(entries[0].size(), 513);
assert_eq!(entries[0].data().len(), 513);
assert_eq!(
unsafe { str::from_utf8_unchecked(entries[0].data) },
include_str!("../tests/bye_world_513b.txt")
entries[0].data_as_str().expect("Should be valid UTF-8"),
// .replace: Ensure that the test also works on Windows
include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
);
// Test that an entry that needs two 512 byte data blocks is read
// properly.
assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt");
assert_eq!(entries[1].size(), 513);
assert_eq!(entries[1].data().len(), 513);
assert_eq!(
unsafe { str::from_utf8_unchecked(entries[1].data) },
include_str!("../tests/hello_world_513b.txt")
entries[1].data_as_str().expect("Should be valid UTF-8"),
// .replace: Ensure that the test also works on Windows
include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
);
assert_eq!(entries[2].filename().as_str(), "hello_world.txt");
assert_eq!(entries[2].size(), 12);
assert_eq!(entries[2].data().len(), 12);
assert_eq!(
unsafe { str::from_utf8_unchecked(entries[2].data) },
entries[2].data_as_str().expect("Should be valid UTF-8"),
"Hello World\n",
"file content must match"
);

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -30,9 +30,10 @@ SOFTWARE.
#![allow(non_upper_case_globals)]
use crate::BLOCKSIZE;
use crate::{BLOCKSIZE, FILENAME_MAX_LEN};
use arrayvec::ArrayString;
use core::fmt::{Debug, Formatter};
use core::num::ParseIntError;
/// The file size is encoded as octal ASCII number inside a Tar header.
#[derive(Copy, Clone)]
@ -41,8 +42,8 @@ pub struct Size(StaticCString<12>);
impl Size {
/// Returns the octal ASCII number as actual size in bytes.
pub fn val(&self) -> usize {
usize::from_str_radix(self.0.as_string().as_str(), 8).unwrap()
pub fn val(&self) -> Result<usize, ParseIntError> {
usize::from_str_radix(self.0.as_string().as_str(), 8)
}
}
@ -54,16 +55,24 @@ impl Debug for Size {
}
}
/// Errors that can occur when the mode string of a header is parsed into
/// [`ModeFlags`] by [`Mode::to_flags`].
#[derive(Debug)]
pub enum ModeError {
/// The mode string could not be parsed as an octal number.
ParseInt(ParseIntError),
/// The number was parsed, but [`ModeFlags::from_bits`] rejected it, i.e. it
/// contains bits that do not correspond to any known flag.
IllegalMode,
}
/// Wrapper around the UNIX file permissions given in octal ASCII.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Mode(StaticCString<8>);
impl Mode {
pub fn to_flags(self) -> ModeFlags {
/// Parses the [`ModeFlags`] from the mode string.
pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
let octal_number_str = self.0.as_string();
let bits = u64::from_str_radix(octal_number_str.as_str(), 8).unwrap();
ModeFlags::from_bits(bits).unwrap()
let bits =
u64::from_str_radix(octal_number_str.as_str(), 8).map_err(ModeError::ParseInt)?;
ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
}
}
@ -75,8 +84,11 @@ impl Debug for Mode {
}
}
/// A C-String that is stored in a static array. All unused
/// chars must be a NULL-byte.
/// A C-String that is stored in a static array. There is always a terminating
/// NULL-byte.
///
/// The content is likely to be UTF-8/ASCII, but that is not verified by this
/// type.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct StaticCString<const N: usize>([u8; N]);
@ -99,19 +111,16 @@ impl<const N: usize> StaticCString<N> {
self.len() == 0
}
/// Returns a string without null bytes.
/// Returns a string that includes all characters until the first null.
pub fn as_string(&self) -> ArrayString<N> {
let mut string = ArrayString::new();
// copy all bytes (=ASCII) into string
self.0
.clone()
.iter()
// remove all zero bytes; there is always one
// zero byte at the end. Furtherore, the other
// unused bytes are also zero, but not part of the
// string.
.filter(|x| **x != 0)
.for_each(|b| string.push(*b as char));
.copied()
// Take all chars until the terminating null.
.take_while(|byte| *byte != 0)
.for_each(|byte| string.push(byte as char));
string
}
}
@ -145,7 +154,7 @@ impl<const N: usize> Debug for StaticCString<N> {
pub struct PosixHeader {
/// Name. There is always a null byte, therefore
/// the max len is 99.
pub name: StaticCString<100>,
pub name: StaticCString<{ FILENAME_MAX_LEN }>,
pub mode: Mode,
pub uid: [u8; 8],
pub gid: [u8; 8],
@ -156,7 +165,7 @@ pub struct PosixHeader {
pub typeflag: TypeFlag,
/// Name. There is always a null byte, therefore
/// the max len is 99.
pub linkname: StaticCString<100>,
pub linkname: StaticCString<{ FILENAME_MAX_LEN }>,
pub magic: StaticCString<6>,
pub version: StaticCString<2>,
/// Username. There is always a null byte, therefore
@ -175,16 +184,14 @@ pub struct PosixHeader {
}
impl PosixHeader {
/// Returns the number of blocks that are required to
/// read the whole file content.
pub fn payload_block_count(&self) -> usize {
let div = self.size.val() / BLOCKSIZE;
let modulo = self.size.val() % BLOCKSIZE;
if modulo > 0 {
(div + 1) as usize
} else {
div as usize
}
/// Returns the number of blocks that are required to read the whole file
/// content, i.e. the file size divided by `BLOCKSIZE`, rounded up.
///
/// # Errors
/// Returns an error, if the file size can't be parsed from the header.
pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
    // Parse the octal size field only once; every call to `val()` re-parses
    // the string and can fail.
    let size = self.size.val()?;
    let full_blocks = size / BLOCKSIZE;
    // A trailing, partially filled block still occupies a whole block.
    let block_count = if size % BLOCKSIZE > 0 {
        full_blocks + 1
    } else {
        full_blocks
    };
    Ok(block_count)
}
/// A Tar archive is terminated, if an end-of-archive entry, which consists of two 512 blocks
@ -196,7 +203,10 @@ impl PosixHeader {
}
}
#[derive(Debug, Copy, Clone, PartialEq)]
/// Describes the kind of payload, that follows after a
/// [`PosixHeader`]. The properties of this payload are
/// described inside the header.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
#[allow(unused)]
pub enum TypeFlag {
@ -213,35 +223,35 @@ pub enum TypeFlag {
/// This flag represents a file linked to another file, of any type, previously archived. Such
/// files are identified in Unix by each file having the same device and inode number. The
/// linked-to name is specified in the linkname field with a trailing null.
LINK = 1,
LINK = b'1',
/// This represents a symbolic link to another file. The linked-to name is specified in the
/// linkname field with a trailing null.
SYMTYPE = 2,
SYMTYPE = b'2',
/// Represents character special files and block special files respectively. In this case the
/// devmajor and devminor fields will contain the major and minor device numbers respectively.
/// Operating systems may map the device specifications to their own local specification, or
/// may ignore the entry.
CHRTYPE = 3,
CHRTYPE = b'3',
/// Represents character special files and block special files respectively. In this case the
/// devmajor and devminor fields will contain the major and minor device numbers respectively.
/// Operating systems may map the device specifications to their own local specification, or
/// may ignore the entry.
BLKTYPE = 4,
BLKTYPE = b'4',
/// This flag specifies a directory or sub-directory. The directory name in the name field
/// should end with a slash. On systems where disk allocation is performed on a directory
/// basis, the size field will contain the maximum number of bytes (which may be rounded to
/// the nearest disk block allocation unit) which the directory may hold. A size field of zero
/// indicates no such limiting. Systems which do not support limiting in this manner should
/// ignore the size field.
DIRTYPE = 5,
DIRTYPE = b'5',
/// This specifies a FIFO special file. Note that the archiving of a FIFO file archives the
/// existence of this file and not its contents.
FIFOTYPE = 6,
FIFOTYPE = b'6',
/// This specifies a contiguous file, which is the same as a normal file except that, in
/// operating systems which support it, all its space is allocated contiguously on the disk.
/// Operating systems which do not allow contiguous allocation should silently treat this type
/// as a normal file.
CONTTYPE = 7,
CONTTYPE = b'7',
/// Extended header referring to the next file in the archive
XHDTYPE = b'x',
/// Global extended header
@ -249,7 +259,9 @@ pub enum TypeFlag {
}
bitflags::bitflags! {
/// UNIX file permissions on octal format.
/// UNIX file permissions in octal format.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModeFlags: u64 {
/// Set UID on execution.
const SetUID = 0o4000;
@ -284,9 +296,9 @@ mod tests {
use crate::BLOCKSIZE;
use std::mem::size_of;
fn bytes_to_archive(bytes: &[u8]) -> PosixHeader {
let hdr = bytes.as_ptr() as *const PosixHeader;
unsafe { core::ptr::read(hdr) }
/// Casts the bytes to a reference to a PosixHeader.
fn bytes_to_archive(bytes: &[u8]) -> &PosixHeader {
unsafe { (bytes.as_ptr() as *const PosixHeader).as_ref() }.unwrap()
}
#[test]

View File

@ -1,7 +1,7 @@
/*
MIT License
Copyright (c) 2021 Philipp Schuster
Copyright (c) 2023 Philipp Schuster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -21,58 +21,76 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
//! If you have a standard environment and need full feature support, I recommend the use of
//! <https://crates.io/crates/tar> instead.
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero
//! allocations. If you have a standard environment and need full feature
//! support, I recommend the use of <https://crates.io/crates/tar> instead.
//!
//! The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
//! as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte.
//! The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
//! collections of files.
//! The crate is simple and only supports reading "basic" archives, i.e., no
//! extensions such as GNU Longname. The maximum supported file name length
//! is 100 characters including the NULL-byte. The maximum supported file size
//! is 8 GiB. Also, directories are not supported yet; only flat collections
//! of files are.
//!
//! This library is useful, if you write a kernel or a similar low-level application, which needs
//! "a bunch of files" from an archive ("init ram disk"). The Tar file could for example come
//! as a Multiboot2 boot module provided by the bootloader.
//! This library is useful if you write a kernel or a similar low-level
//! application that needs "a bunch of files" from an archive ("init ram
//! disk"). The Tar file could, for example, come as a Multiboot2 boot module
//! provided by the bootloader.
//!
//! This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**.
//! GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet.
//! [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
//! archive formats and their limitations.
//! This crate focuses on extracting files from uncompressed Tar archives
//! created with default options by **GNU Tar**. GNU Extensions such as sparse
//! files, incremental archives, and the long filename extension are not
//! supported yet.
//! [gnu.org](https://www.gnu.org/software/tar/manual/html_section/Formats.html)
//! provides a good overview of the possible archive formats and their
//! limitations.
//!
//! # Example
//! ```rust
//! use tar_no_std::TarArchive;
//! use tar_no_std::TarArchiveRef;
//!
//! fn main() {
//! // log: not mandatory
//! std::env::set_var("RUST_LOG", "trace");
//! env_logger::init();
//! // log: not mandatory
//! std::env::set_var("RUST_LOG", "trace");
//! env_logger::init();
//!
//! // also works in no_std environment (except the println!, of course)
//! let archive = include_bytes!("../tests/gnu_tar_default.tar");
//! let archive = TarArchive::new(archive);
//! // Vec needs an allocator of course, but the library itself doesn't need one
//! let entries = archive.entries().collect::<Vec<_>>();
//! println!("{:#?}", entries);
//! println!("content of last file:");
//! let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
//! println!("{:#?}", last_file_content);
//! }
//! // also works in no_std environment (except the println!, of course)
//! let archive = include_bytes!("../tests/gnu_tar_default.tar");
//! let archive = TarArchiveRef::new(archive);
//! // Vec needs an allocator of course, but the library itself doesn't need one
//! let entries = archive.entries().collect::<Vec<_>>();
//! println!("{:#?}", entries);
//! println!("content of last file:");
//! let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
//! println!("{:#?}", last_file_content);
//! ```
#![cfg_attr(not(test), no_std)]
#![deny(rustdoc::all)]
#![allow(rustdoc::missing_doc_code_examples)]
#![deny(clippy::all)]
#![deny(clippy::missing_const_for_fn)]
#![deny(
clippy::all,
clippy::cargo,
clippy::nursery,
// clippy::restriction,
// clippy::pedantic
)]
// now allow a few rules which are denied by the above statement
// --> they are ridiculous and not necessary
#![allow(
clippy::suboptimal_flops,
clippy::redundant_pub_crate,
clippy::fallible_impl_from
)]
#![deny(missing_debug_implementations)]
#![deny(rustdoc::all)]
#[cfg_attr(test, macro_use)]
#[cfg(test)]
extern crate std;
#[cfg(feature = "alloc")]
extern crate alloc;
/// Each Archive Entry (either Header or Data Block) is a block of 512 bytes.
const BLOCKSIZE: usize = 512;
/// Maximum filename length of the base Tar format including the terminating NULL-byte.
const FILENAME_MAX_LEN: usize = 100;
mod archive;
mod header;