initial commit
This commit is contained in:
commit
3a5d764d72
11
.editorconfig
Normal file
11
.editorconfig
Normal file
@ -0,0 +1,11 @@
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Unix-style newlines with a newline ending every file
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
trim_trailing_whitespace = true
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
157
Cargo.lock
generated
Normal file
157
Cargo.lock
generated
Normal file
@ -0,0 +1,157 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"humantime",
|
||||
"log",
|
||||
"regex",
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
|
||||
[[package]]
|
||||
name = "tar-no-std"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"bitflags",
|
||||
"env_logger",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
24
Cargo.toml
Normal file
24
Cargo.toml
Normal file
@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "tar-no-std"
|
||||
description = """
|
||||
Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
|
||||
The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
|
||||
as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte.
|
||||
"""
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
keywords = ["tar", "tarball", "archive"]
|
||||
categories = ["data-structures", "no-std", "parser-implementations"]
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/phip1611/tar-no-std"
|
||||
repository = "https://github.com/phip1611/tar-no-std"
|
||||
documentation = "https://docs.rs/tar-no-std"
|
||||
|
||||
[dependencies]
|
||||
bitflags = "1.3"
|
||||
arrayvec = { version = "0.7", default-features = false }
|
||||
log = { version = "0.4", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
env_logger = "0.9"
|
32
README.md
Normal file
32
README.md
Normal file
@ -0,0 +1,32 @@
|
||||
# `tar-no-std` - Parse Tar Archives (Tarballs)
|
||||
|
||||
_Due to historical reasons, there are several formats of tar archives. All of them are based on the same principles,
|
||||
but have some subtle differences that often make them incompatible with each other._ [[0]]
|
||||
|
||||
Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. If you have a standard
|
||||
environment and need full feature support, I recommend the use of <https://crates.io/crates/tar> instead.
|
||||
The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
|
||||
as *GNU Longname*. The maximum supported file name length is 100 characters including the NULL-byte.
|
||||
The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
|
||||
collections of files.
|
||||
|
||||
This library is useful, if you write a kernel or a similar low-level application, which needs
|
||||
"a bunch of files" from an archive ("init ramdisk"). The Tar file could for example come
|
||||
as a Multiboot2 boot module provided by the bootloader.
|
||||
|
||||
This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**.
|
||||
GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet.
|
||||
[This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
|
||||
archive formats and their limitations.
|
||||
|
||||
|
||||
|
||||
## Compression
|
||||
If your tar file is compressed, e.g. by `gzip`, you need to uncompress the bytes first (e.g. by a *deflate* algorithm),
|
||||
before you pass them to this library.
|
||||
|
||||
## MSRV
|
||||
The MSRV is 1.51.0 stable.
|
||||
|
||||
|
||||
[0]: https://www.gnu.org/software/tar/manual/html_section/Formats.html
|
36
examples/minimal.rs
Normal file
36
examples/minimal.rs
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Philipp Schuster
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
use tar_no_std::TarArchive;
|
||||
|
||||
fn main() {
|
||||
// log: not mandatory
|
||||
std::env::set_var("RUST_LOG", "trace");
|
||||
env_logger::init();
|
||||
|
||||
// also works in no_std environment
|
||||
let archive = include_bytes!("../tests/gnu_tar_default.tar");
|
||||
let archive = TarArchive::new(archive);
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
println!("{:#?}", entries);
|
||||
}
|
236
src/archive.rs
Normal file
236
src/archive.rs
Normal file
@ -0,0 +1,236 @@
|
||||
/*
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Philipp Schuster
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
//! Module for [`TarArchive`].
|
||||
|
||||
use crate::header::PosixHeader;
|
||||
use crate::{TypeFlag, BLOCKSIZE};
|
||||
use arrayvec::ArrayString;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
use core::ptr;
|
||||
use core::str::FromStr;
|
||||
|
||||
/// Describes an entry in an archive.
|
||||
/// Currently only supports files but no directories.
|
||||
pub struct ArchiveEntry<'a> {
|
||||
filename: ArrayString<100>,
|
||||
data: &'a [u8],
|
||||
size: usize,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
impl<'a> ArchiveEntry<'a> {
|
||||
pub fn new(filename: ArrayString<100>, data: &'a [u8]) -> Self {
|
||||
ArchiveEntry {
|
||||
filename,
|
||||
data,
|
||||
size: data.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Filename of the entry. Max 99 characters.
|
||||
pub fn filename(&self) -> ArrayString<100> {
|
||||
self.filename
|
||||
}
|
||||
|
||||
/// Data of the file.
|
||||
pub fn data(&self) -> &'a [u8] {
|
||||
self.data
|
||||
}
|
||||
|
||||
/// Filesize in bytes.
|
||||
pub fn size(&self) -> usize {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Debug for ArchiveEntry<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
|
||||
f.debug_struct("ArchiveEntry")
|
||||
.field("filename", &self.filename().as_str())
|
||||
.field("size", &self.size())
|
||||
.field("data", &"<bytes>")
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type around the bytes, which represents an archive.
|
||||
#[derive(Debug)]
|
||||
pub struct TarArchive<'a> {
|
||||
data: &'a [u8],
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
impl<'a> TarArchive<'a> {
|
||||
/// Interprets the provided byte array as Tar archive.
|
||||
pub fn new(data: &'a [u8]) -> Self {
|
||||
assert_eq!(
|
||||
data.len() % BLOCKSIZE,
|
||||
0,
|
||||
"data must be a multiple of BLOCKSIZE={}",
|
||||
BLOCKSIZE
|
||||
);
|
||||
Self { data }
|
||||
}
|
||||
|
||||
/// Iterates over all entries of the TAR Archive.
|
||||
/// Returns items of type [`ArchiveEntry`].
|
||||
pub fn entries(&self) -> ArchiveIterator {
|
||||
ArchiveIterator::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over the files. Each iteration step starts
|
||||
/// at the next Tar header entry.
|
||||
#[derive(Debug)]
|
||||
pub struct ArchiveIterator<'a> {
|
||||
archive: &'a TarArchive<'a>,
|
||||
block_index: usize,
|
||||
}
|
||||
|
||||
impl<'a> ArchiveIterator<'a> {
|
||||
pub fn new(archive: &'a TarArchive<'a>) -> Self {
|
||||
Self {
|
||||
archive,
|
||||
block_index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a pointer to the next Header.
|
||||
fn next_hdr(&self, block_index: usize) -> *const PosixHeader {
|
||||
let hdr_ptr = &self.archive.data[block_index * BLOCKSIZE];
|
||||
let hdr_ptr = hdr_ptr as *const u8;
|
||||
hdr_ptr as *const PosixHeader
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ArchiveIterator<'a> {
|
||||
type Item = ArchiveEntry<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.block_index * BLOCKSIZE >= self.archive.data.len() {
|
||||
log::warn!("Reached end of Tar archive data without finding zero/end blocks!");
|
||||
return None;
|
||||
}
|
||||
|
||||
let hdr = self.next_hdr(self.block_index);
|
||||
let hdr = unsafe { ptr::read(hdr) };
|
||||
|
||||
// check if we found end of archive
|
||||
if hdr.is_zero_block() {
|
||||
let next_hdr = unsafe { ptr::read(self.next_hdr(self.block_index + 1)) };
|
||||
if next_hdr.is_zero_block() {
|
||||
// gracefully terminated Archive
|
||||
log::debug!("End of Tar archive with two zero blocks!");
|
||||
} else {
|
||||
log::warn!("Zero block found at end of Tar archive, but only one instead of two!");
|
||||
}
|
||||
// end of archive
|
||||
return None;
|
||||
}
|
||||
|
||||
if hdr.typeflag != TypeFlag::AREGTYPE && hdr.typeflag != TypeFlag::REGTYPE {
|
||||
log::warn!(
|
||||
"Found entry of type={:?}, but only files are supported",
|
||||
hdr.typeflag
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
// fetch data of file from next block(s)
|
||||
let block_count = hdr.payload_block_count();
|
||||
let i_begin = self.block_index * BLOCKSIZE;
|
||||
let i_end = i_begin + block_count * BLOCKSIZE;
|
||||
debug_assert!(i_end <= self.archive.data.len(), "index ouf of range!");
|
||||
// +1: hdr itself + data blocks
|
||||
self.block_index += block_count + 1;
|
||||
|
||||
let file_block_bytes = &self.archive.data[i_begin..i_end];
|
||||
let file_bytes = &file_block_bytes[0..hdr.size.val()];
|
||||
|
||||
Some(ArchiveEntry::new(
|
||||
ArrayString::from_str(hdr.name.as_string().as_str()).unwrap(),
|
||||
file_bytes,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::vec::Vec;
|
||||
|
||||
#[test]
|
||||
fn test_archive_list() {
|
||||
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
println!("{:#?}", entries);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_archive_entries() {
|
||||
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);
|
||||
|
||||
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_gnu.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);
|
||||
|
||||
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_oldgnu.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);
|
||||
|
||||
// UNSUPPORTED. Uses extensions.
|
||||
/*let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_pax.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);*/
|
||||
|
||||
// UNSUPPORTED. Uses extensions.
|
||||
/*let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_posix.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);*/
|
||||
|
||||
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_ustar.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);
|
||||
|
||||
let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_v7.tar"));
|
||||
let entries = archive.entries().collect::<Vec<_>>();
|
||||
assert_archive_content(&entries);
|
||||
}
|
||||
|
||||
fn assert_archive_content(entries: &[ArchiveEntry]) {
|
||||
assert_eq!(entries.len(), 3);
|
||||
// order in that I stored the files into the archive
|
||||
assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt");
|
||||
assert_eq!(entries[0].size(), 513);
|
||||
assert_eq!(entries[0].data().len(), 513);
|
||||
assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt");
|
||||
assert_eq!(entries[1].size(), 513);
|
||||
assert_eq!(entries[1].data().len(), 513);
|
||||
assert_eq!(entries[2].filename().as_str(), "hello_world.txt");
|
||||
assert_eq!(entries[2].size(), 12);
|
||||
assert_eq!(entries[2].data().len(), 12);
|
||||
}
|
||||
}
|
396
src/header.rs
Normal file
396
src/header.rs
Normal file
@ -0,0 +1,396 @@
|
||||
/*
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Philipp Schuster
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
//! TAR header definition taken from <https://www.gnu.org/software/tar/manual/html_node/Standard.html>.
|
||||
//! A Tar-archive is a collection of 512-byte sized blocks. Unfortunately there are several
|
||||
//! TAR-like archive specifications. An Overview can be found here:
|
||||
//! <https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>
|
||||
//!
|
||||
//! This library focuses on extracting files from the GNU Tar format.
|
||||
|
||||
#![allow(non_upper_case_globals)]
|
||||
|
||||
use crate::BLOCKSIZE;
|
||||
use arrayvec::ArrayString;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
|
||||
/// The file size is encoded as octal ASCII number inside a Tar header.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(transparent)]
|
||||
pub struct Size(StaticCString<12>);
|
||||
|
||||
impl Size {
|
||||
/// Returns the octal ASCII number as actual size in bytes.
|
||||
pub fn val(&self) -> usize {
|
||||
usize::from_str_radix(self.0.as_string().as_str(), 8).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Size {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
|
||||
let mut debug = f.debug_tuple("Size");
|
||||
debug.field(&self.val());
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(transparent)]
|
||||
pub struct Mode(StaticCString<8>);
|
||||
|
||||
impl Mode {
|
||||
pub fn to_flags(self) -> ModeFlags {
|
||||
let octal_number_str = self.0.as_string();
|
||||
let bits = u64::from_str_radix(octal_number_str.as_str(), 8).unwrap();
|
||||
ModeFlags::from_bits(bits).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Mode {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
|
||||
let mut debug = f.debug_tuple("Mode");
|
||||
debug.field(&self.to_flags());
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// A C-String that is stored in a static array. All unused
|
||||
/// chars must be a NULL-byte.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(transparent)]
|
||||
pub struct StaticCString<const N: usize>([u8; N]);
|
||||
|
||||
#[allow(unused)]
|
||||
impl<const N: usize> StaticCString<N> {
|
||||
/// Constructor.
|
||||
fn new(bytes: [u8; N]) -> Self {
|
||||
Self(bytes)
|
||||
}
|
||||
|
||||
/// Returns the length of the string without NULL-byte.
|
||||
pub fn len(&self) -> usize {
|
||||
// not as efficient as it could be but negligible
|
||||
self.as_string().len()
|
||||
}
|
||||
|
||||
/// Returns a string without null bytes.
|
||||
pub fn as_string(&self) -> ArrayString<N> {
|
||||
let mut string = ArrayString::new();
|
||||
// copy all bytes (=ASCII) into string
|
||||
self.0
|
||||
.clone()
|
||||
.iter()
|
||||
// remove all zero bytes; there is always one
|
||||
// zero byte at the end. Furtherore, the other
|
||||
// unused bytes are also zero, but not part of the
|
||||
// string.
|
||||
.filter(|x| **x != 0)
|
||||
.for_each(|b| string.push(*b as char));
|
||||
string
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Debug for StaticCString<N> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
|
||||
let mut debug = f.debug_tuple("Name");
|
||||
let str = self.as_string();
|
||||
if str.len() == 0 {
|
||||
debug.field(&"<empty>");
|
||||
} else {
|
||||
debug.field(&str);
|
||||
}
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990).
|
||||
/// "New" (version?) GNU Tar versions use this archive format by default.
|
||||
/// (<https://www.gnu.org/software/tar/manual/html_node/Formats.html#Formats>).
|
||||
///
|
||||
/// Each file is started by such a header, that describes the size and
|
||||
/// the file name. After that, the file content stands in chunks of 512 bytes.
|
||||
/// The number of bytes can be derived from the file size.
|
||||
///
|
||||
/// This is also mostly compatible with the "Ustar"-header and the "GNU format".
|
||||
/// Because this library only needs to fetch data and filename, we don't need
|
||||
/// further checks.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[repr(C, packed)]
|
||||
pub struct PosixHeader {
|
||||
/// Name. There is always a null byte, therefore
|
||||
/// the max len is 99.
|
||||
pub name: StaticCString<100>,
|
||||
pub mode: Mode,
|
||||
pub uid: [u8; 8],
|
||||
pub gid: [u8; 8],
|
||||
// confusing; size is stored as ASCII string
|
||||
pub size: Size,
|
||||
pub mtime: [u8; 12],
|
||||
pub cksum: [u8; 8],
|
||||
pub typeflag: TypeFlag,
|
||||
/// Name. There is always a null byte, therefore
|
||||
/// the max len is 99.
|
||||
pub linkname: StaticCString<100>,
|
||||
pub magic: StaticCString<6>,
|
||||
pub version: StaticCString<2>,
|
||||
/// Username. There is always a null byte, therefore
|
||||
/// the max len is N-1.
|
||||
pub uname: StaticCString<32>,
|
||||
/// Groupname. There is always a null byte, therefore
|
||||
/// the max len is N-1.
|
||||
pub gname: StaticCString<32>,
|
||||
pub dev_major: [u8; 8],
|
||||
pub dev_minor: [u8; 8],
|
||||
/// There is always a null byte, therefore
|
||||
/// the max len is N-1.
|
||||
pub prefix: StaticCString<155>,
|
||||
// padding => to BLOCKSIZE bytes
|
||||
pub _pad: [u8; 12],
|
||||
}
|
||||
|
||||
impl PosixHeader {
|
||||
/// Returns the number of blocks that are required to
|
||||
/// read the whole file content.
|
||||
pub fn payload_block_count(&self) -> usize {
|
||||
let div = self.size.val() / BLOCKSIZE;
|
||||
let modulo = self.size.val() % BLOCKSIZE;
|
||||
if modulo > 0 {
|
||||
(div + 1) as usize
|
||||
} else {
|
||||
div as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// A Tar archive is terminated, if a end-of-archive entry, which consists of two 512 blocks
|
||||
/// of zero bytes, is found.
|
||||
pub fn is_zero_block(&self) -> bool {
|
||||
let ptr = self as *const Self as *const u8;
|
||||
let self_bytes = unsafe { core::slice::from_raw_parts(ptr, BLOCKSIZE) };
|
||||
self_bytes.iter().filter(|x| **x == 0).count() == BLOCKSIZE
|
||||
}
|
||||
}
|
||||
|
||||
/// Type of an archive entry, stored as a single ASCII character in the
/// header. Apart from `AREGTYPE` (a NUL byte), every value is the ASCII
/// code of a printable character, e.g. `b'5'` for a directory.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(u8)]
#[allow(unused)]
pub enum TypeFlag {
    /// Represents a regular file. In order to be compatible with older versions of tar, a typeflag
    /// value of AREGTYPE should be silently recognized as a regular file. New archives should be
    /// created using REGTYPE. Also, for backward compatibility, tar treats a regular file whose
    /// name ends with a slash as a directory.
    REGTYPE = b'0',
    /// Represents a regular file. In order to be compatible with older versions of tar, a typeflag
    /// value of AREGTYPE should be silently recognized as a regular file. New archives should be
    /// created using REGTYPE. Also, for backward compatibility, tar treats a regular file whose
    /// name ends with a slash as a directory.
    AREGTYPE = b'\0',
    /// This flag represents a file linked to another file, of any type, previously archived. Such
    /// files are identified in Unix by each file having the same device and inode number. The
    /// linked-to name is specified in the linkname field with a trailing null.
    LINK = b'1',
    /// This represents a symbolic link to another file. The linked-to name is specified in the
    /// linkname field with a trailing null.
    SYMTYPE = b'2',
    /// Represents a character special file. In this case the devmajor and devminor fields will
    /// contain the major and minor device numbers respectively. Operating systems may map the
    /// device specifications to their own local specification, or may ignore the entry.
    CHRTYPE = b'3',
    /// Represents a block special file. In this case the devmajor and devminor fields will
    /// contain the major and minor device numbers respectively. Operating systems may map the
    /// device specifications to their own local specification, or may ignore the entry.
    BLKTYPE = b'4',
    /// This flag specifies a directory or sub-directory. The directory name in the name field
    /// should end with a slash. On systems where disk allocation is performed on a directory
    /// basis, the size field will contain the maximum number of bytes (which may be rounded to
    /// the nearest disk block allocation unit) which the directory may hold. A size field of zero
    /// indicates no such limiting. Systems which do not support limiting in this manner should
    /// ignore the size field.
    DIRTYPE = b'5',
    /// This specifies a FIFO special file. Note that the archiving of a FIFO file archives the
    /// existence of this file and not its contents.
    FIFOTYPE = b'6',
    /// This specifies a contiguous file, which is the same as a normal file except that, in
    /// operating systems which support it, all its space is allocated contiguously on the disk.
    /// Operating systems which do not allow contiguous allocation should silently treat this type
    /// as a normal file.
    CONTTYPE = b'7',
    /// Extended header referring to the next file in the archive
    XHDTYPE = b'x',
    /// Global extended header
    XGLTYPE = b'g',
}
|
||||
|
||||
bitflags::bitflags! {
|
||||
/// UNIX file permissions in octal format.
|
||||
pub struct ModeFlags: u64 {
|
||||
/// Set UID on execution.
|
||||
const SetUID = 0o4000;
|
||||
/// Set GID on execution.
|
||||
const SetGID = 0o2000;
|
||||
/// Reserved.
|
||||
const TSVTX = 0o1000;
|
||||
/// Owner read.
|
||||
const OwnerRead = 0o400;
|
||||
/// Owner write.
|
||||
const OwnerWrite = 0o200;
|
||||
/// Owner execute.
|
||||
const OwnerExec = 0o100;
|
||||
/// Group read.
|
||||
const GroupRead = 0o040;
|
||||
/// Group write.
|
||||
const GroupWrite = 0o020;
|
||||
/// Group execute.
|
||||
const GroupExec = 0o010;
|
||||
/// Others read.
|
||||
const OthersRead = 0o004;
|
||||
/// Others write.
|
||||
const OthersWrite = 0o002;
|
||||
/// Others execute.
|
||||
const OthersExec = 0o001;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::header::{PosixHeader, StaticCString, TypeFlag};
    use crate::BLOCKSIZE;
    use std::mem::size_of;

    /// Name of the first file stored in every GNU Tar fixture used below.
    const FIRST_ENTRY_NAME: &str = "bye_world_513b.txt";

    /// Copies the leading bytes of `bytes` into a [`PosixHeader`] value.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than `size_of::<PosixHeader>()` bytes, because
    /// the raw read below would otherwise run past the end of the slice.
    fn bytes_to_archive(bytes: &[u8]) -> PosixHeader {
        assert!(
            bytes.len() >= size_of::<PosixHeader>(),
            "input is too short to contain a Tar header"
        );
        let hdr = bytes.as_ptr() as *const PosixHeader;
        // SAFETY: the length check above keeps the read inside `bytes`, and
        // `read_unaligned` places no alignment requirement on `hdr`.
        // NOTE(review): this still assumes that the fixture bytes form a valid
        // `PosixHeader` (e.g. that the type flag byte is a known `TypeFlag`) -
        // confirm against the struct definition.
        unsafe { core::ptr::read_unaligned(hdr) }
    }

    /// Asserts that the first header in `bytes` has the type flag `expected_type`
    /// and carries the file name [`FIRST_ENTRY_NAME`].
    fn assert_first_entry(bytes: &[u8], expected_type: TypeFlag) {
        let hdr = bytes_to_archive(bytes);
        assert_eq!(
            hdr.typeflag, expected_type,
            "the first entry is a regular file!"
        );
        assert_eq!(hdr.name.as_string().as_str(), FIRST_ENTRY_NAME);
    }

    /// Prints the debug representation of the first header of the default archive.
    #[test]
    fn test_display_header() {
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar"));
        println!("{:#?}", archive);
    }

    /// Prints the `magic` and `version` fields of the first header of every fixture,
    /// one line per archive format.
    #[test]
    fn test_show_tar_header_magics() {
        let headers = [
            (
                "default",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")),
            ),
            (
                "gnu",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar")),
            ),
            (
                "oldgnu",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar")),
            ),
            (
                "pax",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar")),
            ),
            (
                "posix",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_posix.tar")),
            ),
            (
                "ustar",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_ustar.tar")),
            ),
            (
                "v7",
                bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar")),
            ),
        ];

        for (label, archive) in headers.iter() {
            println!(
                "{}: magic='{:?}', version='{:?}'",
                label, archive.magic, archive.version
            );
        }
    }

    /// Checks the type flag and file name of the first header for every fixture
    /// format that is currently supported.
    #[test]
    fn test_parse_tar_header_filename() {
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_default.tar"),
            TypeFlag::REGTYPE,
        );
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_gnu.tar"),
            TypeFlag::REGTYPE,
        );
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_oldgnu.tar"),
            TypeFlag::REGTYPE,
        );

        /* UNSUPPORTED YET. Uses extensions.
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar"));
        assert_eq!(archive.typeflag, TypeFlag::REGTYPE, "the first entry is a regular file!");
        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); */

        /* UNSUPPORTED YET. Uses extensions (unsupported extension XHDTYPE).
        let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_posix.tar"));
        assert_eq!(archive.typeflag, TypeFlag::REGTYPE, "the first entry is a regular file!");
        assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); */

        assert_first_entry(
            include_bytes!("../tests/gnu_tar_ustar.tar"),
            TypeFlag::REGTYPE,
        );

        // ARegType: legacy
        assert_first_entry(
            include_bytes!("../tests/gnu_tar_v7.tar"),
            TypeFlag::AREGTYPE,
        );
    }

    /// A header must occupy exactly one archive block.
    #[test]
    fn test_size() {
        assert_eq!(BLOCKSIZE, size_of::<PosixHeader>());
    }

    /// The terminating NUL byte is excluded from both the length and the string value.
    #[test]
    fn test_static_str() {
        let s = StaticCString::new(*b"0000633\0");
        assert_eq!(s.len(), 7);
        assert_eq!(s.as_string().as_str(), "0000633");
    }
}
|
55
src/lib.rs
Normal file
55
src/lib.rs
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Philipp Schuster
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
|
||||
//! If you have a standard environment and need full feature support, I recommend the use of
|
||||
//! <https://crates.io/crates/tar> instead.
|
||||
//!
|
||||
//! The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
|
||||
//! as GNU Longname. The maximum supported file name length is 100 characters including the NUL byte.
|
||||
//! The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
|
||||
//! collections of files.
|
||||
//!
|
||||
//! This library is useful if you write a kernel or a similar low-level application that needs
|
||||
//! "a bunch of files" from an archive ("init ram disk"). The Tar file could for example come
|
||||
//! as a Multiboot2 boot module provided by the bootloader.
|
||||
//!
|
||||
//! This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**.
|
||||
//! GNU extensions such as sparse files, incremental archives, and the long filename extension are not supported yet.
|
||||
//! [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview of the possible
|
||||
//! archive formats and their limitations.
|
||||
|
||||
#![cfg_attr(not(test), no_std)]
|
||||
|
||||
#[cfg_attr(test, macro_use)]
|
||||
#[cfg(test)]
|
||||
extern crate std;
|
||||
|
||||
/// Size in bytes of one Tar archive block. Each archive entry (either a header or a
/// data block) is a block of exactly 512 bytes.
|
||||
const BLOCKSIZE: usize = 512;
|
||||
|
||||
mod archive;
|
||||
mod header;
|
||||
|
||||
pub use archive::*;
|
||||
pub use header::*;
|
17
tests/bye_world_513b.txt
Normal file
17
tests/bye_world_513b.txt
Normal file
@ -0,0 +1,17 @@
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
|
BIN
tests/gnu_tar_default.tar
Normal file
BIN
tests/gnu_tar_default.tar
Normal file
Binary file not shown.
BIN
tests/gnu_tar_gnu.tar
Normal file
BIN
tests/gnu_tar_gnu.tar
Normal file
Binary file not shown.
BIN
tests/gnu_tar_oldgnu.tar
Normal file
BIN
tests/gnu_tar_oldgnu.tar
Normal file
Binary file not shown.
BIN
tests/gnu_tar_pax.tar
Normal file
BIN
tests/gnu_tar_pax.tar
Normal file
Binary file not shown.
BIN
tests/gnu_tar_posix.tar
Normal file
BIN
tests/gnu_tar_posix.tar
Normal file
Binary file not shown.
BIN
tests/gnu_tar_ustar.tar
Normal file
BIN
tests/gnu_tar_ustar.tar
Normal file
Binary file not shown.
BIN
tests/gnu_tar_v7.tar
Normal file
BIN
tests/gnu_tar_v7.tar
Normal file
Binary file not shown.
1
tests/hello_world.txt
Normal file
1
tests/hello_world.txt
Normal file
@ -0,0 +1 @@
|
||||
Hello World
|
17
tests/hello_world_513b.txt
Normal file
17
tests/hello_world_513b.txt
Normal file
@ -0,0 +1,17 @@
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
###############################
|
||||
|
Loading…
Reference in New Issue
Block a user