From 3a5d764d728e4ae8177f524592ba696329413eb8 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Mon, 4 Oct 2021 11:00:03 +0200 Subject: [PATCH] initial commit --- .editorconfig | 11 ++ .gitignore | 1 + Cargo.lock | 157 +++++++++++++++ Cargo.toml | 24 +++ README.md | 32 +++ examples/minimal.rs | 36 ++++ src/archive.rs | 236 ++++++++++++++++++++++ src/header.rs | 396 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 55 ++++++ tests/bye_world_513b.txt | 17 ++ tests/gnu_tar_default.tar | Bin 0 -> 10240 bytes tests/gnu_tar_gnu.tar | Bin 0 -> 10240 bytes tests/gnu_tar_oldgnu.tar | Bin 0 -> 10240 bytes tests/gnu_tar_pax.tar | Bin 0 -> 10240 bytes tests/gnu_tar_posix.tar | Bin 0 -> 10240 bytes tests/gnu_tar_ustar.tar | Bin 0 -> 10240 bytes tests/gnu_tar_v7.tar | Bin 0 -> 10240 bytes tests/hello_world.txt | 1 + tests/hello_world_513b.txt | 17 ++ 19 files changed, 983 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 examples/minimal.rs create mode 100644 src/archive.rs create mode 100644 src/header.rs create mode 100644 src/lib.rs create mode 100644 tests/bye_world_513b.txt create mode 100644 tests/gnu_tar_default.tar create mode 100644 tests/gnu_tar_gnu.tar create mode 100644 tests/gnu_tar_oldgnu.tar create mode 100644 tests/gnu_tar_pax.tar create mode 100644 tests/gnu_tar_posix.tar create mode 100644 tests/gnu_tar_ustar.tar create mode 100644 tests/gnu_tar_v7.tar create mode 100644 tests/hello_world.txt create mode 100644 tests/hello_world_513b.txt diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..098d9b0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true diff --git 
a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..f55af63 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,157 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "arrayvec" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "env_logger" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version 
= "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "libc" +version = "0.2.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "tar-no-std" +version = "0.1.0" +dependencies = [ + "arrayvec", + "bitflags", + "env_logger", + "log", +] + +[[package]] +name = "termcolor" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..3ea995a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "tar-no-std" +description = """ +Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. +The crate is simple and only supports reading of "basic" archives, therefore no extensions, such +as GNU Longname. The maximum supported file size is 100 characters including the NULL-byte. +""" +version = "0.1.0" +edition = "2018" +keywords = ["tar", "tarball", "archive"] +categories = ["data-structures", "no-std", "parser-implementations"] +readme = "README.md" +license = "MIT" +homepage = "https://github.com/phip1611/tar-no-std" +repository = "https://github.com/phip1611/tar-no-std" +documentation = "https://docs.rs/tar-no-std" + +[dependencies] +bitflags = "1.3" +arrayvec = { version = "0.7", default-features = false } +log = { version = "0.4", default-features = false } + +[dev-dependencies] +env_logger = "0.9" diff --git a/README.md b/README.md new file mode 100644 index 0000000..2356f81 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# `tar-no_std` - Parse Tar Archives (Tarballs) + +_Due to historical reasons, there are several formats of tar archives. 
All of them are based on the same principles, +but have some subtle differences that often make them incompatible with each other._ [[0]] + +Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. If you have a standard +environment and need full feature support, I recommend the use of <https://crates.io/crates/tar> instead. +The crate is simple and only supports reading of "basic" archives, therefore no extensions, such +as *GNU Longname*. The maximum supported file name length is 100 characters including the NULL-byte. +The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat +collections of files. + +This library is useful, if you write a kernel or a similar low-level application, which needs +"a bunch of files" from an archive ("init ramdisk"). The Tar file could for example come +as a Multiboot2 boot module provided by the bootloader. + +This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**. +GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet. +[This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible +archive formats and their limitations. + + + +## Compression +If your tar file is compressed, e.g. by `gzip`, you need to uncompress the bytes first (e.g. by a *deflate* algorithm), +before you pass them to this library. + +## MSRV +The MSRV is 1.51.0 stable. 
+ + +[0]: https://www.gnu.org/software/tar/manual/html_section/Formats.html diff --git a/examples/minimal.rs b/examples/minimal.rs new file mode 100644 index 0000000..cda422b --- /dev/null +++ b/examples/minimal.rs @@ -0,0 +1,36 @@ +/* +MIT License + +Copyright (c) 2021 Philipp Schuster + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ +use tar_no_std::TarArchive; + +fn main() { + // log: not mandatory + std::env::set_var("RUST_LOG", "trace"); + env_logger::init(); + + // also works in no_std environment + let archive = include_bytes!("../tests/gnu_tar_default.tar"); + let archive = TarArchive::new(archive); + let entries = archive.entries().collect::>(); + println!("{:#?}", entries); +} diff --git a/src/archive.rs b/src/archive.rs new file mode 100644 index 0000000..9366705 --- /dev/null +++ b/src/archive.rs @@ -0,0 +1,236 @@ +/* +MIT License + +Copyright (c) 2021 Philipp Schuster + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ +//! Module for [`TarArchive`]. + +use crate::header::PosixHeader; +use crate::{TypeFlag, BLOCKSIZE}; +use arrayvec::ArrayString; +use core::fmt::{Debug, Formatter}; +use core::ptr; +use core::str::FromStr; + +/// Describes an entry in a archive. +/// Currently only supports files but no directories. 
+pub struct ArchiveEntry<'a> { + filename: ArrayString<100>, + data: &'a [u8], + size: usize, +} + +#[allow(unused)] +impl<'a> ArchiveEntry<'a> { + pub fn new(filename: ArrayString<100>, data: &'a [u8]) -> Self { + ArchiveEntry { + filename, + data, + size: data.len(), + } + } + + /// Filename of the entry. Max 99 characters. + pub fn filename(&self) -> ArrayString<100> { + self.filename + } + + /// Data of the file. + pub fn data(&self) -> &'a [u8] { + self.data + } + + /// Filesize in bytes. + pub fn size(&self) -> usize { + self.size + } +} + +impl<'a> Debug for ArchiveEntry<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ArchiveEntry") + .field("filename", &self.filename().as_str()) + .field("size", &self.size()) + .field("data", &"") + .finish() + } +} + +/// Wrapper type around the bytes, which represents an archive. +#[derive(Debug)] +pub struct TarArchive<'a> { + data: &'a [u8], +} + +#[allow(unused)] +impl<'a> TarArchive<'a> { + /// Interprets the provided byte array as Tar archive. + pub fn new(data: &'a [u8]) -> Self { + assert_eq!( + data.len() % BLOCKSIZE, + 0, + "data must be a multiple of BLOCKSIZE={}", + BLOCKSIZE + ); + Self { data } + } + + /// Iterates over all entries of the TAR Archive. + /// Returns items of type [`ArchiveEntry`]. + pub fn entries(&self) -> ArchiveIterator { + ArchiveIterator::new(self) + } +} + +/// Iterator over the files. Each iteration step starts +/// at the next Tar header entry. +#[derive(Debug)] +pub struct ArchiveIterator<'a> { + archive: &'a TarArchive<'a>, + block_index: usize, +} + +impl<'a> ArchiveIterator<'a> { + pub fn new(archive: &'a TarArchive<'a>) -> Self { + Self { + archive, + block_index: 0, + } + } + + /// Returns a pointer to the next Header. 
+ fn next_hdr(&self, block_index: usize) -> *const PosixHeader { + let hdr_ptr = &self.archive.data[block_index * BLOCKSIZE]; + let hdr_ptr = hdr_ptr as *const u8; + hdr_ptr as *const PosixHeader + } +} + +impl<'a> Iterator for ArchiveIterator<'a> { + type Item = ArchiveEntry<'a>; + + /// Yields the next regular file of the archive. Returns `None` at the end-of-archive + /// marker, at the end of the data, at the first entry that is not a regular file, + /// or if the payload of an entry exceeds the archive data (truncated archive). + fn next(&mut self) -> Option<Self::Item> { + if self.block_index * BLOCKSIZE >= self.archive.data.len() { + log::warn!("Reached end of Tar archive data without finding zero/end blocks!"); + return None; + } + + let hdr = self.next_hdr(self.block_index); + let hdr = unsafe { ptr::read(hdr) }; + + // check if we found end of archive + if hdr.is_zero_block() { + // Only look at the following block if it exists: `get` returns `None` instead of + // panicking when the zero block is the very last block of the data. + let next_begin = (self.block_index + 1) * BLOCKSIZE; + let next_is_zero = self + .archive + .data + .get(next_begin..next_begin + BLOCKSIZE) + .map_or(false, |block| block.iter().all(|b| *b == 0)); + if next_is_zero { + // gracefully terminated Archive + log::debug!("End of Tar archive with two zero blocks!"); + } else { + log::warn!("Zero block found at end of Tar archive, but only one instead of two!"); + } + // end of archive + return None; + } + + if hdr.typeflag != TypeFlag::AREGTYPE && hdr.typeflag != TypeFlag::REGTYPE { + log::warn!( + "Found entry of type={:?}, but only files are supported", + hdr.typeflag + ); + return None; + } + + // fetch data of file from next block(s); the payload starts in the block + // right after the header block, not at the header itself + let block_count = hdr.payload_block_count(); + let i_begin = (self.block_index + 1) * BLOCKSIZE; + let i_end = i_begin + block_count * BLOCKSIZE; + if i_end > self.archive.data.len() { + // truncated archive: stop iterating instead of panicking on the slice below + log::warn!("Tar archive is truncated: payload of entry exceeds archive data!"); + return None; + } + // +1: hdr itself + data blocks + self.block_index += block_count + 1; + + let file_block_bytes = &self.archive.data[i_begin..i_end]; + let file_bytes = &file_block_bytes[0..hdr.size.val()]; + + Some(ArchiveEntry::new( + ArrayString::from_str(hdr.name.as_string().as_str()).unwrap(), + file_bytes, + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::vec::Vec; + + #[test] + fn test_archive_list() { + let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar")); + let entries = archive.entries().collect::<Vec<_>>(); + println!("{:#?}", 
entries); + } + + #[test] + fn test_archive_entries() { + let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries); + + let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_gnu.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries); + + let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_oldgnu.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries); + + // UNSUPPORTED. Uses extensions. + /*let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_pax.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries);*/ + + // UNSUPPORTED. Uses extensions. + /*let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_posix.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries);*/ + + let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_ustar.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries); + + let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_v7.tar")); + let entries = archive.entries().collect::>(); + assert_archive_content(&entries); + } + + fn assert_archive_content(entries: &[ArchiveEntry]) { + assert_eq!(entries.len(), 3); + // order in that I stored the files into the archive + assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt"); + assert_eq!(entries[0].size(), 513); + assert_eq!(entries[0].data().len(), 513); + assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt"); + assert_eq!(entries[1].size(), 513); + assert_eq!(entries[1].data().len(), 513); + assert_eq!(entries[2].filename().as_str(), "hello_world.txt"); + assert_eq!(entries[2].size(), 12); + assert_eq!(entries[2].data().len(), 12); + } +} diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 0000000..a37b9ba --- /dev/null +++ 
b/src/header.rs @@ -0,0 +1,396 @@ +/* +MIT License + +Copyright (c) 2021 Philipp Schuster + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ +//! TAR header definition taken from . +//! A Tar-archive is a collection of 512-byte sized blocks. Unfortunately there are several +//! TAR-like archive specifications. An Overview can be found here: +//! +//! +//! This library focuses on extracting files from the GNU Tar format. + +#![allow(non_upper_case_globals)] + +use crate::BLOCKSIZE; +use arrayvec::ArrayString; +use core::fmt::{Debug, Formatter}; + +/// The file size is encoded as octal ASCII number inside a Tar header. +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct Size(StaticCString<12>); + +impl Size { + /// Returns the octal ASCII number as actual size in bytes. 
+ pub fn val(&self) -> usize { + usize::from_str_radix(self.0.as_string().as_str(), 8).unwrap() + } +} + +impl Debug for Size { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + let mut debug = f.debug_tuple("Size"); + debug.field(&self.val()); + debug.finish() + } +} + +/// The file mode is encoded as octal ASCII number inside a Tar header. +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct Mode(StaticCString<8>); + +impl Mode { + /// Parses the octal ASCII number into [`ModeFlags`]. Bits that are unknown to + /// [`ModeFlags`] are dropped instead of causing a panic. + /// + /// # Panics + /// Panics if the field is not a valid octal number. + pub fn to_flags(self) -> ModeFlags { + let octal_number_str = self.0.as_string(); + let bits = u64::from_str_radix(octal_number_str.as_str(), 8).unwrap(); + ModeFlags::from_bits_truncate(bits) + } +} + +impl Debug for Mode { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + let mut debug = f.debug_tuple("Mode"); + debug.field(&self.to_flags()); + debug.finish() + } +} + +/// A C-String that is stored in a static array. All unused +/// chars must be a NULL-byte. +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct StaticCString<const N: usize>([u8; N]); + +#[allow(unused)] +impl<const N: usize> StaticCString<N> { + /// Constructor. + fn new(bytes: [u8; N]) -> Self { + Self(bytes) + } + + /// Returns the length of the string without NULL-byte. + pub fn len(&self) -> usize { + // not as efficient as it could be but negligible + self.as_string().len() + } + + /// Returns the string up to (excluding) the first NULL-byte. + pub fn as_string(&self) -> ArrayString<N> { + let mut string = ArrayString::new(); + // copy all bytes (=ASCII) into string + self.0 + .iter() + // stop at the first zero byte: it terminates the + // C-string. Furthermore, the bytes behind it are + // unused and must not become part of the + // string. 
+ .take_while(|x| **x != 0) + .for_each(|b| string.push(*b as char)); + string + } +} + +impl<const N: usize> Debug for StaticCString<N> { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + let mut debug = f.debug_tuple("Name"); + let str = self.as_string(); + if str.len() == 0 { + debug.field(&""); + } else { + debug.field(&str); + } + debug.finish() + } +} + +/// Header of the TAR format as specified by POSIX (POSIX 1003.1-1990). +/// "New" (version?) GNU Tar versions use this archive format by default. +/// (). +/// +/// Each file is started by such a header, that describes the size and +/// the file name. After that, the file content stands in chunks of 512 bytes. +/// The number of bytes can be derived from the file size. +/// +/// This is also mostly compatible with the "Ustar"-header and the "GNU format". +/// Because this library only needs to fetch data and filename, we don't need +/// further checks. +#[derive(Debug, Copy, Clone)] +#[repr(C, packed)] +pub struct PosixHeader { + /// Name. There is always a null byte, therefore + /// the max len is 99. + pub name: StaticCString<100>, + pub mode: Mode, + pub uid: [u8; 8], + pub gid: [u8; 8], + // confusing; size is stored as ASCII string + pub size: Size, + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: TypeFlag, + /// Name. There is always a null byte, therefore + /// the max len is 99. + pub linkname: StaticCString<100>, + pub magic: StaticCString<6>, + pub version: StaticCString<2>, + /// Username. There is always a null byte, therefore + /// the max len is N-1. + pub uname: StaticCString<32>, + /// Groupname. There is always a null byte, therefore + /// the max len is N-1. + pub gname: StaticCString<32>, + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + /// There is always a null byte, therefore + /// the max len is N-1. 
+ pub prefix: StaticCString<155>, + // padding => to BLOCKSIZE bytes + pub _pad: [u8; 12], +} + +impl PosixHeader { + /// Returns the number of blocks that are required to + /// read the whole file content. + pub fn payload_block_count(&self) -> usize { + let size = self.size.val(); + let full_blocks = size / BLOCKSIZE; + // a partially filled last block still occupies a whole block + if size % BLOCKSIZE > 0 { + full_blocks + 1 + } else { + full_blocks + } + } + + /// A Tar archive is terminated, if a end-of-archive entry, which consists of two 512 blocks + /// of zero bytes, is found. Returns true if this header consists only of zero bytes. + pub fn is_zero_block(&self) -> bool { + let ptr = self as *const Self as *const u8; + // SAFETY: `self` is a valid reference, therefore all `size_of::<Self>()` bytes behind it + // are readable, and `u8` has no alignment requirement. + let self_bytes = unsafe { core::slice::from_raw_parts(ptr, core::mem::size_of::<Self>()) }; + self_bytes.iter().all(|x| *x == 0) + } +} + +/// Type of an archive entry. The Tar header stores it as a single ASCII character, +/// therefore the discriminants are the character values (e.g. `b'1'`), not the numbers. +#[derive(Debug, Copy, Clone, PartialEq)] +#[repr(u8)] +#[allow(unused)] +pub enum TypeFlag { + /// Represents a regular file. In order to be compatible with older versions of tar, a typeflag + /// value of AREGTYPE should be silently recognized as a regular file. New archives should be + /// created using REGTYPE. Also, for backward compatibility, tar treats a regular file whose + /// name ends with a slash as a directory. + REGTYPE = b'0', + /// Represents a regular file. In order to be compatible with older versions of tar, a typeflag + /// value of AREGTYPE should be silently recognized as a regular file. New archives should be + /// created using REGTYPE. Also, for backward compatibility, tar treats a regular file whose + /// name ends with a slash as a directory. + AREGTYPE = b'\0', + /// This flag represents a file linked to another file, of any type, previously archived. Such + /// files are identified in Unix by each file having the same device and inode number. The + /// linked-to name is specified in the linkname field with a trailing null. + LINK = b'1', + /// This represents a symbolic link to another file. The linked-to name is specified in the + /// linkname field with a trailing null. 
+ SYMTYPE = b'2', + /// Represents character special files and block special files respectively. In this case the + /// devmajor and devminor fields will contain the major and minor device numbers respectively. + /// Operating systems may map the device specifications to their own local specification, or + /// may ignore the entry. + CHRTYPE = b'3', + /// Represents character special files and block special files respectively. In this case the + /// devmajor and devminor fields will contain the major and minor device numbers respectively. + /// Operating systems may map the device specifications to their own local specification, or + /// may ignore the entry. + BLKTYPE = b'4', + /// This flag specifies a directory or sub-directory. The directory name in the name field + /// should end with a slash. On systems where disk allocation is performed on a directory + /// basis, the size field will contain the maximum number of bytes (which may be rounded to + /// the nearest disk block allocation unit) which the directory may hold. A size field of zero + /// indicates no such limiting. Systems which do not support limiting in this manner should + /// ignore the size field. + DIRTYPE = b'5', + /// This specifies a FIFO special file. Note that the archiving of a FIFO file archives the + /// existence of this file and not its contents. + FIFOTYPE = b'6', + /// This specifies a contiguous file, which is the same as a normal file except that, in + /// operating systems which support it, all its space is allocated contiguously on the disk. + /// Operating systems which do not allow contiguous allocation should silently treat this type + /// as a normal file. + CONTTYPE = b'7', + /// Extended header referring to the next file in the archive + XHDTYPE = b'x', + /// Global extended header + XGLTYPE = b'g', +} + +bitflags::bitflags! { + /// UNIX file permissions on octal format. + pub struct ModeFlags: u64 { + /// Set UID on execution. + const SetUID = 0o4000; + /// Set GID on execution. 
+ const SetGID = 0o2000; + /// Reserved. + const TSVTX = 0o1000; + /// Owner read. + const OwnerRead = 0o400; + /// Owner write. + const OwnerWrite = 0o200; + /// Owner execute. + const OwnerExec = 0o100; + /// Group read. + const GroupRead = 0o040; + /// Group write. + const GroupWrite = 0o020; + /// Group execute. + const GroupExec = 0o010; + /// Others read. + const OthersRead = 0o004; + /// Others write. + const OthersWrite = 0o002; + /// Others execute. + const OthersExec = 0o001; + } +} + +#[cfg(test)] +mod tests { + use crate::header::{PosixHeader, StaticCString, TypeFlag}; + use crate::BLOCKSIZE; + use std::mem::size_of; + + fn bytes_to_archive(bytes: &[u8]) -> PosixHeader { + let hdr = bytes.as_ptr() as *const PosixHeader; + unsafe { core::ptr::read(hdr) } + } + + #[test] + fn test_display_header() { + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); + println!("{:#?}'", archive); + } + + #[test] + fn test_show_tar_header_magics() { + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); + println!( + "default: magic='{:?}', version='{:?}'", + archive.magic, archive.version + ); + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar")); + println!( + "gnu: magic='{:?}', version='{:?}'", + archive.magic, archive.version + ); + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar")); + println!( + "oldgnu: magic='{:?}', version='{:?}'", + archive.magic, archive.version + ); + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar")); + println!( + "pax: magic='{:?}', version='{:?}'", + archive.magic, archive.version + ); + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_posix.tar")); + println!( + "posix: magic='{:?}', version='{:?}'", + archive.magic, archive.version + ); + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_ustar.tar")); + println!( + "ustar: magic='{:?}', version='{:?}'", + archive.magic, 
archive.version + ); + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar")); + println!( + "v7: magic='{:?}', version='{:?}'", + archive.magic, archive.version + ); + } + + #[test] + fn test_parse_tar_header_filename() { + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_default.tar")); + assert_eq!( + archive.typeflag, + TypeFlag::REGTYPE, + "the first entry is a regular file!" + ); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_gnu.tar")); + assert_eq!( + archive.typeflag, + TypeFlag::REGTYPE, + "the first entry is a regular file!" + ); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_oldgnu.tar")); + assert_eq!( + archive.typeflag, + TypeFlag::REGTYPE, + "the first entry is a regular file!" + ); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + + /* UNSUPPORTED YET. Uses extensions.. + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_pax.tar")); + assert_eq!(archive.typeflag, TypeFlag::REGTYPE, "the first entry is a regular file!"); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); */ + + /* UNSUPPORTED YET. Uses extensions. + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_posix.tar")); + unsupported extension XHDTYPE assert_eq!(archive.typeflag, TypeFlag::REGTYPE, "the first entry is a regular file!"); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); */ + + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_ustar.tar")); + assert_eq!( + archive.typeflag, + TypeFlag::REGTYPE, + "the first entry is a regular file!" 
+ ); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + + let archive = bytes_to_archive(include_bytes!("../tests/gnu_tar_v7.tar")); + // ARegType: legacy + assert_eq!( + archive.typeflag, + TypeFlag::AREGTYPE, + "the first entry is a regular file!" + ); + assert_eq!(archive.name.as_string().as_str(), "bye_world_513b.txt"); + } + + #[test] + fn test_size() { + assert_eq!(BLOCKSIZE, size_of::()); + } + + #[test] + fn test_static_str() { + let str = StaticCString::new(*b"0000633\0"); + assert_eq!(str.len(), 7); + assert_eq!(str.as_string().as_str(), "0000633"); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8739814 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,55 @@ +/* +MIT License + +Copyright (c) 2021 Philipp Schuster + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ +//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. +//! 
If you have a standard environment and need full feature support, I recommend the use of +//! <https://crates.io/crates/tar> instead. +//! +//! The crate is simple and only supports reading of "basic" archives, therefore no extensions, such +//! as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte. +//! The maximum supported file size is 8GiB. Also, directories are not supported yet; only flat +//! collections of files are. +//! +//! This library is useful if you write a kernel or a similar low-level application which needs +//! "a bunch of files" from an archive ("init ram disk"). The Tar file could for example come +//! as a Multiboot2 boot module provided by the bootloader. +//! +//! This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**. +//! GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet. +//! [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview of possible +//! archive formats and their limitations. + +#![cfg_attr(not(test), no_std)] + +#[cfg_attr(test, macro_use)] +#[cfg(test)] +extern crate std; + +/// Each Archive Entry (either Header or Data Block) is a block of 512 bytes. 
+const BLOCKSIZE: usize = 512; + +mod archive; +mod header; + +pub use archive::*; +pub use header::*; diff --git a/tests/bye_world_513b.txt b/tests/bye_world_513b.txt new file mode 100644 index 0000000..0bcc4b2 --- /dev/null +++ b/tests/bye_world_513b.txt @@ -0,0 +1,17 @@ +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### + diff --git a/tests/gnu_tar_default.tar b/tests/gnu_tar_default.tar new file mode 100644 index 0000000000000000000000000000000000000000..bd44cde09a505dacc17fd0cb8dcfed8aa707dc7a GIT binary patch literal 10240 zcmeI#!3u&f7zgm3eTsqi*thoSEA$2(B}?!i7;02+-zGsGbaikt;y(n%%{hPS-&sDZ z;@)=6rC{2ZyWug2)r+zv%Z#uq%9*BXS4DOaWJqVMwJ};KV&Ys#wfabAI=uD6sgqJ% z`*Ur#t8S)^+x4w};jq|ur~Z=ot5ekbfui?GVt+s}zMuki)iiD3FMtdApYxx_{AZ<* z|3Pp3yK{ehA>aSq2l>CnTNdv*Cix#`@^3GF%zv{u|F_eB^O^s%ZAT_&M*eT{)EwOR xUmo2bAfjLkaGV1I5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SE*Zcmc~L-Kziq literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_gnu.tar b/tests/gnu_tar_gnu.tar new file mode 100644 index 0000000000000000000000000000000000000000..bd44cde09a505dacc17fd0cb8dcfed8aa707dc7a GIT binary patch literal 10240 zcmeI#!3u&f7zgm3eTsqi*thoSEA$2(B}?!i7;02+-zGsGbaikt;y(n%%{hPS-&sDZ z;@)=6rC{2ZyWug2)r+zv%Z#uq%9*BXS4DOaWJqVMwJ};KV&Ys#wfabAI=uD6sgqJ% z`*Ur#t8S)^+x4w};jq|ur~Z=ot5ekbfui?GVt+s}zMuki)iiD3FMtdApYxx_{AZ<* z|3Pp3yK{ehA>aSq2l>CnTNdv*Cix#`@^3GF%zv{u|F_eB^O^s%ZAT_&M*eT{)EwOR xUmo2bAfjLkaGV1I5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SE*Zcmc~L-Kziq literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_oldgnu.tar b/tests/gnu_tar_oldgnu.tar new file 
mode 100644 index 0000000000000000000000000000000000000000..bd44cde09a505dacc17fd0cb8dcfed8aa707dc7a GIT binary patch literal 10240 zcmeI#!3u&f7zgm3eTsqi*thoSEA$2(B}?!i7;02+-zGsGbaikt;y(n%%{hPS-&sDZ z;@)=6rC{2ZyWug2)r+zv%Z#uq%9*BXS4DOaWJqVMwJ};KV&Ys#wfabAI=uD6sgqJ% z`*Ur#t8S)^+x4w};jq|ur~Z=ot5ekbfui?GVt+s}zMuki)iiD3FMtdApYxx_{AZ<* z|3Pp3yK{ehA>aSq2l>CnTNdv*Cix#`@^3GF%zv{u|F_eB^O^s%ZAT_&M*eT{)EwOR xUmo2bAfjLkaGV1I5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SE*Zcmc~L-Kziq literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_pax.tar b/tests/gnu_tar_pax.tar new file mode 100644 index 0000000000000000000000000000000000000000..f05f25198a1e5edc395de9e6069c08a6ca21b070 GIT binary patch literal 10240 zcmeI1O>crg5QaJXD@eSr`!PGfp?8nH^=ME@Y@)G2n%aNgSrS?iN-?a2n%S7F*yH!=ZY}4S5%P3neFd#8Uq%vdZbds$EFY=Su$wHVVQ~G+F8Y?X=m^_bhMO zfNT0I?sh`u`{wq`FjH%ucb8drc+T-|K$GV)o7#co^^8hDH2vQ$+wYG)9iY1}_V$0O z^ncbG_J3m>goghEEs_KkJ)prh{a?-DHT++#?-;A+IffGOF7}oMmHO}N|4RKw&PlQV zYhmfTsoTD=zk75(?Eic#>VJ*b?f*O^{|LuT{?AgLK!Mcs{?GZp`=6zT|BFyyEVW56 zE;D?$12O?7zyz286JP>NfC(@GCcp%yL*NV5-EU9; literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_posix.tar b/tests/gnu_tar_posix.tar new file mode 100644 index 0000000000000000000000000000000000000000..f2a7b5a816c9d945daec997b89ae13cf12f99016 GIT binary patch literal 10240 zcmeI1O>crg5QaJXD@eSr`!PGfp?8nH^=ME@Y@)G2n%aNgSrS?iN-?a2n%S7F*_uG|KW-W?y5ezGm3IYVKrgB|NWptoGX|!4c z=s+sa)D)hFP1~>c`>9BCT2jBOAMPq6!$a|U$et7$DiRPy9LHLML7Y)Etwp0m7>Ytd zzmSV=YSCKXheP#{8}csp7D`U^h^78jWR=r@RlAU)&z1fWY!rl}XtL7(+iA0-?^)im z0oU|b-0g(Q_s#8>VW!qR?=G|M@SNk_fF{ppHnjuE>lu}RX!^fhw%;FpIzV?{?Ct+l z>Hn-X?El6%2o3)SS|kZ7dO(A1`oEgPYxuuf-!WFta||WmUF~By2 literal 0 HcmV?d00001 diff --git a/tests/gnu_tar_ustar.tar b/tests/gnu_tar_ustar.tar new file mode 100644 index 0000000000000000000000000000000000000000..bfc488251765f089b894ea06365704663222c70e GIT binary patch literal 10240 zcmeH~!3u*g42FC5DFpAsbZvcwy}^z$H^GD8P{+2nU$Vf=a%~y|PX2aVso_Y@H zI-DDRult!bGWE?bnXpN6Vhg)OH=X${@w+ql{eYt9lSEgM2q+|hrf%DA;~PNq@Mrxe 
z(f>je6#5^a5d885egAhp(Ek7*S;R+7^naV?pL2ifzgg`6x&1ev{@?OCa@tzxe~7X< z$o-!lzp;Ei1oqb$uDHd1>;V%;^ty4 zd8(9a7xR_PPZ!M318v?XOZhOd*6swZZP)b+zX4(%{+$2v&VOH3j oFmYS?2mu5TKmY**5I_I{1Q0*~0R#|0009ILKmY**5ZIf*3yUkpV*mgE literal 0 HcmV?d00001 diff --git a/tests/hello_world.txt b/tests/hello_world.txt new file mode 100644 index 0000000..557db03 --- /dev/null +++ b/tests/hello_world.txt @@ -0,0 +1 @@ +Hello World diff --git a/tests/hello_world_513b.txt b/tests/hello_world_513b.txt new file mode 100644 index 0000000..0bcc4b2 --- /dev/null +++ b/tests/hello_world_513b.txt @@ -0,0 +1,17 @@ +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +############################### +