Merge pull request #7 from schnoberts1/bugfix-entry-type-enum

Fix the values in typeflag so they match https://www.gnu.org/software…
Fix the values in typeflag so they match https://www.gnu.org/software/tar/manual/html_node/Standard.html . i.e DIRTYPE is b'5' not 5.
2023-08-30 17:54:57 +02:00 · 2023-08-27 12:55:21 +01:00 · 2023-04-11 13:08:27 +02:00 · 2023-04-11 12:33:28 +02:00 · 2023-04-11 12:09:15 +02:00 · 2023-04-11 10:39:45 +02:00
12 changed files with 410 additions and 184 deletions
--- a/.editorconfig
+++ b/.editorconfig
@ -9,3 +9,7 @@ insert_final_newline = true
 indent_style = space
 indent_size = 4
 trim_trailing_whitespace = true
+max_line_length = 80
+
+[*.yml]
+indent_size = 2
--- a/.github/workflows/qa.yml
+++ b/.github/workflows/qa.yml
@ -0,0 +1,12 @@
+name: QA
+
+on: [ push, pull_request ]
+
+jobs:
+  spellcheck:
+    name: Spellcheck
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+      # Executes "typos ."
+      - uses: crate-ci/typos@v1.13.20
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@ -1,48 +1,59 @@
 name: Build

-on:
-    push:
-        branches: [ main ]
-    pull_request:
-        branches: [ main ]
+on: [push, pull_request]

 env:
-    CARGO_TERM_COLOR: always
+  CARGO_TERM_COLOR: always

 jobs:
-    build:
-        runs-on: ubuntu-latest
-        strategy:
-            matrix:
-                rust:
-                    - stable
-                    - nightly
-                    - 1.51.0 # MSVR
-        steps:
-            -   uses: actions/checkout@v2
-            -   name: Build
-                run: cargo build --verbose
-            # use some no_std target
-            -   name: Install no_std target thumbv7em-none-eabihf
-                run: rustup target add thumbv7em-none-eabihf
-            -   name: Build (no_std)
-                run: cargo build --verbose --target thumbv7em-none-eabihf
-            -   name: Run tests
-                run: cargo test --verbose
+  build:
+    runs-on: ${{ matrix.runs-on }}
+    strategy:
+      matrix:
+        runs-on:
+          - windows-latest
+          - ubuntu-latest
+        rust:
+          - stable
+          - nightly
+          - 1.60.0 # MSRV
+    steps:
+      - uses: actions/checkout@v2
+      # Important preparation step: override the latest default Rust version in GitHub CI
+      # with the current value of the iteration in the "strategy.matrix.rust"-array.
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: default
+          toolchain: ${{ matrix.rust }}
+          override: true
+      - name: Build
+        run: cargo build --all-targets --verbose --features alloc
+      # use some arbitrary no_std target
+      - name: Install no_std target thumbv7em-none-eabihf
+        run: rustup target add thumbv7em-none-eabihf
+      - name: Build (no_std)
+        run: cargo build --verbose --target thumbv7em-none-eabihf --features alloc
+      - name: Run tests
+        run: cargo test --verbose --features alloc

-    style_checks:
-        runs-on: ubuntu-latest
-        strategy:
-            matrix:
-                rust:
-                    - stable
-                    - nightly
-                    - 1.51.0 # MSVR
-        steps:
-            -   uses: actions/checkout@v2
-            -   name: Rustfmt
-                run: cargo fmt -- --check
-            -   name: Clippy
-                run: cargo clippy
-            -   name: Rustdoc
-                run: cargo doc
+  style_checks:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        rust:
+          - 1.60.0
+    steps:
+      - uses: actions/checkout@v2
+      # Important preparation step: override the latest default Rust version in GitHub CI
+      # with the current value of the iteration in the "strategy.matrix.rust"-array.
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: default
+          toolchain: ${{ matrix.rust }}
+          override: true
+      - name: Rustfmt
+        run: cargo fmt -- --check
+      - name: Clippy
+        run: cargo clippy --features alloc
+      - name: Rustdoc
+        run: cargo doc --no-deps --document-private-items --features alloc
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,6 @@
+# v0.2.0 (2023-04-11)
+- MSRV is 1.60.0
+- bitflags bump: 1.x -> 2.x
+- a few internal code improvements (fewer possible panics)
+- `Mode::to_flags` now returns a Result
+- Feature `all` was removed. Use `alloc` instead.
--- a/Cargo.toml
+++ b/Cargo.toml
@ -7,8 +7,8 @@ as GNU Longname. The maximum supported file name length is 100 characters includ
 The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
 collections of files.
 """
-version = "0.1.2"
-edition = "2018"
+version = "0.2.0"
+edition = "2021"
 keywords = ["tar", "tarball", "archive"]
 categories = ["data-structures", "no-std", "parser-implementations"]
 readme = "README.md"
@ -20,10 +20,14 @@ documentation = "https://docs.rs/tar-no-std"
 # required because "env_logger" uses "log" but with dependency to std..
 resolver = "2"

+[features]
+default = []
+alloc = []
+
 [dependencies]
-bitflags = "1.3"
 arrayvec = { version = "0.7", default-features = false }
+bitflags = "2.0"
 log = { version = "0.4", default-features = false }

 [dev-dependencies]
-env_logger = "0.9"
+env_logger = "0.10"
--- a/README.md
+++ b/README.md
@ -5,11 +5,15 @@ but have some subtle differences that often make them incompatible with each oth

 Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations. If you have a standard
 environment and need full feature support, I recommend the use of <https://crates.io/crates/tar> instead.
+
+## Limitations
 The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
 as *GNU Longname*. The maximum supported file name length is 100 characters including the NULL-byte.
 The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
 collections of files.

+## Use Case
+
 This library is useful, if you write a kernel or a similar low-level application, which needs
 "a bunch of files" from an archive ("init ramdisk"). The Tar file could for example come
 as a Multiboot2 boot module provided by the bootloader.
@ -19,8 +23,10 @@ GNU Extensions such as sparse files, incremental archives, and long filename ext
 [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
 archive formats and their limitations.

-## Example
+## Example (without `alloc`-feature)
 ```rust
+use tar_no_std::TarArchiveRef;
+
 fn main() {
    // log: not mandatory
    std::env::set_var("RUST_LOG", "trace");
@ -28,22 +34,26 @@ fn main() {

    // also works in no_std environment (except the println!, of course)
    let archive = include_bytes!("../tests/gnu_tar_default.tar");
-    let archive = TarArchive::new(archive);
+    let archive = TarArchiveRef::new(archive);
    // Vec needs an allocator of course, but the library itself doesn't need one
    let entries = archive.entries().collect::<Vec<_>>();
    println!("{:#?}", entries);
    println!("content of last file:");
-    let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
-    println!("{:#?}", last_file_content);
+    println!("{:#?}", entries[2].data_as_str().expect("Should be valid UTF-8"));
 }
 ```

-## Compression
-If your tar file is compressed, e.g. bei `.tar.gz`/`gzip`, you need to uncompress the bytes first
-(e.g. by a *gzip* library). Afterwards, this crate can read and write the Tar archive format from the bytes.
+## Alloc Feature
+This crate allows the usage of the additional Cargo build time feature `alloc`. When this is used,
+the crate also provides the type `TarArchive`, which owns the data on the heap.
+
+## Compression (`tar.gz`)
+If your tar file is compressed, e.g. by `.tar.gz`/`gzip`, you need to uncompress the bytes first
+(e.g. by a *gzip* library). Afterwards, this crate can read the Tar archive format from the uncompressed
+bytes.

 ## MSRV
-The MSRV is 1.51.0 stable.
+The MSRV is 1.60.0 stable.


 [0]: https://www.gnu.org/software/tar/manual/html_section/Formats.html
--- a/build.sh
+++ b/build.sh
@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+cargo build --all-targets --verbose --features alloc
+# use some random no_std target
+rustup target add thumbv7em-none-eabihf
+cargo build --verbose --target thumbv7em-none-eabihf --features alloc
+cargo test --verbose --features alloc
+
+cargo fmt -- --check
+cargo +1.60.0 clippy --features alloc
+cargo +1.60.0 doc --no-deps --document-private-items --features alloc
--- a/examples/alloc_feature.rs
+++ b/examples/alloc_feature.rs
@ -0,0 +1,44 @@
+/*
+MIT License
+
+Copyright (c) 2023 Philipp Schuster
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+use tar_no_std::TarArchive;
+
+/// This example needs the `alloc` feature.
+fn main() {
+    // log: not mandatory
+    std::env::set_var("RUST_LOG", "trace");
+    env_logger::init();
+
+    // also works in no_std environment (except the println!, of course)
+    let archive = include_bytes!("../tests/gnu_tar_default.tar");
+    let archive_heap_owned = archive.to_vec().into_boxed_slice();
+    let archive = TarArchive::new(archive_heap_owned);
+    // Vec needs an allocator of course, but the library itself doesn't need one
+    let entries = archive.entries().collect::<Vec<_>>();
+    println!("{:#?}", entries);
+    println!("content of last file:");
+    println!(
+        "{:#?}",
+        entries[2].data_as_str().expect("Should be valid UTF-8")
+    );
+}
--- a/examples/minimal.rs
+++ b/examples/minimal.rs
@ -1,7 +1,7 @@
 /*
 MIT License

-Copyright (c) 2021 Philipp Schuster
+Copyright (c) 2023 Philipp Schuster

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 */
-use tar_no_std::TarArchive;
+use tar_no_std::TarArchiveRef;

 fn main() {
    // log: not mandatory
@ -30,11 +30,13 @@ fn main() {

    // also works in no_std environment (except the println!, of course)
    let archive = include_bytes!("../tests/gnu_tar_default.tar");
-    let archive = TarArchive::new(archive);
+    let archive = TarArchiveRef::new(archive);
    // Vec needs an allocator of course, but the library itself doesn't need one
    let entries = archive.entries().collect::<Vec<_>>();
    println!("{:#?}", entries);
    println!("content of last file:");
-    let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
-    println!("{:#?}", last_file_content);
+    println!(
+        "{:#?}",
+        entries[2].data_as_str().expect("Should be valid UTF-8")
+    );
 }
--- a/src/archive.rs
+++ b/src/archive.rs
@ -1,7 +1,7 @@
 /*
 MIT License

-Copyright (c) 2021 Philipp Schuster
+Copyright (c) 2023 Philipp Schuster

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@ -21,26 +21,29 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 */
-//! Module for [`TarArchive`].
+//! Module for [`TarArchiveRef`]. If the `alloc`-feature is enabled, this crate
+//! also exports `TarArchive`, which owns data on the heap.

 use crate::header::PosixHeader;
-use crate::{TypeFlag, BLOCKSIZE};
+use crate::{TypeFlag, BLOCKSIZE, FILENAME_MAX_LEN};
+#[cfg(feature = "alloc")]
+use alloc::boxed::Box;
 use arrayvec::ArrayString;
 use core::fmt::{Debug, Formatter};
-use core::ptr;
-use core::str::FromStr;
+use core::str::{FromStr, Utf8Error};
+use log::warn;

 /// Describes an entry in an archive.
 /// Currently only supports files but no directories.
 pub struct ArchiveEntry<'a> {
-    filename: ArrayString<100>,
+    filename: ArrayString<FILENAME_MAX_LEN>,
    data: &'a [u8],
    size: usize,
 }

 #[allow(unused)]
 impl<'a> ArchiveEntry<'a> {
-    pub const fn new(filename: ArrayString<100>, data: &'a [u8]) -> Self {
+    const fn new(filename: ArrayString<FILENAME_MAX_LEN>, data: &'a [u8]) -> Self {
        ArchiveEntry {
            filename,
            data,
@ -48,8 +51,9 @@ impl<'a> ArchiveEntry<'a> {
        }
    }

-    /// Filename of the entry. Max 99 characters.
-    pub const fn filename(&self) -> ArrayString<100> {
+    /// Filename of the entry with a maximum of 100 characters (including the
+    /// terminating NULL-byte).
+    pub const fn filename(&self) -> ArrayString<{ FILENAME_MAX_LEN }> {
        self.filename
    }

@ -58,6 +62,11 @@ impl<'a> ArchiveEntry<'a> {
        self.data
    }

+    /// Data of the file as string slice, if data is valid UTF-8.
+    pub fn data_as_str(&self) -> Result<&'a str, Utf8Error> {
+        core::str::from_utf8(self.data)
+    }
+
    /// Filesize in bytes.
    pub const fn size(&self) -> usize {
        self.size
@ -74,15 +83,65 @@ impl<'a> Debug for ArchiveEntry<'a> {
    }
 }

-/// Wrapper type around the bytes, which represents an archive.
+/// Type that owns bytes on the heap, that represents a Tar archive.
+/// Unlike [`TarArchiveRef`], this type is useful, if you need to own the
+/// data as long as you need the archive, but not longer.
+///
+/// This is only available with the `alloc` feature of this crate.
+#[cfg(feature = "alloc")]
 #[derive(Debug)]
-pub struct TarArchive<'a> {
+pub struct TarArchive {
+    data: Box<[u8]>,
+}
+
+#[cfg(feature = "alloc")]
+impl TarArchive {
+    /// Creates a new archive type, that owns the data on the heap. The provided byte array is
+    /// interpreted as bytes in Tar archive format.
+    pub fn new(data: Box<[u8]>) -> Self {
+        assert_eq!(
+            data.len() % BLOCKSIZE,
+            0,
+            "data must be a multiple of BLOCKSIZE={}, len is {}",
+            BLOCKSIZE,
+            data.len(),
+        );
+        Self { data }
+    }
+
+    /// Iterates over all entries of the Tar archive.
+    /// Returns items of type [`ArchiveEntry`].
+    /// See also [`ArchiveIterator`].
+    pub fn entries(&self) -> ArchiveIterator {
+        ArchiveIterator::new(self.data.as_ref())
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<Box<[u8]>> for TarArchive {
+    fn from(data: Box<[u8]>) -> Self {
+        Self::new(data)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl From<TarArchive> for Box<[u8]> {
+    fn from(ar: TarArchive) -> Self {
+        ar.data
+    }
+}
+
+/// Wrapper type around bytes, which represents a Tar archive.
+/// Unlike [`TarArchive`], this uses only a reference to the data.
+#[derive(Debug)]
+pub struct TarArchiveRef<'a> {
    data: &'a [u8],
 }

 #[allow(unused)]
-impl<'a> TarArchive<'a> {
-    /// Interprets the provided byte array as Tar archive.
+impl<'a> TarArchiveRef<'a> {
+    /// Creates a new archive wrapper type. The provided byte array is interpreted as
+    /// bytes in Tar archive format.
    pub fn new(data: &'a [u8]) -> Self {
        assert_eq!(
            data.len() % BLOCKSIZE,
@ -93,34 +152,34 @@ impl<'a> TarArchive<'a> {
        Self { data }
    }

-    /// Iterates over all entries of the TAR Archive.
+    /// Iterates over all entries of the Tar archive.
    /// Returns items of type [`ArchiveEntry`].
+    /// See also [`ArchiveIterator`].
    pub const fn entries(&self) -> ArchiveIterator {
-        ArchiveIterator::new(self)
+        ArchiveIterator::new(self.data)
    }
 }

-/// Iterator over the files. Each iteration step starts
+/// Iterator over the files of the archive. Each iteration starts
 /// at the next Tar header entry.
 #[derive(Debug)]
 pub struct ArchiveIterator<'a> {
-    archive: &'a TarArchive<'a>,
+    archive_data: &'a [u8],
    block_index: usize,
 }

 impl<'a> ArchiveIterator<'a> {
-    pub const fn new(archive: &'a TarArchive<'a>) -> Self {
+    pub const fn new(archive: &'a [u8]) -> Self {
        Self {
-            archive,
+            archive_data: archive,
            block_index: 0,
        }
    }

-    /// Returns a pointer to the next Header.
-    const fn next_hdr(&self, block_index: usize) -> *const PosixHeader {
-        let hdr_ptr = &self.archive.data[block_index * BLOCKSIZE];
-        let hdr_ptr = hdr_ptr as *const u8;
-        hdr_ptr as *const PosixHeader
+    /// Returns a reference to the next Header.
+    fn next_hdr(&self, block_index: usize) -> &'a PosixHeader {
+        let hdr_ptr = &self.archive_data[block_index * BLOCKSIZE];
+        unsafe { (hdr_ptr as *const u8).cast::<PosixHeader>().as_ref() }.unwrap()
    }
 }

@ -128,17 +187,16 @@ impl<'a> Iterator for ArchiveIterator<'a> {
    type Item = ArchiveEntry<'a>;

    fn next(&mut self) -> Option<Self::Item> {
-        if self.block_index * BLOCKSIZE >= self.archive.data.len() {
-            log::warn!("Reached end of Tar archive data without finding zero/end blocks!");
+        if self.block_index * BLOCKSIZE >= self.archive_data.len() {
+            warn!("Reached end of Tar archive data without finding zero/end blocks!");
            return None;
        }

        let hdr = self.next_hdr(self.block_index);
-        let hdr = unsafe { ptr::read(hdr) };

        // check if we found end of archive
        if hdr.is_zero_block() {
-            let next_hdr = unsafe { ptr::read(self.next_hdr(self.block_index + 1)) };
+            let next_hdr = self.next_hdr(self.block_index + 1);
            if next_hdr.is_zero_block() {
                // gracefully terminated Archive
                log::debug!("End of Tar archive with two zero blocks!");
@ -158,55 +216,67 @@ impl<'a> Iterator for ArchiveIterator<'a> {
        }

        if hdr.name.is_empty() {
-            log::warn!("Found empty file name",);
+            warn!("Found empty file name",);
        }

-        // fetch data of file from next block(s)
-        let data_block_count = hdr.payload_block_count();
+        let hdr_size = hdr.size.val();
+        if let Err(e) = hdr_size {
+            warn!("Can't parse the file size from the header block. Stop iterating Tar archive. {e:#?}");
+            return None;
+        }
+        let hdr_size = hdr_size.unwrap();
+
+        // Fetch data of file from next block(s).
+        // .unwrap() is fine as we checked that hdr.size.val() is valid
+        // above
+        let data_block_count = hdr.payload_block_count().unwrap();
+
        // +1: skip hdr block itself and start at data!
        // i_begin is the byte begin index of this file in the array of the whole archive
        let i_begin = (self.block_index + 1) * BLOCKSIZE;
        // i_end is the exclusive byte end index of the data of the current file
        let i_end = i_begin + data_block_count * BLOCKSIZE;
-        let file_block_bytes = &self.archive.data[i_begin..i_end];
-        // because each block is 512 bytes long, the file is not necessarily a multiple of 512 bytes
-        let file_bytes = &file_block_bytes[0..hdr.size.val()];
+        let file_block_bytes = &self.archive_data[i_begin..i_end];
+        // Each block is 512 bytes long, but the file size is not necessarily a
+        // multiple of 512.
+        let file_bytes = &file_block_bytes[0..hdr_size];

        // in next iteration: start at next Archive entry header
        // +1 for current hdr block itself + all data blocks
        self.block_index += data_block_count + 1;

-        Some(ArchiveEntry::new(
-            ArrayString::from_str(hdr.name.as_string().as_str()).unwrap(),
-            file_bytes,
-        ))
+        let filename = ArrayString::from_str(hdr.name.as_string().as_str());
+        // .unwrap() is fine as the capacity MUST be sufficient.
+        let filename = filename.unwrap();
+
+        Some(ArchiveEntry::new(filename, file_bytes))
    }
 }

 #[cfg(test)]
 mod tests {
    use super::*;
-    use core::str;
    use std::vec::Vec;

    #[test]
    fn test_archive_list() {
-        let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar"));
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar"));
        let entries = archive.entries().collect::<Vec<_>>();
        println!("{:#?}", entries);
    }

+    /// Tests to read the entries from existing archives in various Tar flavors.
    #[test]
    fn test_archive_entries() {
-        let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_default.tar"));
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_default.tar"));
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

-        let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_gnu.tar"));
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_gnu.tar"));
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

-        let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_oldgnu.tar"));
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_oldgnu.tar"));
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

@ -220,39 +290,61 @@ mod tests {
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);*/

-        let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_ustar.tar"));
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_ustar.tar"));
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);

-        let archive = TarArchive::new(include_bytes!("../tests/gnu_tar_v7.tar"));
+        let archive = TarArchiveRef::new(include_bytes!("../tests/gnu_tar_v7.tar"));
        let entries = archive.entries().collect::<Vec<_>>();
        assert_archive_content(&entries);
    }

+    /// Like [`test_archive_entries`] but with additional `alloc` functionality.
+    #[cfg(feature = "alloc")]
+    #[test]
+    fn test_archive_entries_alloc() {
+        let data = include_bytes!("../tests/gnu_tar_default.tar")
+            .to_vec()
+            .into_boxed_slice();
+        let archive = TarArchive::new(data.clone());
+        let entries = archive.entries().collect::<Vec<_>>();
+        assert_archive_content(&entries);
+
+        // Test that the archive can be transformed into owned heap data.
+        assert_eq!(data, archive.into());
+    }
+
+    /// Tests that the parsed archive matches the expected order. The tarballs
+    /// in the tests directory were created once by me with files in the order
+    /// specified in this test.
    fn assert_archive_content(entries: &[ArchiveEntry]) {
        assert_eq!(entries.len(), 3);
-        // order in that I stored the files into the archive
+
        assert_eq!(entries[0].filename().as_str(), "bye_world_513b.txt");
        assert_eq!(entries[0].size(), 513);
        assert_eq!(entries[0].data().len(), 513);
        assert_eq!(
-            unsafe { str::from_utf8_unchecked(entries[0].data) },
-            include_str!("../tests/bye_world_513b.txt")
+            entries[0].data_as_str().expect("Should be valid UTF-8"),
+            // .replace: Ensure that the test also works on Windows
+            include_str!("../tests/bye_world_513b.txt").replace("\r\n", "\n")
        );

+        // Test that an entry that needs two 512 byte data blocks is read
+        // properly.
        assert_eq!(entries[1].filename().as_str(), "hello_world_513b.txt");
        assert_eq!(entries[1].size(), 513);
        assert_eq!(entries[1].data().len(), 513);
        assert_eq!(
-            unsafe { str::from_utf8_unchecked(entries[1].data) },
-            include_str!("../tests/hello_world_513b.txt")
+            entries[1].data_as_str().expect("Should be valid UTF-8"),
+            // .replace: Ensure that the test also works on Windows
+            include_str!("../tests/hello_world_513b.txt").replace("\r\n", "\n")
        );

        assert_eq!(entries[2].filename().as_str(), "hello_world.txt");
        assert_eq!(entries[2].size(), 12);
        assert_eq!(entries[2].data().len(), 12);
        assert_eq!(
-            unsafe { str::from_utf8_unchecked(entries[2].data) },
+            entries[2].data_as_str().expect("Should be valid UTF-8"),
            "Hello World\n",
            "file content must match"
        );
--- a/src/header.rs
+++ b/src/header.rs
@ -1,7 +1,7 @@
 /*
 MIT License

-Copyright (c) 2021 Philipp Schuster
+Copyright (c) 2023 Philipp Schuster

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@ -30,9 +30,10 @@ SOFTWARE.

 #![allow(non_upper_case_globals)]

-use crate::BLOCKSIZE;
+use crate::{BLOCKSIZE, FILENAME_MAX_LEN};
 use arrayvec::ArrayString;
 use core::fmt::{Debug, Formatter};
+use core::num::ParseIntError;

 /// The file size is encoded as octal ASCII number inside a Tar header.
 #[derive(Copy, Clone)]
@ -41,8 +42,8 @@ pub struct Size(StaticCString<12>);

 impl Size {
    /// Returns the octal ASCII number as actual size in bytes.
-    pub fn val(&self) -> usize {
-        usize::from_str_radix(self.0.as_string().as_str(), 8).unwrap()
+    pub fn val(&self) -> Result<usize, ParseIntError> {
+        usize::from_str_radix(self.0.as_string().as_str(), 8)
    }
 }

@ -54,16 +55,24 @@ impl Debug for Size {
    }
 }

+#[derive(Debug)]
+pub enum ModeError {
+    ParseInt(ParseIntError),
+    IllegalMode,
+}
+
 /// Wrapper around the UNIX file permissions given in octal ASCII.
 #[derive(Copy, Clone)]
 #[repr(transparent)]
 pub struct Mode(StaticCString<8>);

 impl Mode {
-    pub fn to_flags(self) -> ModeFlags {
+    /// Parses the [`ModeFlags`] from the mode string.
+    pub fn to_flags(self) -> Result<ModeFlags, ModeError> {
        let octal_number_str = self.0.as_string();
-        let bits = u64::from_str_radix(octal_number_str.as_str(), 8).unwrap();
-        ModeFlags::from_bits(bits).unwrap()
+        let bits =
+            u64::from_str_radix(octal_number_str.as_str(), 8).map_err(ModeError::ParseInt)?;
+        ModeFlags::from_bits(bits).ok_or(ModeError::IllegalMode)
    }
 }

@ -75,8 +84,11 @@ impl Debug for Mode {
    }
 }

-/// A C-String that is stored in a static array. All unused
-/// chars must be a NULL-byte.
+/// A C-String that is stored in a static array. There is always a terminating
+/// NULL-byte.
+///
+/// The content is likely to be UTF-8/ASCII, but that is not verified by this
+/// type.
 #[derive(Copy, Clone)]
 #[repr(transparent)]
 pub struct StaticCString<const N: usize>([u8; N]);
@ -99,19 +111,16 @@ impl<const N: usize> StaticCString<N> {
        self.len() == 0
    }

-    /// Returns a string without null bytes.
+    /// Returns a string that includes all characters until the first null.
    pub fn as_string(&self) -> ArrayString<N> {
        let mut string = ArrayString::new();
-        // copy all bytes (=ASCII) into string
        self.0
            .clone()
            .iter()
-            // remove all zero bytes; there is always one
-            // zero byte at the end. Furtherore, the other
-            // unused bytes are also zero, but not part of the
-            // string.
-            .filter(|x| **x != 0)
-            .for_each(|b| string.push(*b as char));
+            .copied()
+            // Take all chars until the terminating null.
+            .take_while(|byte| *byte != 0)
+            .for_each(|byte| string.push(byte as char));
        string
    }
 }
@ -145,7 +154,7 @@ impl<const N: usize> Debug for StaticCString<N> {
 pub struct PosixHeader {
    /// Name. There is always a null byte, therefore
    /// the max len is 99.
-    pub name: StaticCString<100>,
+    pub name: StaticCString<{ FILENAME_MAX_LEN }>,
    pub mode: Mode,
    pub uid: [u8; 8],
    pub gid: [u8; 8],
@ -156,7 +165,7 @@ pub struct PosixHeader {
    pub typeflag: TypeFlag,
    /// Name. There is always a null byte, therefore
    /// the max len is 99.
-    pub linkname: StaticCString<100>,
+    pub linkname: StaticCString<{ FILENAME_MAX_LEN }>,
    pub magic: StaticCString<6>,
    pub version: StaticCString<2>,
    /// Username. There is always a null byte, therefore
@ -175,16 +184,14 @@ pub struct PosixHeader {
 }

 impl PosixHeader {
-    /// Returns the number of blocks that are required to
-    /// read the whole file content.
-    pub fn payload_block_count(&self) -> usize {
-        let div = self.size.val() / BLOCKSIZE;
-        let modulo = self.size.val() % BLOCKSIZE;
-        if modulo > 0 {
-            (div + 1) as usize
-        } else {
-            div as usize
-        }
+    /// Returns the number of blocks that are required to read the whole file
+    /// content. Returns an error, if the file size can't be parsed from the
+    /// header.
+    pub fn payload_block_count(&self) -> Result<usize, ParseIntError> {
+        let div = self.size.val()? / BLOCKSIZE;
+        let modulo = self.size.val()? % BLOCKSIZE;
+        let block_count = if modulo > 0 { div + 1 } else { div };
+        Ok(block_count)
    }

    /// A Tar archive is terminated, if a end-of-archive entry, which consists of two 512 blocks
@ -196,7 +203,10 @@ impl PosixHeader {
    }
 }

-#[derive(Debug, Copy, Clone, PartialEq)]
+/// Describes the kind of payload, that follows after a
+/// [`PosixHeader`]. The properties of this payload are
+/// described inside the header.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
 #[repr(u8)]
 #[allow(unused)]
 pub enum TypeFlag {
@ -213,35 +223,35 @@ pub enum TypeFlag {
    /// This flag represents a file linked to another file, of any type, previously archived. Such
    /// files are identified in Unix by each file having the same device and inode number. The
    /// linked-to name is specified in the linkname field with a trailing null.
-    LINK = 1,
+    LINK = b'1',
    /// This represents a symbolic link to another file. The linked-to name is specified in the
    /// linkname field with a trailing null.
-    SYMTYPE = 2,
+    SYMTYPE = b'2',
    /// Represents character special files and block special files respectively. In this case the
    /// devmajor and devminor fields will contain the major and minor device numbers respectively.
    /// Operating systems may map the device specifications to their own local specification, or
    /// may ignore the entry.
-    CHRTYPE = 3,
+    CHRTYPE = b'3',
    /// Represents character special files and block special files respectively. In this case the
    /// devmajor and devminor fields will contain the major and minor device numbers respectively.
    /// Operating systems may map the device specifications to their own local specification, or
    /// may ignore the entry.
-    BLKTYPE = 4,
+    BLKTYPE = b'4',
    /// This flag specifies a directory or sub-directory. The directory name in the name field
    /// should end with a slash. On systems where disk allocation is performed on a directory
    /// basis, the size field will contain the maximum number of bytes (which may be rounded to
    /// the nearest disk block allocation unit) which the directory may hold. A size field of zero
    /// indicates no such limiting. Systems which do not support limiting in this manner should
    /// ignore the size field.
-    DIRTYPE = 5,
+    DIRTYPE = b'5',
    /// This specifies a FIFO special file. Note that the archiving of a FIFO file archives the
    /// existence of this file and not its contents.
-    FIFOTYPE = 6,
+    FIFOTYPE = b'6',
    /// This specifies a contiguous file, which is the same as a normal file except that, in
    /// operating systems which support it, all its space is allocated contiguously on the disk.
    /// Operating systems which do not allow contiguous allocation should silently treat this type
    /// as a normal file.
-    CONTTYPE = 7,
+    CONTTYPE = b'7',
    /// Extended header referring to the next file in the archive
    XHDTYPE = b'x',
    /// Global extended header
@ -249,7 +259,9 @@ pub enum TypeFlag {
 }

 bitflags::bitflags! {
-    /// UNIX file permissions on octal format.
+    /// UNIX file permissions in octal format.
+    #[repr(transparent)]
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct ModeFlags: u64 {
        /// Set UID on execution.
        const SetUID = 0o4000;
@ -284,9 +296,9 @@ mod tests {
    use crate::BLOCKSIZE;
    use std::mem::size_of;

-    fn bytes_to_archive(bytes: &[u8]) -> PosixHeader {
-        let hdr = bytes.as_ptr() as *const PosixHeader;
-        unsafe { core::ptr::read(hdr) }
+    /// Casts the bytes to a reference to a PosixHeader.
+    fn bytes_to_archive(bytes: &[u8]) -> &PosixHeader {
+        unsafe { (bytes.as_ptr() as *const PosixHeader).as_ref() }.unwrap()
    }

    #[test]
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,7 +1,7 @@
 /*
 MIT License

-Copyright (c) 2021 Philipp Schuster
+Copyright (c) 2023 Philipp Schuster

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@ -21,58 +21,76 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 */
-//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero allocations.
-//! If you have a standard environment and need full feature support, I recommend the use of
-//! <https://crates.io/crates/tar> instead.
+//! Library to read Tar archives (by GNU Tar) in `no_std` contexts with zero
+//! allocations. If you have a standard environment and need full feature
+//! support, I recommend the use of <https://crates.io/crates/tar> instead.
 //!
-//! The crate is simple and only supports reading of "basic" archives, therefore no extensions, such
-//! as GNU Longname. The maximum supported file name length is 100 characters including the NULL-byte.
-//! The maximum supported file size is 8GiB. Also, directories are not supported yet but only flat
-//! collections of files.
+//! The crate is simple and only supports reading of "basic" archives, therefore
+//! no extensions, such as GNU Longname. The maximum supported file name length
+//! is 100 characters including the NULL-byte. The maximum supported file size
+//! is 8 GiB. Directories are not supported yet, only flat collections
+//! of files.
 //!
-//! This library is useful, if you write a kernel or a similar low-level application, which needs
-//! "a bunch of files" from an archive ("init ram disk"). The Tar file could for example come
-//! as a Multiboot2 boot module provided by the bootloader.
+//! This library is useful, if you write a kernel or a similar low-level
+//! application, which needs "a bunch of files" from an archive ("init ram
+//! disk"). The Tar file could for example come as a Multiboot2 boot module
+//! provided by the bootloader.
 //!
-//! This crate focuses on extracting files from uncompressed Tar archives created with default options by **GNU Tar**.
-//! GNU Extensions such as sparse files, incremental archives, and long filename extension are not supported yet.
-//! [This link](https://www.gnu.org/software/tar/manual/html_section/Formats.html) gives a good overview over possible
-//! archive formats and their limitations.
+//! This crate focuses on extracting files from uncompressed Tar archives
+//! created with default options by **GNU Tar**. GNU Extensions such as sparse
+//! files, incremental archives, and long filename extension are not supported
+//! yet. [gnu.org](https://www.gnu.org/software/tar/manual/html_section/Formats.html)
+//! provides a good overview of possible archive formats and their
+//! limitations.
 //!
 //! # Example
 //! ```rust
-//! use tar_no_std::TarArchive;
+//! use tar_no_std::TarArchiveRef;
 //!
-//! fn main() {
-//!     // log: not mandatory
-//!     std::env::set_var("RUST_LOG", "trace");
-//!     env_logger::init();
+//! // log: not mandatory
+//! std::env::set_var("RUST_LOG", "trace");
+//! env_logger::init();
 //!
-//!     // also works in no_std environment (except the println!, of course)
-//!     let archive = include_bytes!("../tests/gnu_tar_default.tar");
-//!     let archive = TarArchive::new(archive);
-//!     // Vec needs an allocator of course, but the library itself doesn't need one
-//!     let entries = archive.entries().collect::<Vec<_>>();
-//!     println!("{:#?}", entries);
-//!     println!("content of last file:");
-//!     let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
-//!     println!("{:#?}", last_file_content);
-//! }
+//! // also works in no_std environment (except the println!, of course)
+//! let archive = include_bytes!("../tests/gnu_tar_default.tar");
+//! let archive = TarArchiveRef::new(archive);
+//! // Vec needs an allocator of course, but the library itself doesn't need one
+//! let entries = archive.entries().collect::<Vec<_>>();
+//! println!("{:#?}", entries);
+//! println!("content of last file:");
+//! let last_file_content = unsafe { core::str::from_utf8_unchecked(entries[2].data()) };
+//! println!("{:#?}", last_file_content);
 //! ```

 #![cfg_attr(not(test), no_std)]
-#![deny(rustdoc::all)]
-#![allow(rustdoc::missing_doc_code_examples)]
-#![deny(clippy::all)]
-#![deny(clippy::missing_const_for_fn)]
+#![deny(
+    clippy::all,
+    clippy::cargo,
+    clippy::nursery,
+    // clippy::restriction,
+    // clippy::pedantic
+)]
+// now allow a few rules which are denied by the above statement
+// --> they are ridiculous and not necessary
+#![allow(
+    clippy::suboptimal_flops,
+    clippy::redundant_pub_crate,
+    clippy::fallible_impl_from
+)]
 #![deny(missing_debug_implementations)]
+#![deny(rustdoc::all)]

 #[cfg_attr(test, macro_use)]
 #[cfg(test)]
 extern crate std;

+#[cfg(feature = "alloc")]
+extern crate alloc;
+
 /// Each Archive Entry (either Header or Data Block) is a block of 512 bytes.
 const BLOCKSIZE: usize = 512;
+/// Maximum filename length of the base Tar format including the terminating NULL-byte.
+const FILENAME_MAX_LEN: usize = 100;

 mod archive;
 mod header;
Author	SHA1	Message	Date
Philipp Schuster	57a0f950ef	Merge pull request #7 from schnoberts1/bugfix-entry-type-enum Fix the values in typeflag so they match https://www.gnu.org/software…	2023-08-30 17:54:57 +02:00
Andy Schneider	f598cb3f9f	Fix the values in typeflag so they match https://www.gnu.org/software/tar/manual/html_node/Standard.html . i.e DIRTYPE is b'5' not 5.	2023-08-27 12:55:21 +01:00
Philipp Schuster	382c10c217	Merge pull request #5 from phip1611/dev v0.2.0	2023-04-11 13:08:27 +02:00
Philipp Schuster	3eccc3c199	ci: add typos check	2023-04-11 12:33:28 +02:00
Philipp Schuster	1412663d2c	prepare v0.2.0 with code improvements	2023-04-11 12:09:15 +02:00
Philipp Schuster	547429a3dd	Merge pull request #4 from semiviral/main update bitflags dependency `1.3` -> `2.0`	2023-04-11 10:39:45 +02:00
semiviral	e417692b46	undo version bump	2023-04-02 08:27:47 -05:00
semiviral	c8c12e7c0b	`#[repr(transparent)]` for `ModeFlags`	2023-04-02 06:15:42 -05:00
semiviral	48bec03dfe	update bitflags `1.3` -> `2.0`	2023-04-02 06:06:21 -05:00
Philipp Schuster	530d058154	v0.1.8	2022-05-02 22:00:18 +02:00
Philipp Schuster	f70030eb38	MSRV fix	2022-05-02 21:59:51 +02:00
Philipp Schuster	b0442bffa8	CI fix	2022-05-02 21:41:13 +02:00
Philipp Schuster	30dcdb395a	stricter clippy rules	2022-01-19 10:44:20 +01:00
Philipp Schuster	a01ea0f26f	README update (v0.1.7)	2022-01-03 10:43:51 +01:00
Philipp Schuster	a92e2ec39d	doc fixes (v0.1.6)	2021-10-11 15:37:01 +02:00
Philipp Schuster	89dbfc6acd	Merge pull request #3 from phip1611/dev 'alloc'-feature and owning tar archive type (v0.1.5)	2021-10-11 15:32:16 +02:00
Philipp Schuster	da12b748dc	'alloc'-feature and owning tar archive type (v0.1.5)	2021-10-11 15:30:24 +02:00
Philipp Schuster	b35f7a5179	convenient "data_as_str" getter (v0.1.4)	2021-10-09 16:37:10 +02:00
Philipp Schuster	a8c44fb83d	small memory usage improvement (v0.1.3)	2021-10-05 12:01:52 +02:00
Philipp Schuster	40e6da5e97	clippy fix	2021-10-04 14:40:15 +02:00
Philipp Schuster	6cb4d04cef	typo	2021-10-04 13:46:03 +02:00