From 7a53215a578ca89c9b2f81fa7375deabd4250443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Harabie=C5=84?= Date: Wed, 20 Jun 2018 17:17:01 +0200 Subject: [PATCH] No longer return &str for short names in no_std mode Instead, methods returning &[u8] were added. This prepares the API for proper OEM codepage decoding. The previous behavior would have required storing duplicate short names (one lossy and one real). --- src/dir.rs | 2 +- src/dir_entry.rs | 57 ++++++++++++++++++++++++++++++++++-------------- src/fs.rs | 37 ++++++++++++++++++------------- 3 files changed, 64 insertions(+), 32 deletions(-) diff --git a/src/dir.rs b/src/dir.rs index 93b63df..13a3b3b 100644 --- a/src/dir.rs +++ b/src/dir.rs @@ -118,7 +118,7 @@ impl <'a, T: ReadWriteSeek + 'a> Dir<'a, T> { for r in self.iter() { let e = r?; // compare name ignoring case - if e.file_name().eq_ignore_ascii_case(name) || e.short_file_name().eq_ignore_ascii_case(name) { + if e.eq_name(name) { // check if file or directory is expected if is_dir.is_some() && Some(e.is_dir()) != is_dir { let error_msg = if e.is_dir() { "Is a directory" } else { "Not a directory" }; diff --git a/src/dir_entry.rs b/src/dir_entry.rs index 171ddfa..7e92a2f 100644 --- a/src/dir_entry.rs +++ b/src/dir_entry.rs @@ -1,4 +1,6 @@ use core::{fmt, str}; +use core::iter::FromIterator; +use core::char; use io::prelude::*; use io; use io::Cursor; @@ -13,7 +15,7 @@ use chrono; #[cfg(all(not(feature = "std"), feature = "alloc"))] use alloc::{Vec, String, string::ToString}; -use fs::{FileSystem, FatType, ReadWriteSeek}; +use fs::{FileSystem, FatType, ReadWriteSeek, decode_oem_char_lossy}; use file::File; use dir::{Dir, DirRawStream}; @@ -63,16 +65,29 @@ impl ShortName { }; // Short names in FAT filesystem are encoded in OEM code-page. Rust operates on UTF-8 strings // and there is no built-in conversion so strip non-ascii characters in the name. 
- use strip_non_ascii; - strip_non_ascii(&mut name); ShortName { name, len: total_len as u8, } } - fn to_str(&self) -> &str { - str::from_utf8(&self.name[..self.len as usize]).unwrap() // SAFE: all characters outside of ASCII table has been removed + fn bytes(&self) -> &[u8] { + &self.name[..self.len as usize] + } + + #[cfg(feature = "alloc")] + fn to_string(&self) -> String { + // Replace non-ASCII characters in short name with U+FFFD + let char_iter = self.bytes().iter().cloned().map(decode_oem_char_lossy); + // Build string from character iterator + String::from_iter(char_iter) + } + + fn eq_ignore_ascii_case(&self, name: &str) -> bool { + // Decode short name lossily (non-ASCII becomes U+FFFD) and uppercase ASCII letters + let char_iter = self.bytes().iter().cloned().map(decode_oem_char_lossy).map(|c| c.to_ascii_uppercase()); + // Compare lazily against the uppercased name; no string is built + char_iter.eq(name.chars().map(|c| c.to_ascii_uppercase())) } } @@ -124,7 +139,7 @@ impl DirFileEntryData { *c = (*c as char).to_ascii_lowercase() as u8; } } - ShortName::new(&name_copy).to_str().to_string() + ShortName::new(&name_copy).to_string() } pub(crate) fn first_cluster(&self, fat_type: FatType) -> Option { @@ -620,13 +635,18 @@ pub struct DirEntry<'a, T: ReadWriteSeek + 'a> { impl <'a, T: ReadWriteSeek> DirEntry<'a, T> { /// Returns short file name. + /// + /// Non-ASCII characters are replaced by the replacement character (U+FFFD). #[cfg(feature = "alloc")] pub fn short_file_name(&self) -> String { - self.short_name.to_str().to_string() + self.short_name.to_string() } - #[cfg(not(feature = "alloc"))] - pub fn short_file_name(&self) -> &str { - self.short_name.to_str() + + /// Returns short file name as byte array slice. + /// + /// Characters are encoded in the OEM codepage. + pub fn short_file_name_bytes(&self) -> &[u8] { + self.short_name.bytes() } /// Returns long file name or if it doesn't exist fallbacks to short file name. 
@@ -638,10 +658,6 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> { self.data.lowercase_name() } } - #[cfg(not(feature = "alloc"))] - pub fn file_name(&self) -> &str { - self.short_file_name() - } /// Returns file attributes. pub fn attributes(&self) -> FileAttributes { @@ -715,6 +731,15 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> { pub(crate) fn raw_short_name(&self) -> &[u8; 11] { &self.data.name } + + #[cfg(feature = "alloc")] + pub(crate) fn eq_name(&self, name: &str) -> bool { + self.file_name().eq_ignore_ascii_case(name) || self.short_name.eq_ignore_ascii_case(name) + } + #[cfg(not(feature = "alloc"))] + pub(crate) fn eq_name(&self, name: &str) -> bool { + self.short_name.eq_ignore_ascii_case(name) + } } impl <'a, T: ReadWriteSeek> fmt::Debug for DirEntry<'a, T> { @@ -731,14 +756,14 @@ mod tests { fn short_name_with_ext() { let mut raw_short_name = [0u8;11]; raw_short_name.copy_from_slice("FOO BAR".as_bytes()); - assert_eq!(ShortName::new(&raw_short_name).to_str(), "FOO.BAR"); + assert_eq!(ShortName::new(&raw_short_name).to_string(), "FOO.BAR"); } #[test] fn short_name_without_ext() { let mut raw_short_name = [0u8;11]; raw_short_name.copy_from_slice("FOO ".as_bytes()); - assert_eq!(ShortName::new(&raw_short_name).to_str(), "FOO"); + assert_eq!(ShortName::new(&raw_short_name).to_string(), "FOO"); } #[test] diff --git a/src/fs.rs b/src/fs.rs index 5624c18..af6f01d 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -1,5 +1,6 @@ use core::cell::RefCell; use core::cmp; +use core::char; use io::prelude::*; use io::{Error, ErrorKind, SeekFrom}; use io; @@ -15,6 +16,7 @@ use table::{ClusterIterator, alloc_cluster, read_fat_flags, count_free_clusters} use alloc::{String, string::ToString}; #[cfg(all(not(feature = "std"), not(feature = "alloc")))] use core::str; +use core::iter::FromIterator; // FAT implementation based on: // http://wiki.osdev.org/FAT @@ -69,14 +71,6 @@ impl ReadSeek for T where T: Read + Seek {} pub trait ReadWriteSeek: Read + Write + Seek {} impl 
ReadWriteSeek for T where T: Read + Write + Seek {} -pub(crate) fn strip_non_ascii(slice: &mut [u8]) { - for c in slice { - if *c < 0x20 || *c >= 0x80 { - *c = '_' as u8; - } - } -} - #[allow(dead_code)] #[derive(Default, Debug, Clone)] struct BiosParameterBlock { @@ -161,8 +155,6 @@ impl BiosParameterBlock { rdr.read_exact(&mut bpb.volume_label)?; rdr.read_exact(&mut bpb.fs_type_label)?; } - // Strip non-ascii characters from volume label - strip_non_ascii(&mut bpb.volume_label); if bpb.ext_sig != 0x29 { // fields after ext_sig are not used - clean them bpb.volume_id = 0; @@ -444,17 +436,28 @@ impl FileSystem { self.bpb.volume_id } - /// Returns a volume label from BPB in the Boot Sector. + /// Returns a volume label from BPB in the Boot Sector as `String`. /// + /// Non-ASCII characters are replaced by the replacement character (U+FFFD). /// Note: File with `VOLUME_ID` attribute in root directory is ignored by this library. /// Only label from BPB is used. #[cfg(feature = "alloc")] pub fn volume_label(&self) -> String { - String::from_utf8_lossy(&self.bpb.volume_label).trim_right().to_string() + // Replace non-ASCII characters in volume label with U+FFFD + let char_iter = self.volume_label_bytes().iter().cloned().map(decode_oem_char_lossy); + // Build string from character iterator + String::from_iter(char_iter) } - #[cfg(not(feature = "alloc"))] - pub fn volume_label(&self) -> &str { - str::from_utf8(&self.bpb.volume_label).unwrap_or("").trim_right() + + /// Returns a volume label from BPB in the Boot Sector as byte array slice. + /// + /// Label is encoded in the OEM codepage. + /// Note: File with `VOLUME_ID` attribute in root directory is ignored by this library. + /// Only label from BPB is used. 
+ pub fn volume_label_bytes(&self) -> &[u8] { + let full_label_slice = &self.bpb.volume_label; + let len = full_label_slice.iter().rposition(|b| *b != 0x20).map(|p| p + 1).unwrap_or(0); + &full_label_slice[..len] } /// Returns a root directory object allowing for futher penetration of a filesystem structure. @@ -683,3 +686,7 @@ impl <'a, T: ReadWriteSeek> Seek for DiskSlice<'a, T> { } } } + +pub(crate) fn decode_oem_char_lossy(oem_char: u8) -> char { + if oem_char < 0x80 { oem_char as char } else { char::REPLACEMENT_CHARACTER } +}