Add oem_cp_converter option allowing to provide custom short name decoder

Encoder is not yet used but will be in future.
This is implemented as static reference for now to avoid adding additional
type parameters to all main types. It should be enough for most of cases
where encoder/decoder does not have any state and can be implemented as
static variable.
This commit is contained in:
Rafał Harabień 2018-06-25 23:51:09 +02:00
parent cd7e77e1b4
commit 4528aedc6e
3 changed files with 85 additions and 26 deletions

View File

@ -1,6 +1,5 @@
TODO TODO
==== ====
* proper support for short name decoding from the OEM codepage
* marking volume dirty on first write and not-dirty on unmount * marking volume dirty on first write and not-dirty on unmount
* support for a volume label file in the root directory * support for a volume label file in the root directory
* format volume API * format volume API

View File

@ -15,7 +15,7 @@ use chrono;
#[cfg(all(not(feature = "std"), feature = "alloc"))] #[cfg(all(not(feature = "std"), feature = "alloc"))]
use alloc::{Vec, String, string::ToString}; use alloc::{Vec, String, string::ToString};
use fs::{FileSystem, FatType, ReadWriteSeek, decode_oem_char_lossy}; use fs::{FileSystem, FatType, ReadWriteSeek, OemCpConverter};
use file::File; use file::File;
use dir::{Dir, DirRawStream}; use dir::{Dir, DirRawStream};
@ -88,18 +88,20 @@ impl ShortName {
} }
#[cfg(feature = "alloc")] #[cfg(feature = "alloc")]
fn to_string(&self) -> String { fn to_string(&self, oem_cp_converter: &OemCpConverter) -> String {
// Strip non-ascii characters from short name // Strip non-ascii characters from short name
let char_iter = self.as_bytes().iter().cloned().map(decode_oem_char_lossy); let char_iter = self.as_bytes().iter().cloned().map(|c| oem_cp_converter.decode(c));
// Build string from character iterator // Build string from character iterator
String::from_iter(char_iter) String::from_iter(char_iter)
} }
fn eq_ignore_ascii_case(&self, name: &str) -> bool { fn eq_ignore_case(&self, name: &str, oem_cp_converter: &OemCpConverter) -> bool {
// Strip non-ascii characters from short name // Strip non-ascii characters from short name
let char_iter = self.as_bytes().iter().cloned().map(decode_oem_char_lossy).map(|c| c.to_ascii_uppercase()); let byte_iter = self.as_bytes().iter().cloned();
let char_iter = byte_iter.map(|c| oem_cp_converter.decode(c));
let uppercase_char_iter = char_iter.flat_map(|c| c.to_uppercase());
// Build string from character iterator // Build string from character iterator
char_iter.eq(name.chars().map(|c| c.to_ascii_uppercase())) uppercase_char_iter.eq(name.chars().flat_map(|c| c.to_uppercase()))
} }
} }
@ -139,7 +141,7 @@ impl DirFileEntryData {
} }
#[cfg(feature = "alloc")] #[cfg(feature = "alloc")]
fn lowercase_name(&self) -> String { fn lowercase_name(&self) -> ShortName {
let mut name_copy: [u8; 11] = self.name; let mut name_copy: [u8; 11] = self.name;
if self.lowercase_basename() { if self.lowercase_basename() {
for c in &mut name_copy[..8] { for c in &mut name_copy[..8] {
@ -151,7 +153,7 @@ impl DirFileEntryData {
*c = (*c as char).to_ascii_lowercase() as u8; *c = (*c as char).to_ascii_lowercase() as u8;
} }
} }
ShortName::new(&name_copy).to_string() ShortName::new(&name_copy)
} }
pub(crate) fn first_cluster(&self, fat_type: FatType) -> Option<u32> { pub(crate) fn first_cluster(&self, fat_type: FatType) -> Option<u32> {
@ -675,7 +677,7 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> {
/// Non-ASCII characters are replaced by the replacement character (U+FFFD). /// Non-ASCII characters are replaced by the replacement character (U+FFFD).
#[cfg(feature = "alloc")] #[cfg(feature = "alloc")]
pub fn short_file_name(&self) -> String { pub fn short_file_name(&self) -> String {
self.short_name.to_string() self.short_name.to_string(self.fs.options.oem_cp_converter)
} }
/// Returns short file name as byte array slice. /// Returns short file name as byte array slice.
@ -691,7 +693,7 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> {
if self.lfn.len() > 0 { if self.lfn.len() > 0 {
String::from_utf16_lossy(&self.lfn) String::from_utf16_lossy(&self.lfn)
} else { } else {
self.data.lowercase_name() self.data.lowercase_name().to_string(self.fs.options.oem_cp_converter)
} }
} }
@ -774,11 +776,16 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> {
#[cfg(feature = "alloc")] #[cfg(feature = "alloc")]
pub(crate) fn eq_name(&self, name: &str) -> bool { pub(crate) fn eq_name(&self, name: &str) -> bool {
self.file_name().eq_ignore_ascii_case(name) || self.short_name.eq_ignore_ascii_case(name) let self_name = self.file_name();
let self_name_lowercase_iter = self_name.chars().flat_map(|c| c.to_uppercase());
let other_name_lowercase_iter = name.chars().flat_map(|c| c.to_uppercase());
let long_name_matches = self_name_lowercase_iter.eq(other_name_lowercase_iter);
let short_name_matches = self.short_name.eq_ignore_case(name, self.fs.options.oem_cp_converter);
long_name_matches || short_name_matches
} }
#[cfg(not(feature = "alloc"))] #[cfg(not(feature = "alloc"))]
pub(crate) fn eq_name(&self, name: &str) -> bool { pub(crate) fn eq_name(&self, name: &str) -> bool {
self.short_name.eq_ignore_ascii_case(name) self.short_name.eq_ignore_case(name)
} }
} }
@ -791,23 +798,38 @@ impl <'a, T: ReadWriteSeek> fmt::Debug for DirEntry<'a, T> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use fs::LOSSY_OEM_CP_CONVERTER;
#[test] #[test]
fn short_name_with_ext() { fn short_name_with_ext() {
let mut raw_short_name = [0u8;11]; let mut raw_short_name = [0u8;11];
raw_short_name.copy_from_slice("FOO BAR".as_bytes()); raw_short_name.copy_from_slice("FOO BAR".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "FOO.BAR"); assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "FOO.BAR");
raw_short_name.copy_from_slice("LOOK AT M E".as_bytes()); raw_short_name.copy_from_slice("LOOK AT M E".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "LOOK AT.M E"); assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "LOOK AT.M E");
raw_short_name[0] = 0x99;
raw_short_name[10] = 0x99;
assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "\u{FFFD}OOK AT.M \u{FFFD}");
assert_eq!(ShortName::new(&raw_short_name).eq_ignore_case("\u{FFFD}OOK AT.M \u{FFFD}", &LOSSY_OEM_CP_CONVERTER), true);
} }
#[test] #[test]
fn short_name_without_ext() { fn short_name_without_ext() {
let mut raw_short_name = [0u8;11]; let mut raw_short_name = [0u8;11];
raw_short_name.copy_from_slice("FOO ".as_bytes()); raw_short_name.copy_from_slice("FOO ".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "FOO"); assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "FOO");
raw_short_name.copy_from_slice("LOOK AT ".as_bytes()); raw_short_name.copy_from_slice("LOOK AT ".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "LOOK AT"); assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "LOOK AT");
}
#[test]
fn short_name_eq_ignore_case() {
let mut raw_short_name = [0u8;11];
raw_short_name.copy_from_slice("LOOK AT M E".as_bytes());
raw_short_name[0] = 0x99;
raw_short_name[10] = 0x99;
assert_eq!(ShortName::new(&raw_short_name).eq_ignore_case("\u{FFFD}OOK AT.M \u{FFFD}", &LOSSY_OEM_CP_CONVERTER), true);
assert_eq!(ShortName::new(&raw_short_name).eq_ignore_case("\u{FFFD}ook AT.m \u{FFFD}", &LOSSY_OEM_CP_CONVERTER), true);
} }
#[test] #[test]
@ -825,12 +847,12 @@ mod tests {
reserved_0: (1 << 3) | (1 << 4), reserved_0: (1 << 3) | (1 << 4),
..Default::default() ..Default::default()
}; };
assert_eq!(raw_entry.lowercase_name(), "foo.rs"); assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "foo.rs");
raw_entry.reserved_0 = 1 << 3; raw_entry.reserved_0 = 1 << 3;
assert_eq!(raw_entry.lowercase_name(), "foo.RS"); assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "foo.RS");
raw_entry.reserved_0 = 1 << 4; raw_entry.reserved_0 = 1 << 4;
assert_eq!(raw_entry.lowercase_name(), "FOO.rs"); assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "FOO.rs");
raw_entry.reserved_0 = 0; raw_entry.reserved_0 = 0;
assert_eq!(raw_entry.lowercase_name(), "FOO.RS"); assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "FOO.RS");
} }
} }

View File

@ -298,9 +298,10 @@ impl FsInfoSector {
/// A FAT filesystem mount options. /// A FAT filesystem mount options.
/// ///
/// Options are specified as an argument for `FileSystem::new` method. /// Options are specified as an argument for `FileSystem::new` method.
#[derive(Copy, Clone, Debug)] //#[derive(Copy, Clone, Debug)]
pub struct FsOptions { pub struct FsOptions {
pub(crate) update_accessed_date: bool, pub(crate) update_accessed_date: bool,
pub(crate) oem_cp_converter: &'static OemCpConverter,
} }
impl FsOptions { impl FsOptions {
@ -308,6 +309,7 @@ impl FsOptions {
pub fn new() -> Self { pub fn new() -> Self {
FsOptions { FsOptions {
update_accessed_date: false, update_accessed_date: false,
oem_cp_converter: &LOSSY_OEM_CP_CONVERTER,
} }
} }
@ -316,6 +318,12 @@ impl FsOptions {
self.update_accessed_date = enabled; self.update_accessed_date = enabled;
self self
} }
/// Changes default OEM code page encoder-decoder.
pub fn oem_cp_converter(mut self, oem_cp_converter: &'static OemCpConverter) -> Self {
self.oem_cp_converter = oem_cp_converter;
self
}
} }
/// A FAT volume statistics. /// A FAT volume statistics.
@ -440,8 +448,9 @@ impl <T: ReadWriteSeek> FileSystem<T> {
/// Only label from BPB is used. /// Only label from BPB is used.
#[cfg(feature = "alloc")] #[cfg(feature = "alloc")]
pub fn volume_label(&self) -> String { pub fn volume_label(&self) -> String {
// Strip non-ascii characters from volume label // Decode volume label from OEM codepage
let char_iter = self.volume_label_as_bytes().iter().cloned().map(decode_oem_char_lossy); let volume_label_iter = self.volume_label_as_bytes().iter().cloned();
let char_iter = volume_label_iter.map(|c| self.options.oem_cp_converter.decode(c));
// Build string from character iterator // Build string from character iterator
String::from_iter(char_iter) String::from_iter(char_iter)
} }
@ -682,6 +691,35 @@ impl <'a, T: ReadWriteSeek> Seek for DiskSlice<'a, T> {
} }
} }
pub(crate) fn decode_oem_char_lossy(oem_char: u8) -> char { /// An OEM code page encoder/decoder.
if oem_char < 0x80 { oem_char as char } else { '\u{FFFD}' } ///
/// Provides a custom implementation for a short name encoding/decoding.
/// Default implementation changes all non-ASCII characters to the replacement character (U+FFFD).
pub trait OemCpConverter {
fn decode(&self, oem_char: u8) -> char;
fn encode(&self, uni_char: char) -> Option<u8>;
} }
#[derive(Clone)]
pub(crate) struct LossyOemCpConverter {
_dummy: (),
}
impl OemCpConverter for LossyOemCpConverter {
fn decode(&self, oem_char: u8) -> char {
if oem_char <= 0x7F {
oem_char as char
} else {
'\u{FFFD}'
}
}
fn encode(&self, uni_char: char) -> Option<u8> {
if uni_char <= '\x7F' {
Some(uni_char as u8)
} else {
None
}
}
}
pub(crate) static LOSSY_OEM_CP_CONVERTER: LossyOemCpConverter = LossyOemCpConverter { _dummy: () };