Add oem_cp_converter option that allows providing a custom short name decoder

The encoder is not used yet but will be in the future.
This is implemented as a static reference for now to avoid adding additional
type parameters to all main types. It should be enough for most cases,
where the encoder/decoder does not have any state and can be implemented as
a static variable.
This commit is contained in:
Rafał Harabień 2018-06-25 23:51:09 +02:00
parent cd7e77e1b4
commit 4528aedc6e
3 changed files with 85 additions and 26 deletions

View File

@ -1,6 +1,5 @@
TODO
====
* proper support for short name decoding from the OEM codepage
* marking volume dirty on first write and not-dirty on unmount
* support for a volume label file in the root directory
* format volume API

View File

@ -15,7 +15,7 @@ use chrono;
#[cfg(all(not(feature = "std"), feature = "alloc"))]
use alloc::{Vec, String, string::ToString};
use fs::{FileSystem, FatType, ReadWriteSeek, decode_oem_char_lossy};
use fs::{FileSystem, FatType, ReadWriteSeek, OemCpConverter};
use file::File;
use dir::{Dir, DirRawStream};
@ -88,18 +88,20 @@ impl ShortName {
}
#[cfg(feature = "alloc")]
fn to_string(&self) -> String {
fn to_string(&self, oem_cp_converter: &OemCpConverter) -> String {
// Strip non-ascii characters from short name
let char_iter = self.as_bytes().iter().cloned().map(decode_oem_char_lossy);
let char_iter = self.as_bytes().iter().cloned().map(|c| oem_cp_converter.decode(c));
// Build string from character iterator
String::from_iter(char_iter)
}
fn eq_ignore_ascii_case(&self, name: &str) -> bool {
fn eq_ignore_case(&self, name: &str, oem_cp_converter: &OemCpConverter) -> bool {
// Strip non-ascii characters from short name
let char_iter = self.as_bytes().iter().cloned().map(decode_oem_char_lossy).map(|c| c.to_ascii_uppercase());
let byte_iter = self.as_bytes().iter().cloned();
let char_iter = byte_iter.map(|c| oem_cp_converter.decode(c));
let uppercase_char_iter = char_iter.flat_map(|c| c.to_uppercase());
// Build string from character iterator
char_iter.eq(name.chars().map(|c| c.to_ascii_uppercase()))
uppercase_char_iter.eq(name.chars().flat_map(|c| c.to_uppercase()))
}
}
@ -139,7 +141,7 @@ impl DirFileEntryData {
}
#[cfg(feature = "alloc")]
fn lowercase_name(&self) -> String {
fn lowercase_name(&self) -> ShortName {
let mut name_copy: [u8; 11] = self.name;
if self.lowercase_basename() {
for c in &mut name_copy[..8] {
@ -151,7 +153,7 @@ impl DirFileEntryData {
*c = (*c as char).to_ascii_lowercase() as u8;
}
}
ShortName::new(&name_copy).to_string()
ShortName::new(&name_copy)
}
pub(crate) fn first_cluster(&self, fat_type: FatType) -> Option<u32> {
@ -675,7 +677,7 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> {
/// Non-ASCII characters are replaced by the replacement character (U+FFFD).
#[cfg(feature = "alloc")]
pub fn short_file_name(&self) -> String {
self.short_name.to_string()
self.short_name.to_string(self.fs.options.oem_cp_converter)
}
/// Returns short file name as byte array slice.
@ -691,7 +693,7 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> {
if self.lfn.len() > 0 {
String::from_utf16_lossy(&self.lfn)
} else {
self.data.lowercase_name()
self.data.lowercase_name().to_string(self.fs.options.oem_cp_converter)
}
}
@ -774,11 +776,16 @@ impl <'a, T: ReadWriteSeek> DirEntry<'a, T> {
#[cfg(feature = "alloc")]
pub(crate) fn eq_name(&self, name: &str) -> bool {
self.file_name().eq_ignore_ascii_case(name) || self.short_name.eq_ignore_ascii_case(name)
let self_name = self.file_name();
let self_name_lowercase_iter = self_name.chars().flat_map(|c| c.to_uppercase());
let other_name_lowercase_iter = name.chars().flat_map(|c| c.to_uppercase());
let long_name_matches = self_name_lowercase_iter.eq(other_name_lowercase_iter);
let short_name_matches = self.short_name.eq_ignore_case(name, self.fs.options.oem_cp_converter);
long_name_matches || short_name_matches
}
#[cfg(not(feature = "alloc"))]
pub(crate) fn eq_name(&self, name: &str) -> bool {
self.short_name.eq_ignore_ascii_case(name)
self.short_name.eq_ignore_case(name)
}
}
@ -791,23 +798,38 @@ impl <'a, T: ReadWriteSeek> fmt::Debug for DirEntry<'a, T> {
#[cfg(test)]
mod tests {
use super::*;
use fs::LOSSY_OEM_CP_CONVERTER;
#[test]
fn short_name_with_ext() {
let mut raw_short_name = [0u8;11];
raw_short_name.copy_from_slice("FOO BAR".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "FOO.BAR");
assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "FOO.BAR");
raw_short_name.copy_from_slice("LOOK AT M E".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "LOOK AT.M E");
assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "LOOK AT.M E");
raw_short_name[0] = 0x99;
raw_short_name[10] = 0x99;
assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "\u{FFFD}OOK AT.M \u{FFFD}");
assert_eq!(ShortName::new(&raw_short_name).eq_ignore_case("\u{FFFD}OOK AT.M \u{FFFD}", &LOSSY_OEM_CP_CONVERTER), true);
}
#[test]
fn short_name_without_ext() {
let mut raw_short_name = [0u8;11];
raw_short_name.copy_from_slice("FOO ".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "FOO");
assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "FOO");
raw_short_name.copy_from_slice("LOOK AT ".as_bytes());
assert_eq!(ShortName::new(&raw_short_name).to_string(), "LOOK AT");
assert_eq!(ShortName::new(&raw_short_name).to_string(&LOSSY_OEM_CP_CONVERTER), "LOOK AT");
}
#[test]
fn short_name_eq_ignore_case() {
    // Raw 11-byte short name whose first and last bytes are replaced with the
    // non-ASCII value 0x99, which the lossy converter decodes to U+FFFD.
    let mut raw_short_name: [u8; 11] = *b"LOOK AT M E";
    raw_short_name[0] = 0x99;
    raw_short_name[10] = 0x99;
    let short_name = ShortName::new(&raw_short_name);
    // Both the exact-case and the mixed-case spelling must compare equal.
    let candidates = ["\u{FFFD}OOK AT.M \u{FFFD}", "\u{FFFD}ook AT.m \u{FFFD}"];
    for &candidate in candidates.iter() {
        assert!(
            short_name.eq_ignore_case(candidate, &LOSSY_OEM_CP_CONVERTER),
            "short name should match {:?}",
            candidate
        );
    }
}
#[test]
@ -825,12 +847,12 @@ mod tests {
reserved_0: (1 << 3) | (1 << 4),
..Default::default()
};
assert_eq!(raw_entry.lowercase_name(), "foo.rs");
assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "foo.rs");
raw_entry.reserved_0 = 1 << 3;
assert_eq!(raw_entry.lowercase_name(), "foo.RS");
assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "foo.RS");
raw_entry.reserved_0 = 1 << 4;
assert_eq!(raw_entry.lowercase_name(), "FOO.rs");
assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "FOO.rs");
raw_entry.reserved_0 = 0;
assert_eq!(raw_entry.lowercase_name(), "FOO.RS");
assert_eq!(raw_entry.lowercase_name().to_string(&LOSSY_OEM_CP_CONVERTER), "FOO.RS");
}
}

View File

@ -298,9 +298,10 @@ impl FsInfoSector {
/// FAT filesystem mount options.
///
/// Options are specified as an argument for `FileSystem::new` method.
///
/// `Debug` is not derived because the `OemCpConverter` trait object stored here does
/// not implement it. `Copy` and `Clone` are still derived: both fields (a `bool` and a
/// shared `'static` reference) are `Copy`.
#[derive(Copy, Clone)]
pub struct FsOptions {
    /// Whether the accessed date should be updated (set through the builder method).
    pub(crate) update_accessed_date: bool,
    /// Converter used to decode short names and the volume label from the OEM code page.
    pub(crate) oem_cp_converter: &'static OemCpConverter,
}
impl FsOptions {
@ -308,6 +309,7 @@ impl FsOptions {
pub fn new() -> Self {
FsOptions {
// Accessed-date updates stay off unless switched on through the builder method.
update_accessed_date: false,
// Default converter: the lossy one, which decodes non-ASCII bytes to U+FFFD.
oem_cp_converter: &LOSSY_OEM_CP_CONVERTER,
}
}
@ -316,6 +318,12 @@ impl FsOptions {
self.update_accessed_date = enabled;
self
}
/// Changes default OEM code page encoder-decoder.
pub fn oem_cp_converter(mut self, oem_cp_converter: &'static OemCpConverter) -> Self {
self.oem_cp_converter = oem_cp_converter;
self
}
}
/// A FAT volume statistics.
@ -440,8 +448,9 @@ impl <T: ReadWriteSeek> FileSystem<T> {
/// Only label from BPB is used.
#[cfg(feature = "alloc")]
pub fn volume_label(&self) -> String {
// Strip non-ascii characters from volume label
let char_iter = self.volume_label_as_bytes().iter().cloned().map(decode_oem_char_lossy);
// Decode volume label from OEM codepage
let volume_label_iter = self.volume_label_as_bytes().iter().cloned();
let char_iter = volume_label_iter.map(|c| self.options.oem_cp_converter.decode(c));
// Build string from character iterator
String::from_iter(char_iter)
}
@ -682,6 +691,35 @@ impl <'a, T: ReadWriteSeek> Seek for DiskSlice<'a, T> {
}
}
pub(crate) fn decode_oem_char_lossy(oem_char: u8) -> char {
if oem_char < 0x80 { oem_char as char } else { '\u{FFFD}' }
/// An OEM code page encoder/decoder.
///
/// Provides a custom implementation of short name encoding/decoding.
/// The default implementation (`LossyOemCpConverter`) changes all non-ASCII characters
/// to the replacement character (U+FFFD).
pub trait OemCpConverter {
/// Decodes a single OEM code page byte into a Unicode character.
fn decode(&self, oem_char: u8) -> char;
/// Encodes a Unicode character as a single OEM code page byte.
///
/// The `Option` return type lets an implementation signal a character it cannot
/// encode; the default lossy converter returns `None` for every non-ASCII character.
fn encode(&self, uni_char: char) -> Option<u8>;
}
/// Converter used when no custom `OemCpConverter` is configured.
///
/// Its `OemCpConverter` implementation translates only the ASCII range.
#[derive(Clone)]
pub(crate) struct LossyOemCpConverter {
// Zero-sized private field.
// NOTE(review): presumably here so the type cannot be built with a struct literal
// outside this module — confirm the intent.
_dummy: (),
}
impl OemCpConverter for LossyOemCpConverter {
    /// Decodes an OEM byte: ASCII bytes (0x00..=0x7F) map to the identical code point,
    /// every other byte becomes the replacement character U+FFFD.
    fn decode(&self, oem_char: u8) -> char {
        match oem_char {
            ascii if ascii.is_ascii() => char::from(ascii),
            _ => '\u{FFFD}',
        }
    }

    /// Encodes a character: ASCII characters yield their byte value, anything else
    /// yields `None`.
    fn encode(&self, uni_char: char) -> Option<u8> {
        if !uni_char.is_ascii() {
            return None;
        }
        // The cast cannot truncate: the guard above limits the value to 0x00..=0x7F.
        Some(uni_char as u8)
    }
}
/// Shared instance of the lossy converter; `FsOptions::new` installs it as the default.
pub(crate) static LOSSY_OEM_CP_CONVERTER: LossyOemCpConverter = LossyOemCpConverter { _dummy: () };