firmware/rpc_proto: Fix size/alignment calculation for structs with tail padding

Also factors out duplicate code for (de)serializing
elements of lists and ndarrays, and replaces the rounding
calculations by the well-known, much faster power-of-two-only
bit-twiddling version.

GitHub: Fixes #1934.
This commit is contained in:
David Nadlinger 2022-11-10 02:03:47 +00:00 committed by Sébastien Bourdeauducq
parent 6caa779c74
commit 8740ec3dd5
2 changed files with 174 additions and 140 deletions

View File

@ -6,22 +6,81 @@ use io::{ProtoRead, Read, Write, ProtoWrite, Error};
use self::tag::{Tag, TagIterator, split_tag}; use self::tag::{Tag, TagIterator, split_tag};
#[inline] #[inline]
fn alignment_offset(alignment: isize, ptr: isize) -> isize { fn round_up(val: usize, power_of_two: usize) -> usize {
(-ptr).rem_euclid(alignment) assert!(power_of_two.is_power_of_two());
let max_rem = power_of_two - 1;
(val + max_rem) & (!max_rem)
} }
#[inline]
unsafe fn round_up_mut<T>(ptr: *mut T, power_of_two: usize) -> *mut T {
round_up(ptr as usize, power_of_two) as *mut T
}
#[inline]
unsafe fn round_up_const<T>(ptr: *const T, power_of_two: usize) -> *const T {
round_up(ptr as usize, power_of_two) as *const T
}
#[inline]
unsafe fn align_ptr<T>(ptr: *const ()) -> *const T { unsafe fn align_ptr<T>(ptr: *const ()) -> *const T {
let alignment = core::mem::align_of::<T>() as isize; round_up_const(ptr, core::mem::align_of::<T>()) as *const T
let fix = alignment_offset(alignment as isize, ptr as isize);
((ptr as isize) + fix) as *const T
} }
#[inline]
unsafe fn align_ptr_mut<T>(ptr: *mut ()) -> *mut T { unsafe fn align_ptr_mut<T>(ptr: *mut ()) -> *mut T {
let alignment = core::mem::align_of::<T>() as isize; round_up_mut(ptr, core::mem::align_of::<T>()) as *mut T
let fix = alignment_offset(alignment as isize, ptr as isize);
((ptr as isize) + fix) as *mut T
} }
/// Reads (deserializes) `length` array or list elements of type `tag` from `reader`,
/// writing them into the buffer given by `storage`.
///
/// `alloc` is used for nested allocations (if elements themselves contain
/// lists/arrays), see [recv_value].
unsafe fn recv_elements<R, E>(
reader: &mut R,
tag: Tag,
length: usize,
storage: *mut (),
alloc: &dyn Fn(usize) -> Result<*mut (), E>,
) -> Result<(), E>
where
R: Read + ?Sized,
E: From<Error<R::ReadError>>,
{
// List of simple types are special-cased in the protocol for performance.
match tag {
Tag::Bool => {
let dest = slice::from_raw_parts_mut(storage as *mut u8, length);
reader.read_exact(dest)?;
},
Tag::Int32 => {
let dest = slice::from_raw_parts_mut(storage as *mut u8, length * 4);
reader.read_exact(dest)?;
let dest = slice::from_raw_parts_mut(storage as *mut i32, length);
NativeEndian::from_slice_i32(dest);
},
Tag::Int64 | Tag::Float64 => {
let dest = slice::from_raw_parts_mut(storage as *mut u8, length * 8);
reader.read_exact(dest)?;
let dest = slice::from_raw_parts_mut(storage as *mut i64, length);
NativeEndian::from_slice_i64(dest);
},
_ => {
let mut data = storage;
for _ in 0..length {
recv_value(reader, tag, &mut data, alloc)?
}
}
}
Ok(())
}
/// Reads (deserializes) a value of type `tag` from `reader`, writing the results to
/// the kernel-side buffer `data` (the passed pointer to which is incremented to point
/// past the just-received data). For nested allocations (lists/arrays), `alloc` is
/// invoked any number of times with the size of the required allocation as a parameter
/// (which is assumed to be correctly aligned for all payload types).
unsafe fn recv_value<R, E>(reader: &mut R, tag: Tag, data: &mut *mut (), unsafe fn recv_value<R, E>(reader: &mut R, tag: Tag, data: &mut *mut (),
alloc: &dyn Fn(usize) -> Result<*mut (), E>) alloc: &dyn Fn(usize) -> Result<*mut (), E>)
-> Result<(), E> -> Result<(), E>
@ -59,99 +118,63 @@ unsafe fn recv_value<R, E>(reader: &mut R, tag: Tag, data: &mut *mut (),
}) })
} }
Tag::Tuple(it, arity) => { Tag::Tuple(it, arity) => {
*data = data.offset(alignment_offset(tag.alignment() as isize, *data as isize)); let alignment = tag.alignment();
*data = round_up_mut(*data, alignment);
let mut it = it.clone(); let mut it = it.clone();
for _ in 0..arity { for _ in 0..arity {
let tag = it.next().expect("truncated tag"); let tag = it.next().expect("truncated tag");
recv_value(reader, tag, data, alloc)? recv_value(reader, tag, data, alloc)?
} }
// Take into account any tail padding (if element(s) with largest alignment
// are not at the end).
*data = round_up_mut(*data, alignment);
Ok(()) Ok(())
} }
Tag::List(it) => { Tag::List(it) => {
#[repr(C)] #[repr(C)]
struct List { elements: *mut (), length: u32 } struct List { elements: *mut (), length: usize }
consume_value!(*mut List, |ptr| { consume_value!(*mut List, |ptr_to_list| {
let tag = it.clone().next().expect("truncated tag"); let tag = it.clone().next().expect("truncated tag");
let padding = if let Tag::Int64 | Tag::Float64 = tag { 4 } else { 0 };
let length = reader.read_u32()? as usize; let length = reader.read_u32()? as usize;
let data = alloc(tag.size() * length + padding + 8)? as *mut u8;
*ptr = data as *mut List;
let ptr = data as *mut List;
let mut data = data.offset(8 + alignment_offset(tag.alignment() as isize, data as isize)) as *mut ();
(*ptr).length = length as u32; // To avoid multiple kernel CPU roundtrips, use a single allocation for
(*ptr).elements = data; // both the pointer/length List (slice) and the backing storage for the
match tag { // elements. We can assume that alloc() is aligned suitably, so just
Tag::Bool => { // need to take into account any extra padding required.
let dest = slice::from_raw_parts_mut(data as *mut u8, length); // (Note: On RISC-V, there will never actually be any types with
reader.read_exact(dest)?; // alignment larger than 8 bytes, so storage_offset == 0 always.)
}, let list_size = 4 + 4;
Tag::Int32 => { let storage_offset = round_up(list_size, tag.alignment());
let dest = slice::from_raw_parts_mut(data as *mut u8, length * 4); let storage_size = tag.size() * length;
reader.read_exact(dest)?;
let dest = slice::from_raw_parts_mut(data as *mut i32, length); let allocation = alloc(storage_offset as usize + storage_size)? as *mut u8;
NativeEndian::from_slice_i32(dest); *ptr_to_list = allocation as *mut List;
}, let storage = allocation.offset(storage_offset as isize) as *mut ();
Tag::Int64 | Tag::Float64 => {
let dest = slice::from_raw_parts_mut(data as *mut u8, length * 8); (**ptr_to_list).length = length;
reader.read_exact(dest)?; (**ptr_to_list).elements = storage;
let dest = slice::from_raw_parts_mut(data as *mut i64, length); recv_elements(reader, tag, length, storage, alloc)
NativeEndian::from_slice_i64(dest);
},
_ => {
for _ in 0..length {
recv_value(reader, tag, &mut data, alloc)?
}
}
}
Ok(())
}) })
} }
Tag::Array(it, num_dims) => { Tag::Array(it, num_dims) => {
consume_value!(*mut (), |buffer| { consume_value!(*mut (), |buffer| {
let mut total_len: u32 = 1; // Deserialize length along each dimension and compute total number of
// elements.
let mut total_len: usize = 1;
for _ in 0..num_dims { for _ in 0..num_dims {
let len = reader.read_u32()?; let len = reader.read_u32()? as usize;
total_len *= len; total_len *= len;
consume_value!(u32, |ptr| *ptr = len ) consume_value!(usize, |ptr| *ptr = len )
} }
let length = total_len as usize;
// Allocate backing storage for elements; deserialize them.
let elt_tag = it.clone().next().expect("truncated tag"); let elt_tag = it.clone().next().expect("truncated tag");
let padding = if let Tag::Int64 | Tag::Float64 = tag { 4 } else { 0 }; *buffer = alloc(elt_tag.size() * total_len)?;
let mut data = alloc(elt_tag.size() * length + padding)?; recv_elements(reader, tag, total_len, *buffer, alloc)
data = data.offset(alignment_offset(tag.alignment() as isize, data as isize));
*buffer = data;
match elt_tag {
Tag::Bool => {
let dest = slice::from_raw_parts_mut(data as *mut u8, length);
reader.read_exact(dest)?;
},
Tag::Int32 => {
let dest = slice::from_raw_parts_mut(data as *mut u8, length * 4);
reader.read_exact(dest)?;
let dest = slice::from_raw_parts_mut(data as *mut i32, length);
NativeEndian::from_slice_i32(dest);
},
Tag::Int64 | Tag::Float64 => {
let dest = slice::from_raw_parts_mut(data as *mut u8, length * 8);
reader.read_exact(dest)?;
let dest = slice::from_raw_parts_mut(data as *mut i64, length);
NativeEndian::from_slice_i64(dest);
},
_ => {
for _ in 0..length {
recv_value(reader, elt_tag, &mut data, alloc)?
}
}
}
Ok(())
}) })
} }
Tag::Range(it) => { Tag::Range(it) => {
*data = data.offset(alignment_offset(tag.alignment() as isize, *data as isize)); *data = round_up_mut(*data, tag.alignment());
let tag = it.clone().next().expect("truncated tag"); let tag = it.clone().next().expect("truncated tag");
recv_value(reader, tag, data, alloc)?; recv_value(reader, tag, data, alloc)?;
recv_value(reader, tag, data, alloc)?; recv_value(reader, tag, data, alloc)?;
@ -180,6 +203,36 @@ pub fn recv_return<R, E>(reader: &mut R, tag_bytes: &[u8], data: *mut (),
Ok(()) Ok(())
} }
unsafe fn send_elements<W>(writer: &mut W, elt_tag: Tag, length: usize, data: *const ())
-> Result<(), Error<W::WriteError>>
where W: Write + ?Sized
{
writer.write_u8(elt_tag.as_u8())?;
match elt_tag {
// we cannot use NativeEndian::from_slice_i32 as the data is not mutable,
// and that is not needed as the data is already in native endian
Tag::Bool => {
let slice = slice::from_raw_parts(data as *const u8, length);
writer.write_all(slice)?;
},
Tag::Int32 => {
let slice = slice::from_raw_parts(data as *const u8, length * 4);
writer.write_all(slice)?;
},
Tag::Int64 | Tag::Float64 => {
let slice = slice::from_raw_parts(data as *const u8, length * 8);
writer.write_all(slice)?;
},
_ => {
let mut data = data;
for _ in 0..length {
send_value(writer, elt_tag, &mut data)?;
}
}
}
Ok(())
}
unsafe fn send_value<W>(writer: &mut W, tag: Tag, data: &mut *const ()) unsafe fn send_value<W>(writer: &mut W, tag: Tag, data: &mut *const ())
-> Result<(), Error<W::WriteError>> -> Result<(), Error<W::WriteError>>
where W: Write + ?Sized where W: Write + ?Sized
@ -213,10 +266,13 @@ unsafe fn send_value<W>(writer: &mut W, tag: Tag, data: &mut *const ())
Tag::Tuple(it, arity) => { Tag::Tuple(it, arity) => {
let mut it = it.clone(); let mut it = it.clone();
writer.write_u8(arity)?; writer.write_u8(arity)?;
let mut max_alignment = 0;
for _ in 0..arity { for _ in 0..arity {
let tag = it.next().expect("truncated tag"); let tag = it.next().expect("truncated tag");
max_alignment = core::cmp::max(max_alignment, tag.alignment());
send_value(writer, tag, data)? send_value(writer, tag, data)?
} }
*data = round_up_const(*data, max_alignment);
Ok(()) Ok(())
} }
Tag::List(it) => { Tag::List(it) => {
@ -226,30 +282,7 @@ unsafe fn send_value<W>(writer: &mut W, tag: Tag, data: &mut *const ())
let length = (**ptr).length as usize; let length = (**ptr).length as usize;
writer.write_u32((**ptr).length)?; writer.write_u32((**ptr).length)?;
let tag = it.clone().next().expect("truncated tag"); let tag = it.clone().next().expect("truncated tag");
let mut data = (**ptr).elements; send_elements(writer, tag, length, (**ptr).elements)
writer.write_u8(tag.as_u8())?;
match tag {
// we cannot use NativeEndian::from_slice_i32 as the data is not mutable,
// and that is not needed as the data is already in native endian
Tag::Bool => {
let slice = slice::from_raw_parts(data as *const u8, length);
writer.write_all(slice)?;
},
Tag::Int32 => {
let slice = slice::from_raw_parts(data as *const u8, length * 4);
writer.write_all(slice)?;
},
Tag::Int64 | Tag::Float64 => {
let slice = slice::from_raw_parts(data as *const u8, length * 8);
writer.write_all(slice)?;
},
_ => {
for _ in 0..length {
send_value(writer, tag, &mut data)?;
}
}
}
Ok(())
}) })
} }
Tag::Array(it, num_dims) => { Tag::Array(it, num_dims) => {
@ -265,30 +298,7 @@ unsafe fn send_value<W>(writer: &mut W, tag: Tag, data: &mut *const ())
}) })
} }
let length = total_len as usize; let length = total_len as usize;
let mut data = *buffer; send_elements(writer, elt_tag, length, *buffer)
writer.write_u8(elt_tag.as_u8())?;
match elt_tag {
// we cannot use NativeEndian::from_slice_i32 as the data is not mutable,
// and that is not needed as the data is already in native endian
Tag::Bool => {
let slice = slice::from_raw_parts(data as *const u8, length);
writer.write_all(slice)?;
},
Tag::Int32 => {
let slice = slice::from_raw_parts(data as *const u8, length * 4);
writer.write_all(slice)?;
},
Tag::Int64 | Tag::Float64 => {
let slice = slice::from_raw_parts(data as *const u8, length * 8);
writer.write_all(slice)?;
},
_ => {
for _ in 0..length {
send_value(writer, elt_tag, &mut data)?;
}
}
}
Ok(())
}) })
} }
Tag::Range(it) => { Tag::Range(it) => {
@ -349,7 +359,7 @@ pub fn send_args<W>(writer: &mut W, service: u32, tag_bytes: &[u8], data: *const
mod tag { mod tag {
use core::fmt; use core::fmt;
use super::alignment_offset; use super::round_up;
pub fn split_tag(tag_bytes: &[u8]) -> (&[u8], &[u8]) { pub fn split_tag(tag_bytes: &[u8]) -> (&[u8], &[u8]) {
let tag_separator = let tag_separator =
@ -416,16 +426,18 @@ mod tag {
let it = it.clone(); let it = it.clone();
it.take(3).map(|t| t.alignment()).max().unwrap() it.take(3).map(|t| t.alignment()).max().unwrap()
} }
// CSlice basically // the ptr/length(s) pair is basically CSlice
Tag::Bytes | Tag::String | Tag::ByteArray | Tag::List(_) => Tag::Bytes | Tag::String | Tag::ByteArray | Tag::List(_) | Tag::Array(_, _) =>
core::mem::align_of::<CSlice<()>>(), core::mem::align_of::<CSlice<()>>(),
// array buffer is allocated, so no need for alignment first
Tag::Array(_, _) => 1,
// will not be sent from the host // will not be sent from the host
_ => unreachable!("unexpected tag from host") _ => unreachable!("unexpected tag from host")
} }
} }
/// Returns the "alignment size" of a value with the type described by the tag
/// (in bytes), i.e. the stride between successive elements in a list/array of
/// the given type, or the offset from a struct element of this type to the
/// next field.
pub fn size(self) -> usize { pub fn size(self) -> usize {
match self { match self {
Tag::None => 0, Tag::None => 0,
@ -438,12 +450,18 @@ mod tag {
Tag::ByteArray => 8, Tag::ByteArray => 8,
Tag::Tuple(it, arity) => { Tag::Tuple(it, arity) => {
let mut size = 0; let mut size = 0;
let mut max_alignment = 0;
let mut it = it.clone(); let mut it = it.clone();
for _ in 0..arity { for _ in 0..arity {
let tag = it.next().expect("truncated tag"); let tag = it.next().expect("truncated tag");
let alignment = tag.alignment();
max_alignment = core::cmp::max(max_alignment, alignment);
size = round_up(size, alignment);
size += tag.size(); size += tag.size();
size += alignment_offset(tag.alignment() as isize, size as isize) as usize;
} }
// Take into account any tail padding (if element(s) with largest
// alignment are not at the end).
size = round_up(size, max_alignment);
size size
} }
Tag::List(_) => 8, Tag::List(_) => 8,

View File

@ -78,7 +78,10 @@ class RoundtripTest(ExperimentCase):
self.assertRoundtrip(([1, 2], [3, 4])) self.assertRoundtrip(([1, 2], [3, 4]))
def test_list_mixed_tuple(self): def test_list_mixed_tuple(self):
self.assertRoundtrip([(0x12345678, [("foo", [0.0, 1.0], [0, 1])])]) self.assertRoundtrip([
(0x12345678, [("foo", [0.0, 1.0], [0, 1])]),
(0x23456789, [("bar", [2.0, 3.0], [2, 3])])])
self.assertRoundtrip([(0, 1.0, 0), (1, 1.5, 2), (2, 1.9, 4)])
def test_array_1d(self): def test_array_1d(self):
self.assertArrayRoundtrip(numpy.array([True, False])) self.assertArrayRoundtrip(numpy.array([True, False]))
@ -520,19 +523,32 @@ class NumpyBoolTest(ExperimentCase):
class _Alignment(EnvExperiment): class _Alignment(EnvExperiment):
def build(self): def build(self):
self.setattr_device("core") self.setattr_device("core")
self.a = False
self.b = 1234.5678
self.c = True
self.d = True
self.e = 2345.6789
self.f = False
@rpc @rpc
def a_tuple(self) -> TList(TTuple([TBool, TFloat, TBool])): def get_tuples(self) -> TList(TTuple([TBool, TFloat, TBool])):
return [(True, 1234.5678, True)] return [(self.a, self.b, self.c), (self.d, self.e, self.f)]
@kernel @kernel
def run(self): def run(self):
a, b, c = self.a_tuple()[0] # Run two RPCs before checking to catch any obvious allocation size calculation
d, e, f = self.a_tuple()[0] # issues (i.e. use of uninitialised stack memory).
assert a == d tuples0 = self.get_tuples()
assert b == e tuples1 = self.get_tuples()
assert c == f for tuples in [tuples0, tuples1]:
return 0 a, b, c = tuples[0]
d, e, f = tuples[1]
assert a == self.a
assert b == self.b
assert c == self.c
assert d == self.d
assert e == self.e
assert f == self.f
class AlignmentTest(ExperimentCase): class AlignmentTest(ExperimentCase):