Merge pull request #1031 from dimforge/rust-cuda

Add support for rust-CUDA
This commit is contained in:
Sébastien Crozet 2021-11-25 16:16:37 +01:00 committed by GitHub
commit e8c6a7c0a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 147 additions and 98 deletions

View File

@ -36,14 +36,20 @@ jobs:
run: cargo build;
- name: Build --features serde-serialize
run: cargo build --features serde-serialize
- name: Build --all-features
run: cargo build --all-features;
- name: Build nalgebra-glm
run: cargo build -p nalgebra-glm --all-features;
- name: Build nalgebra-lapack
run: cd nalgebra-lapack; cargo build;
- name: Build nalgebra-sparse
run: cd nalgebra-sparse; cargo build;
# Run this in its own job because it alone takes a lot of time,
# so it's best to let it run in parallel with the other jobs.
build-nalgebra-all-features:
runs-on: ubuntu-latest
steps:
# Needed because the --all-features build enables cuda support.
- uses: Jimver/cuda-toolkit@v0.2.4
- uses: actions/checkout@v2
- run: cargo build --all-features;
- run: cargo build -p nalgebra-glm --all-features;
test-nalgebra:
runs-on: ubuntu-latest
# env:
@ -110,3 +116,16 @@ jobs:
run: xargo build --verbose --no-default-features -p nalgebra-glm --target=x86_64-unknown-linux-gnu;
- name: build thumbv7em-none-eabihf nalgebra-glm
run: xargo build --verbose --no-default-features -p nalgebra-glm --target=thumbv7em-none-eabihf;
build-cuda:
runs-on: ubuntu-latest
steps:
- uses: Jimver/cuda-toolkit@v0.2.4
- name: Install nightly-2021-10-17
uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-10-17
override: true
- uses: actions/checkout@v2
- run: rustup target add nvptx64-nvidia-cuda
- run: cargo build --no-default-features --features cuda
- run: cargo build --no-default-features --features cuda --target=nvptx64-nvidia-cuda

View File

@ -4,17 +4,31 @@ documented here.
This project adheres to [Semantic Versioning](https://semver.org/).
## [0.29.1] - WIP
## [0.30.0]
### Breaking changes
- The `Dim` trait is now marked as unsafe.
### Modified
- Use more concise debug impls for matrices and geometric transformation types.
### Added
- The conversion trait `From<Vec<T>>` and method `from_vec_storage` for `RowDVector`. See [#975](https://github.com/dimforge/nalgebra/issues/975)
- Added the conversion trait `From<Vec<T>>` and method `from_vec_storage` for `RowDVector`.
- Added implementations of `From` and `Into` for converting between `nalgebra` types and types from
`glam 0.18`. These can be enabled with the `convert-glam018` cargo feature.
- Added the methods `Matrix::product`, `::row_product`, `::row_product_tr`, and `::column_product` to compute the
product of the components, rows, or columns, of a single matrix or vector.
- The `Default` trait is now implemented for most geometric types: `Point`, `Isometry`, `Rotation`, `Similarity`,
`Transform`, `UnitComplex`, and `UnitQuaternion`.
- Added the `Scale` geometric type for representing non-uniform scaling.
- `nalgebra-sparse`: provide constructors for unsorted but otherwise valid data using the CSR format.
- Added `Cholesky::new_with_substitute` that will replace diagonal elements by a given constant whenever `Cholesky`
encounters a matrix that is not positive-definite.
### Fixed
- Fixed a potential unsoundness with `matrix.get(i)` and `matrix.get_mut(i)` where `i` is a `usize`, and `matrix`
is a matrix slice with non-default strides.
- Fixed a potential unsoundness with `vector.perp` where `vector` isn't actually a 2D vector as expected.
## [0.29.0]
### Breaking changes

View File

@ -32,6 +32,7 @@ compare = [ "matrixcompare-core" ]
libm = [ "simba/libm" ]
libm-force = [ "simba/libm_force" ]
macros = [ "nalgebra-macros" ]
cuda = [ "cust", "simba/cuda" ]
# Conversion
convert-mint = [ "mint" ]
@ -86,7 +87,7 @@ pest = { version = "2", optional = true }
pest_derive = { version = "2", optional = true }
bytemuck = { version = "1.5", optional = true }
matrixcompare-core = { version = "0.1", optional = true }
proptest = { version = "1", optional = true, default-features = false, features = ["std"] }
proptest = { version = "1", optional = true, default-features = false, features = ["std"] }
glam013 = { package = "glam", version = "0.13", optional = true }
glam014 = { package = "glam", version = "0.14", optional = true }
glam015 = { package = "glam", version = "0.15", optional = true }
@ -94,6 +95,9 @@ glam016 = { package = "glam", version = "0.16", optional = true }
glam017 = { package = "glam", version = "0.17", optional = true }
glam018 = { package = "glam", version = "0.18", optional = true }
[target.'cfg(not(target_os = "cuda"))'.dependencies]
cust = { version = "0.1", optional = true }
[dev-dependencies]
serde_json = "1.0"
@ -129,3 +133,6 @@ lto = true
[package.metadata.docs.rs]
# Enable certain features when building docs for docs.rs
features = [ "proptest-support", "compare", "macros", "rand" ]
[patch.crates-io]
simba = { git = "https://github.com/dimforge/simba"}

View File

@ -22,6 +22,7 @@ std = [ "nalgebra/std", "simba/std" ]
arbitrary = [ "nalgebra/arbitrary" ]
serde-serialize = [ "nalgebra/serde-serialize-no-std" ]
abomonation-serialize = [ "nalgebra/abomonation-serialize" ]
cuda = [ "nalgebra/cuda" ]
# Conversion
convert-mint = [ "nalgebra/mint" ]

View File

@ -32,6 +32,10 @@ use std::mem;
/// An array-based statically sized matrix data storage.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct ArrayStorage<T, const R: usize, const C: usize>(pub [[T; R]; C]);
impl<T, const R: usize, const C: usize> ArrayStorage<T, R, C> {

View File

@ -13,6 +13,10 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// Dim of dynamically-sized algebraic entities.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct Dynamic {
value: usize,
}
@ -197,6 +201,10 @@ dim_ops!(
);
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct Const<const R: usize>;
/// Trait implemented exclusively by type-level integers.

View File

@ -589,7 +589,10 @@ where
where
S: RawStorageMut<T, R, C>,
{
matrix.data.get_unchecked_linear_mut(self)
let nrows = matrix.shape().0;
let row = self % nrows;
let col = self / nrows;
matrix.data.get_unchecked_mut(row, col)
}
}

View File

@ -154,6 +154,10 @@ pub type MatrixCross<T, R1, C1, R2, C2> =
/// some concrete types for `T` and a compatible data storage type `S`).
#[repr(C)]
#[derive(Clone, Copy)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct Matrix<T, R, C, S> {
/// The data storage that contains all the matrix components. Disappointed?
///

View File

@ -26,6 +26,10 @@ use crate::{Dim, Matrix, OMatrix, RealField, Scalar, SimdComplexField, SimdRealF
/// in their documentation, read their dedicated pages directly.
#[repr(transparent)]
#[derive(Clone, Hash, Copy)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct Unit<T> {
pub(crate) value: T,
}

View File

@ -39,6 +39,10 @@ use simba::scalar::{ClosedNeg, RealField};
/// See <https://github.com/dimforge/nalgebra/issues/487>
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct DualQuaternion<T> {
/// The real component of the quaternion
pub real: Quaternion<T>,

View File

@ -54,7 +54,11 @@ use crate::geometry::{AbstractRotation, Point, Translation};
/// * [Conversion to a matrix <span style="float:right;">`to_matrix`…</span>](#conversion-to-a-matrix)
///
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[cfg_attr(feature = "serde-serialize-no-std", derive(Serialize, Deserialize))]
#[cfg_attr(
feature = "serde-serialize-no-std",
@ -170,20 +174,6 @@ where
}
}
impl<T: Scalar + Copy, R: Copy, const D: usize> Copy for Isometry<T, R, D> where
Owned<T, Const<D>>: Copy
{
}
impl<T: Scalar, R: Clone, const D: usize> Clone for Isometry<T, R, D> {
#[inline]
fn clone(&self) -> Self {
Self {
rotation: self.rotation.clone(),
translation: self.translation.clone(),
}
}
}
/// # From the translation and rotation parts
impl<T: Scalar, R: AbstractRotation<T, D>, const D: usize> Isometry<T, R, D> {
/// Creates a new isometry from its rotational and translational parts.

View File

@ -19,19 +19,15 @@ use crate::geometry::{Point3, Projective3};
/// A 3D orthographic projection stored as a homogeneous 4x4 matrix.
#[repr(C)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[derive(Copy, Clone)]
pub struct Orthographic3<T> {
matrix: Matrix4<T>,
}
impl<T: RealField + Copy> Copy for Orthographic3<T> {}
impl<T: RealField> Clone for Orthographic3<T> {
#[inline]
fn clone(&self) -> Self {
Self::from_matrix_unchecked(self.matrix.clone())
}
}
impl<T: RealField> fmt::Debug for Orthographic3<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
self.matrix.fmt(f)

View File

@ -20,19 +20,15 @@ use crate::geometry::{Point3, Projective3};
/// A 3D perspective projection stored as a homogeneous 4x4 matrix.
#[repr(C)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[derive(Copy, Clone)]
pub struct Perspective3<T> {
matrix: Matrix4<T>,
}
impl<T: RealField + Copy> Copy for Perspective3<T> {}
impl<T: RealField> Clone for Perspective3<T> {
#[inline]
fn clone(&self) -> Self {
Self::from_matrix_unchecked(self.matrix.clone())
}
}
impl<T: RealField> fmt::Debug for Perspective3<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
self.matrix.fmt(f)

View File

@ -74,6 +74,15 @@ where
{
}
// Manual `cust::memory::DeviceCopy` implementation for points. It is only compiled when the
// `cuda` feature is enabled and the target OS is not `cuda` (see the `cfg` attribute below).
//
// SAFETY: NOTE(review) — this impl is gated on both `T` and `OVector<T, D>` being `DeviceCopy`
// (see the bounds below). It presumably relies on the point storing nothing besides its
// coordinate vector; confirm against the `OPoint` struct definition.
#[cfg(all(not(target_os = "cuda"), feature = "cuda"))]
unsafe impl<T: Scalar + cust::memory::DeviceCopy, D: DimName> cust::memory::DeviceCopy
for OPoint<T, D>
where
DefaultAllocator: Allocator<T, D>,
OVector<T, D>: cust::memory::DeviceCopy,
{
}
#[cfg(feature = "bytemuck")]
unsafe impl<T: Scalar, D: DimName> bytemuck::Zeroable for OPoint<T, D>
where

View File

@ -28,6 +28,10 @@ use crate::geometry::{Point3, Rotation};
/// that may be used as a rotation.
#[repr(C)]
#[derive(Copy, Clone)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub struct Quaternion<T> {
/// This quaternion as a 4D vector of coordinates in the `[ x, y, z, w ]` storage order.
pub coords: Vector4<T>,

View File

@ -54,6 +54,11 @@ use crate::geometry::Point;
/// * [Conversion to a matrix <span style="float:right;">`matrix`, `to_homogeneous`…</span>](#conversion-to-a-matrix)
///
#[repr(C)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[derive(Copy, Clone)]
pub struct Rotation<T, const D: usize> {
matrix: SMatrix<T, D, D>,
}
@ -73,21 +78,6 @@ where
}
}
impl<T: Scalar + Copy, const D: usize> Copy for Rotation<T, D> where
<DefaultAllocator as Allocator<T, Const<D>, Const<D>>>::Buffer: Copy
{
}
impl<T: Scalar, const D: usize> Clone for Rotation<T, D>
where
<DefaultAllocator as Allocator<T, Const<D>, Const<D>>>::Buffer: Clone,
{
#[inline]
fn clone(&self) -> Self {
Self::from_matrix_unchecked(self.matrix.clone())
}
}
#[cfg(feature = "bytemuck")]
unsafe impl<T, const D: usize> bytemuck::Zeroable for Rotation<T, D>
where

View File

@ -22,6 +22,11 @@ use crate::geometry::Point;
/// A scale which supports non-uniform scaling.
#[repr(C)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[derive(Copy, Clone)]
pub struct Scale<T, const D: usize> {
/// The scale coordinates, i.e., how much is multiplied to a point's coordinates when it is
/// scaled.
@ -43,18 +48,6 @@ where
}
}
impl<T: Scalar + Copy, const D: usize> Copy for Scale<T, D> {}
impl<T: Scalar, const D: usize> Clone for Scale<T, D>
where
Owned<T, Const<D>>: Clone,
{
#[inline]
fn clone(&self) -> Self {
Scale::from(self.vector.clone())
}
}
#[cfg(feature = "bytemuck")]
unsafe impl<T, const D: usize> bytemuck::Zeroable for Scale<T, D>
where

View File

@ -23,7 +23,11 @@ use crate::geometry::{AbstractRotation, Isometry, Point, Translation};
/// A similarity, i.e., a uniform scaling, followed by a rotation, followed by a translation.
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[cfg_attr(feature = "serde-serialize-no-std", derive(Serialize, Deserialize))]
#[cfg_attr(
feature = "serde-serialize-no-std",
@ -73,22 +77,6 @@ where
}
}
impl<T: Scalar + Copy + Zero, R: AbstractRotation<T, D> + Copy, const D: usize> Copy
for Similarity<T, R, D>
where
Owned<T, Const<D>>: Copy,
{
}
impl<T: Scalar + Zero, R: AbstractRotation<T, D> + Clone, const D: usize> Clone
for Similarity<T, R, D>
{
#[inline]
fn clone(&self) -> Self {
Similarity::from_isometry(self.isometry.clone(), self.scaling.clone())
}
}
impl<T: Scalar + Zero, R, const D: usize> Similarity<T, R, D>
where
R: AbstractRotation<T, D>,

View File

@ -60,14 +60,26 @@ where
/// Tag representing the most general (not necessarily invertible) `Transform` type.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub enum TGeneral {}
/// Tag representing the most general invertible `Transform` type.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub enum TProjective {}
/// Tag representing an affine `Transform`. Its bottom-row is equal to `(0, 0 ... 0, 1)`.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
pub enum TAffine {}
impl TCategory for TGeneral {
@ -195,6 +207,16 @@ where
{
}
// Manual `cust::memory::DeviceCopy` implementation for `Transform`. It is only compiled when the
// `cuda` feature is enabled and the target OS is not `cuda` (see the `cfg` attribute below).
//
// SAFETY: NOTE(review) — this impl is gated on `T` and on the owned
// `(D + 1) x (D + 1)` storage (`Owned<T, DimNameSum<Const<D>, U1>, DimNameSum<Const<D>, U1>>`)
// being `DeviceCopy` (see the bounds below). It presumably relies on `Transform` holding only
// that matrix plus zero-sized category data; confirm against the `Transform` struct definition.
#[cfg(all(not(target_os = "cuda"), feature = "cuda"))]
unsafe impl<T: RealField + cust::memory::DeviceCopy, C: TCategory, const D: usize>
cust::memory::DeviceCopy for Transform<T, C, D>
where
Const<D>: DimNameAdd<U1>,
DefaultAllocator: Allocator<T, DimNameSum<Const<D>, U1>, DimNameSum<Const<D>, U1>>,
Owned<T, DimNameSum<Const<D>, U1>, DimNameSum<Const<D>, U1>>: cust::memory::DeviceCopy,
{
}
impl<T: RealField, C: TCategory, const D: usize> Clone for Transform<T, C, D>
where
Const<D>: DimNameAdd<U1>,

View File

@ -22,6 +22,11 @@ use crate::geometry::Point;
/// A translation.
#[repr(C)]
#[cfg_attr(
all(not(target_os = "cuda"), feature = "cuda"),
derive(cust::DeviceCopy)
)]
#[derive(Copy, Clone)]
pub struct Translation<T, const D: usize> {
/// The translation coordinates, i.e., how much is added to a point's coordinates when it is
/// translated.
@ -43,18 +48,6 @@ where
}
}
impl<T: Scalar + Copy, const D: usize> Copy for Translation<T, D> {}
impl<T: Scalar, const D: usize> Clone for Translation<T, D>
where
Owned<T, Const<D>>: Clone,
{
#[inline]
fn clone(&self) -> Self {
Translation::from(self.vector.clone())
}
}
#[cfg(feature = "bytemuck")]
unsafe impl<T, const D: usize> bytemuck::Zeroable for Translation<T, D>
where