Merge pull request #1031 from dimforge/rust-cuda
Add support for rust-CUDA
This commit is contained in:
commit
e8c6a7c0a2
27
.github/workflows/nalgebra-ci-build.yml
vendored
27
.github/workflows/nalgebra-ci-build.yml
vendored
@ -36,14 +36,20 @@ jobs:
|
||||
run: cargo build;
|
||||
- name: Build --features serde-serialize
|
||||
run: cargo build --features serde-serialize
|
||||
- name: Build --all-features
|
||||
run: cargo build --all-features;
|
||||
- name: Build nalgebra-glm
|
||||
run: cargo build -p nalgebra-glm --all-features;
|
||||
- name: Build nalgebra-lapack
|
||||
run: cd nalgebra-lapack; cargo build;
|
||||
- name: Build nalgebra-sparse
|
||||
run: cd nalgebra-sparse; cargo build;
|
||||
# Run this in its own job because it alone takes a lot of time.
|
||||
# So it’s best to let it run in parallel to the other jobs.
|
||||
build-nalgebra-all-features:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# Needed because the --all-features build enables cuda support.
|
||||
- uses: Jimver/cuda-toolkit@v0.2.4
|
||||
- uses: actions/checkout@v2
|
||||
- run: cargo build --all-features;
|
||||
- run: cargo build -p nalgebra-glm --all-features;
|
||||
test-nalgebra:
|
||||
runs-on: ubuntu-latest
|
||||
# env:
|
||||
@ -110,3 +116,16 @@ jobs:
|
||||
run: xargo build --verbose --no-default-features -p nalgebra-glm --target=x86_64-unknown-linux-gnu;
|
||||
- name: build thumbv7em-none-eabihf nalgebra-glm
|
||||
run: xargo build --verbose --no-default-features -p nalgebra-glm --target=thumbv7em-none-eabihf;
|
||||
build-cuda:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: Jimver/cuda-toolkit@v0.2.4
|
||||
- name: Install nightly-2021-10-17
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: nightly-2021-10-17
|
||||
override: true
|
||||
- uses: actions/checkout@v2
|
||||
- run: rustup target add nvptx64-nvidia-cuda
|
||||
- run: cargo build --no-default-features --features cuda
|
||||
- run: cargo build --no-default-features --features cuda --target=nvptx64-nvidia-cuda
|
20
CHANGELOG.md
20
CHANGELOG.md
@ -4,17 +4,31 @@ documented here.
|
||||
|
||||
This project adheres to [Semantic Versioning](https://semver.org/).
|
||||
|
||||
## [0.29.1] - WIP
|
||||
## [0.30.0]
|
||||
|
||||
### Breaking changes
|
||||
- The `Dim` trait is now marked as unsafe.
|
||||
|
||||
### Modified
|
||||
- Use more concise debug impls for matrices and geometric transformation types.
|
||||
|
||||
### Added
|
||||
|
||||
- The conversion trait `From<Vec<T>>` and method `from_vec_storage` for `RowDVector`. See [#975](https://github.com/dimforge/nalgebra/issues/975)
|
||||
- Added the conversion trait `From<Vec<T>>` and method `from_vec_storage` for `RowDVector`.
|
||||
- Added implementation of `From` and `Into` for converting between `nalgebra` types and types from
|
||||
`glam 0.18`. These can be enabled by enabling the `convert-glam018` cargo features.
|
||||
- Added the methods `Matrix::product`, `::row_product`, `::row_product_tr`, and `::column_product` to compute the
|
||||
product of the components, rows, or columns, of a single matrix or vector.
|
||||
- The `Default` trait is now implemented for most geometric types: `Point`, `Isometry`, `Rotation`, `Similarity`,
|
||||
`Transform`, `UnitComplex`, and `UnitQuaternion`.
|
||||
- Added the `Scale` geometric type for representing non-uniform scaling.
|
||||
- `nalgebra-sparse`: provide constructors for unsorted but otherwise valid data using the CSR format.
|
||||
- Added `Cholesky::new_with_substitute` that will replace diagonal elements by a given constant whenever `Cholesky`
|
||||
encounters a non-positive-definite matrix.
|
||||
|
||||
### Fixed
|
||||
- Fixed a potential unsoundness with `matrix.get(i)` and `matrix.get_mut(i)` where `i` is a `usize`, and `matrix`
|
||||
is a matrix slice with non-default strides.
|
||||
- Fixed potential unsoundness with `vector.perp` where `vector` isn’t actually a 2D vector as expected.
|
||||
|
||||
## [0.29.0]
|
||||
### Breaking changes
|
||||
|
@ -32,6 +32,7 @@ compare = [ "matrixcompare-core" ]
|
||||
libm = [ "simba/libm" ]
|
||||
libm-force = [ "simba/libm_force" ]
|
||||
macros = [ "nalgebra-macros" ]
|
||||
cuda = [ "cust", "simba/cuda" ]
|
||||
|
||||
# Conversion
|
||||
convert-mint = [ "mint" ]
|
||||
@ -86,7 +87,7 @@ pest = { version = "2", optional = true }
|
||||
pest_derive = { version = "2", optional = true }
|
||||
bytemuck = { version = "1.5", optional = true }
|
||||
matrixcompare-core = { version = "0.1", optional = true }
|
||||
proptest = { version = "1", optional = true, default-features = false, features = ["std"] }
|
||||
proptest = { version = "1", optional = true, default-features = false, features = ["std"] }
|
||||
glam013 = { package = "glam", version = "0.13", optional = true }
|
||||
glam014 = { package = "glam", version = "0.14", optional = true }
|
||||
glam015 = { package = "glam", version = "0.15", optional = true }
|
||||
@ -94,6 +95,9 @@ glam016 = { package = "glam", version = "0.16", optional = true }
|
||||
glam017 = { package = "glam", version = "0.17", optional = true }
|
||||
glam018 = { package = "glam", version = "0.18", optional = true }
|
||||
|
||||
[target.'cfg(not(target_os = "cuda"))'.dependencies]
|
||||
cust = { version = "0.1", optional = true }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
serde_json = "1.0"
|
||||
@ -129,3 +133,6 @@ lto = true
|
||||
[package.metadata.docs.rs]
|
||||
# Enable certain features when building docs for docs.rs
|
||||
features = [ "proptest-support", "compare", "macros", "rand" ]
|
||||
|
||||
[patch.crates-io]
|
||||
simba = { git = "https://github.com/dimforge/simba"}
|
||||
|
@ -22,6 +22,7 @@ std = [ "nalgebra/std", "simba/std" ]
|
||||
arbitrary = [ "nalgebra/arbitrary" ]
|
||||
serde-serialize = [ "nalgebra/serde-serialize-no-std" ]
|
||||
abomonation-serialize = [ "nalgebra/abomonation-serialize" ]
|
||||
cuda = [ "nalgebra/cuda" ]
|
||||
|
||||
# Conversion
|
||||
convert-mint = [ "nalgebra/mint" ]
|
||||
|
@ -32,6 +32,10 @@ use std::mem;
|
||||
/// An array-based, statically sized matrix data storage.
|
||||
#[repr(transparent)]
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct ArrayStorage<T, const R: usize, const C: usize>(pub [[T; R]; C]);
|
||||
|
||||
impl<T, const R: usize, const C: usize> ArrayStorage<T, R, C> {
|
||||
|
@ -13,6 +13,10 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
/// Dim of dynamically-sized algebraic entities.
|
||||
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct Dynamic {
|
||||
value: usize,
|
||||
}
|
||||
@ -197,6 +201,10 @@ dim_ops!(
|
||||
);
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct Const<const R: usize>;
|
||||
|
||||
/// Trait implemented exclusively by type-level integers.
|
||||
|
@ -589,7 +589,10 @@ where
|
||||
where
|
||||
S: RawStorageMut<T, R, C>,
|
||||
{
|
||||
matrix.data.get_unchecked_linear_mut(self)
|
||||
let nrows = matrix.shape().0;
|
||||
let row = self % nrows;
|
||||
let col = self / nrows;
|
||||
matrix.data.get_unchecked_mut(row, col)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -154,6 +154,10 @@ pub type MatrixCross<T, R1, C1, R2, C2> =
|
||||
/// some concrete types for `T` and a compatible data storage type `S`).
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct Matrix<T, R, C, S> {
|
||||
/// The data storage that contains all the matrix components. Disappointed?
|
||||
///
|
||||
|
@ -26,6 +26,10 @@ use crate::{Dim, Matrix, OMatrix, RealField, Scalar, SimdComplexField, SimdRealF
|
||||
/// in their documentation, read their dedicated pages directly.
|
||||
#[repr(transparent)]
|
||||
#[derive(Clone, Hash, Copy)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct Unit<T> {
|
||||
pub(crate) value: T,
|
||||
}
|
||||
|
@ -39,6 +39,10 @@ use simba::scalar::{ClosedNeg, RealField};
|
||||
/// See <https://github.com/dimforge/nalgebra/issues/487>
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct DualQuaternion<T> {
|
||||
/// The real component of the quaternion
|
||||
pub real: Quaternion<T>,
|
||||
|
@ -54,7 +54,11 @@ use crate::geometry::{AbstractRotation, Point, Translation};
|
||||
/// * [Conversion to a matrix <span style="float:right;">`to_matrix`…</span>](#conversion-to-a-matrix)
|
||||
///
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[cfg_attr(feature = "serde-serialize-no-std", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(
|
||||
feature = "serde-serialize-no-std",
|
||||
@ -170,20 +174,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Scalar + Copy, R: Copy, const D: usize> Copy for Isometry<T, R, D> where
|
||||
Owned<T, Const<D>>: Copy
|
||||
{
|
||||
}
|
||||
|
||||
impl<T: Scalar, R: Clone, const D: usize> Clone for Isometry<T, R, D> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
rotation: self.rotation.clone(),
|
||||
translation: self.translation.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// # From the translation and rotation parts
|
||||
impl<T: Scalar, R: AbstractRotation<T, D>, const D: usize> Isometry<T, R, D> {
|
||||
/// Creates a new isometry from its rotational and translational parts.
|
||||
|
@ -19,19 +19,15 @@ use crate::geometry::{Point3, Projective3};
|
||||
|
||||
/// A 3D orthographic projection stored as a homogeneous 4x4 matrix.
|
||||
#[repr(C)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Orthographic3<T> {
|
||||
matrix: Matrix4<T>,
|
||||
}
|
||||
|
||||
impl<T: RealField + Copy> Copy for Orthographic3<T> {}
|
||||
|
||||
impl<T: RealField> Clone for Orthographic3<T> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Self::from_matrix_unchecked(self.matrix.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealField> fmt::Debug for Orthographic3<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
|
||||
self.matrix.fmt(f)
|
||||
|
@ -20,19 +20,15 @@ use crate::geometry::{Point3, Projective3};
|
||||
|
||||
/// A 3D perspective projection stored as a homogeneous 4x4 matrix.
|
||||
#[repr(C)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Perspective3<T> {
|
||||
matrix: Matrix4<T>,
|
||||
}
|
||||
|
||||
impl<T: RealField + Copy> Copy for Perspective3<T> {}
|
||||
|
||||
impl<T: RealField> Clone for Perspective3<T> {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Self::from_matrix_unchecked(self.matrix.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: RealField> fmt::Debug for Perspective3<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
|
||||
self.matrix.fmt(f)
|
||||
|
@ -74,6 +74,15 @@ where
|
||||
{
|
||||
}
|
||||
|
||||
#[cfg(all(not(target_os = "cuda"), feature = "cuda"))]
|
||||
unsafe impl<T: Scalar + cust::memory::DeviceCopy, D: DimName> cust::memory::DeviceCopy
|
||||
for OPoint<T, D>
|
||||
where
|
||||
DefaultAllocator: Allocator<T, D>,
|
||||
OVector<T, D>: cust::memory::DeviceCopy,
|
||||
{
|
||||
}
|
||||
|
||||
#[cfg(feature = "bytemuck")]
|
||||
unsafe impl<T: Scalar, D: DimName> bytemuck::Zeroable for OPoint<T, D>
|
||||
where
|
||||
|
@ -28,6 +28,10 @@ use crate::geometry::{Point3, Rotation};
|
||||
/// that may be used as a rotation.
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub struct Quaternion<T> {
|
||||
/// This quaternion as a 4D vector of coordinates in the `[ x, y, z, w ]` storage order.
|
||||
pub coords: Vector4<T>,
|
||||
|
@ -54,6 +54,11 @@ use crate::geometry::Point;
|
||||
/// * [Conversion to a matrix <span style="float:right;">`matrix`, `to_homogeneous`…</span>](#conversion-to-a-matrix)
|
||||
///
|
||||
#[repr(C)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Rotation<T, const D: usize> {
|
||||
matrix: SMatrix<T, D, D>,
|
||||
}
|
||||
@ -73,21 +78,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Scalar + Copy, const D: usize> Copy for Rotation<T, D> where
|
||||
<DefaultAllocator as Allocator<T, Const<D>, Const<D>>>::Buffer: Copy
|
||||
{
|
||||
}
|
||||
|
||||
impl<T: Scalar, const D: usize> Clone for Rotation<T, D>
|
||||
where
|
||||
<DefaultAllocator as Allocator<T, Const<D>, Const<D>>>::Buffer: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Self::from_matrix_unchecked(self.matrix.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "bytemuck")]
|
||||
unsafe impl<T, const D: usize> bytemuck::Zeroable for Rotation<T, D>
|
||||
where
|
||||
|
@ -22,6 +22,11 @@ use crate::geometry::Point;
|
||||
|
||||
/// A scale which supports non-uniform scaling.
|
||||
#[repr(C)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Scale<T, const D: usize> {
|
||||
/// The scale coordinates, i.e., how much is multiplied to a point's coordinates when it is
|
||||
/// scaled.
|
||||
@ -43,18 +48,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Scalar + Copy, const D: usize> Copy for Scale<T, D> {}
|
||||
|
||||
impl<T: Scalar, const D: usize> Clone for Scale<T, D>
|
||||
where
|
||||
Owned<T, Const<D>>: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Scale::from(self.vector.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "bytemuck")]
|
||||
unsafe impl<T, const D: usize> bytemuck::Zeroable for Scale<T, D>
|
||||
where
|
||||
|
@ -23,7 +23,11 @@ use crate::geometry::{AbstractRotation, Isometry, Point, Translation};
|
||||
|
||||
/// A similarity, i.e., a uniform scaling, followed by a rotation, followed by a translation.
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[cfg_attr(feature = "serde-serialize-no-std", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(
|
||||
feature = "serde-serialize-no-std",
|
||||
@ -73,22 +77,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Scalar + Copy + Zero, R: AbstractRotation<T, D> + Copy, const D: usize> Copy
|
||||
for Similarity<T, R, D>
|
||||
where
|
||||
Owned<T, Const<D>>: Copy,
|
||||
{
|
||||
}
|
||||
|
||||
impl<T: Scalar + Zero, R: AbstractRotation<T, D> + Clone, const D: usize> Clone
|
||||
for Similarity<T, R, D>
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Similarity::from_isometry(self.isometry.clone(), self.scaling.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Scalar + Zero, R, const D: usize> Similarity<T, R, D>
|
||||
where
|
||||
R: AbstractRotation<T, D>,
|
||||
|
@ -60,14 +60,26 @@ where
|
||||
|
||||
/// Tag representing the most general (not necessarily invertible) `Transform` type.
|
||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub enum TGeneral {}
|
||||
|
||||
/// Tag representing the most general invertible `Transform` type.
|
||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub enum TProjective {}
|
||||
|
||||
/// Tag representing an affine `Transform`. Its bottom-row is equal to `(0, 0 ... 0, 1)`.
|
||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
pub enum TAffine {}
|
||||
|
||||
impl TCategory for TGeneral {
|
||||
@ -195,6 +207,16 @@ where
|
||||
{
|
||||
}
|
||||
|
||||
#[cfg(all(not(target_os = "cuda"), feature = "cuda"))]
|
||||
unsafe impl<T: RealField + cust::memory::DeviceCopy, C: TCategory, const D: usize>
|
||||
cust::memory::DeviceCopy for Transform<T, C, D>
|
||||
where
|
||||
Const<D>: DimNameAdd<U1>,
|
||||
DefaultAllocator: Allocator<T, DimNameSum<Const<D>, U1>, DimNameSum<Const<D>, U1>>,
|
||||
Owned<T, DimNameSum<Const<D>, U1>, DimNameSum<Const<D>, U1>>: cust::memory::DeviceCopy,
|
||||
{
|
||||
}
|
||||
|
||||
impl<T: RealField, C: TCategory, const D: usize> Clone for Transform<T, C, D>
|
||||
where
|
||||
Const<D>: DimNameAdd<U1>,
|
||||
|
@ -22,6 +22,11 @@ use crate::geometry::Point;
|
||||
|
||||
/// A translation.
|
||||
#[repr(C)]
|
||||
#[cfg_attr(
|
||||
all(not(target_os = "cuda"), feature = "cuda"),
|
||||
derive(cust::DeviceCopy)
|
||||
)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Translation<T, const D: usize> {
|
||||
/// The translation coordinates, i.e., how much is added to a point's coordinates when it is
|
||||
/// translated.
|
||||
@ -43,18 +48,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Scalar + Copy, const D: usize> Copy for Translation<T, D> {}
|
||||
|
||||
impl<T: Scalar, const D: usize> Clone for Translation<T, D>
|
||||
where
|
||||
Owned<T, Const<D>>: Clone,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
Translation::from(self.vector.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "bytemuck")]
|
||||
unsafe impl<T, const D: usize> bytemuck::Zeroable for Translation<T, D>
|
||||
where
|
||||
|
Loading…
Reference in New Issue
Block a user