// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

/*
    This file implements common mathematical operations on vector types
    (float3, float4 etc.) since these are not provided as standard by CUDA.

    The syntax is modelled on the Cg standard library.
*/

#ifndef CUTIL_MATH_H
#define CUTIL_MATH_H

#include "foundation/PxPreprocessor.h"

#if PX_LINUX && PX_CLANG
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#endif
// NOTE(review): the header name on this #include was missing in the reviewed
// source. <math.h> declares the sqrtf/floorf/fminf/fmaxf calls used below;
// confirm against the upstream file.
#include <math.h>
#include "vector_functions.h" // why does this pull in the runtime api?
#if PX_LINUX && PX_CLANG
#pragma clang diagnostic pop
#endif

////////////////////////////////////////////////////////////////////////////////
typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned char uchar;

// float functions
////////////////////////////////////////////////////////////////////////////////

// Scalar min/max fallbacks. They are only compiled when PX_CUDA_COMPILER is
// false and PX_MIN_MAX has not been defined yet; the float versions are
// additionally restricted to PX_VC.
#if !PX_CUDA_COMPILER
#ifndef PX_MIN_MAX
#define PX_MIN_MAX
#if PX_VC
PX_FORCE_INLINE __device__ __host__ float fmaxf(float a, float b)
{
    return a > b ? a : b;
}

PX_FORCE_INLINE __device__ __host__ float fminf(float a, float b)
{
    return a < b ? a : b;
}
#endif
PX_FORCE_INLINE __device__ __host__ int max(int a, int b)
{
    return a > b ? a : b;
}

PX_FORCE_INLINE __device__ __host__ unsigned int max(unsigned int a, unsigned int b)
{
    return a > b ? a : b;
}

PX_FORCE_INLINE __device__ __host__ int min(int a, int b)
{
    return a < b ? a : b;
}

PX_FORCE_INLINE __device__ __host__ unsigned int min(unsigned int a, unsigned int b)
{
    return a < b ? a : b;
}
#endif
#endif

// lerp
// Linear interpolation: returns a for t == 0 and a + (b - a) for t == 1.
// t is not clamped.
inline __device__ __host__ float lerp(float a, float b, float t)
{
    return a + t*(b-a);
}

// Bilinear interpolation: interpolates (f00, f10) and (f01, f11) by tx, then
// blends the two results by ty. T needs a lerp(T, T, float) overload.
template <typename T>
__device__ inline T bilerp(const T f00, const T f10, const T f01, const T f11, const float tx, const float ty)
{
    return lerp(lerp(f00, f10, tx), lerp(f01, f11, tx), ty);
}

// Trilinear interpolation: two bilerps (the *0 and *1 sample sets) blended by tz.
template <typename T>
__device__ inline T trilerp(const T f000, const T f100, const T f010, const T f110,
                            const T f001, const T f101, const T f011, const T f111,
                            const float tx, const float ty, const float tz)
{
    return lerp(bilerp(f000, f100, f010, f110, tx, ty), bilerp(f001, f101, f011, f111, tx, ty), tz);
}

// clamp
// Limits f to [a, b]. The upper bound is applied first, so a wins when a > b.
inline __device__ __host__ float clamp(float f, float a, float b)
{
    return fmaxf(a, fminf(f, b));
}

// int2 functions
////////////////////////////////////////////////////////////////////////////////

// addition
inline __host__ __device__ int2 operator+(int2 a, int2 b)
{
    return make_int2(a.x + b.x, a.y + b.y);
}

inline __host__ __device__ void operator+=(int2 &a, int2 b)
{
    a.x += b.x;
    a.y += b.y;
}

// subtract
inline __host__ __device__ int2 operator-(int2 a, int2 b)
{
    return make_int2(a.x - b.x, a.y - b.y);
}

inline __host__ __device__ void operator-=(int2 &a, int2 b)
{
    a.x -= b.x;
    a.y -= b.y;
}

// multiply (component-wise for vector * vector)
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
    return make_int2(a.x * b.x, a.y * b.y);
}

inline __host__ __device__ int2 operator*(int2 a, int s)
{
    return make_int2(a.x * s, a.y * s);
}

inline __host__ __device__ int2 operator*(int s, int2 a)
{
    return make_int2(a.x * s, a.y * s);
}

inline __host__ __device__ void operator*=(int2 &a, int s)
{
    a.x *= s;
    a.y *= s;
}

// float2 functions
////////////////////////////////////////////////////////////////////////////////

// additional constructors
inline __host__ __device__ float2 make_float2(float s)
{
    return make_float2(s, s);
}

inline __host__ __device__ float2 make_float2(int2 a)
{
    return make_float2(float(a.x), float(a.y));
}

// addition
inline __host__ __device__ float2 operator+(float2 a, float2 b)
{
    return make_float2(a.x + b.x, a.y + b.y);
}

inline __host__ __device__ void operator+=(float2 &a, float2 b)
{
    a.x += b.x;
    a.y += b.y;
}

// subtract
inline __host__ __device__ float2 operator-(float2 a, float2 b)
{
    return make_float2(a.x - b.x, a.y - b.y);
}

inline __host__ __device__ void operator-=(float2 &a, float2 b)
{
    a.x -= b.x;
    a.y -= b.y;
}

// multiply (component-wise for vector * vector)
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
    return make_float2(a.x * b.x, a.y * b.y);
}

inline __host__ __device__ float2 operator*(float2 a, float s)
{
    return make_float2(a.x * s, a.y * s);
}

inline __host__ __device__ float2 operator*(float s, float2 a)
{
    return make_float2(a.x * s, a.y * s);
}

inline __host__ __device__ void operator*=(float2 &a, float s)
{
    a.x *= s;
    a.y *= s;
}

// divide
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
    return make_float2(a.x / b.x, a.y / b.y);
}

// vector / scalar: one reciprocal, then a multiply per component.
inline __host__ __device__ float2 operator/(float2 a, float s)
{
    float inv = 1.0f / s;
    return a * inv;
}

// scalar / vector: s divided by each component. The previous implementation
// returned a * (1 / s), which is a / s, not s / a.
inline __host__ __device__ float2 operator/(float s, float2 a)
{
    return make_float2(s / a.x, s / a.y);
}

inline __host__ __device__ void operator/=(float2 &a, float s)
{
    float inv = 1.0f / s;
    a *= inv;
}

// lerp
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
    return a + t*(b-a);
}

// clamp
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
    return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
}

inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
    return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
}

// dot product
inline __host__ __device__ float dot(float2 a, float2 b)
{
    return a.x * b.x + a.y * b.y;
}

// length
inline __host__ __device__ float length(float2 v)
{
    return sqrtf(dot(v, v));
}

// normalize
// No zero-length guard: dot(v, v) == 0 makes invLen a division by zero.
inline __host__ __device__ float2 normalize(float2 v)
{
    float invLen = 1.0f / sqrtf(dot(v, v));
    return v * invLen;
}

// floor
inline __host__ __device__ float2 floor(const float2 v)
{
    return make_float2(floorf(v.x), floorf(v.y));
}

// validate
// Exact (bitwise-value) equality of every component; no tolerance.
inline __host__ __device__ bool validate(const float2 v0, const float2 v1)
{
    return (v0.x == v1.x && v0.y == v1.y);
}

// float3 functions
////////////////////////////////////////////////////////////////////////////////

// additional constructors
inline __host__ __device__ float3 make_float3(float s)
{
    return make_float3(s, s, s);
}

inline __host__ __device__ float3 make_float3(float2 a)
{
    return make_float3(a.x, a.y, 0.0f);
}

inline __host__ __device__ float3 make_float3(float2 a, float s)
{
    return make_float3(a.x, a.y, s);
}

inline __host__ __device__ float3 make_float3(float4 a)
{
    return make_float3(a.x, a.y, a.z); // discards w
}

inline __host__ __device__ float3 make_float3(int3 a)
{
    return make_float3(float(a.x), float(a.y), float(a.z));
}

// min
static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
{
    return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
}

// max
static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
    return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
}

// addition
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}

inline __host__ __device__ void operator+=(float3 &a, float3 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// subtract
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
}

inline __host__ __device__ void operator-=(float3 &a, float3 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// multiply (component-wise for vector * vector)
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
}

inline __host__ __device__ float3 operator*(float3 a, float s)
{
    return make_float3(a.x * s, a.y * s, a.z * s);
}

inline __host__ __device__ float3 operator*(float s, float3 a)
{
    return make_float3(a.x * s, a.y * s, a.z * s);
}

inline __host__ __device__ void operator*=(float3 &a, float s)
{
    a.x *= s;
    a.y *= s;
    a.z *= s;
}

// divide
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
}

// vector / scalar: one reciprocal, then a multiply per component.
inline __host__ __device__ float3 operator/(float3 a, float s)
{
    float inv = 1.0f / s;
    return a * inv;
}

// scalar / vector: s divided by each component. The previous implementation
// returned a * (1 / s), which is a / s, not s / a.
inline __host__ __device__ float3 operator/(float s, float3 a)
{
    return make_float3(s / a.x, s / a.y, s / a.z);
}

inline __host__ __device__ void operator/=(float3 &a, float s)
{
    float inv = 1.0f / s;
    a *= inv;
}

// lerp
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
    return a + t*(b-a);
}

// clamp
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}

inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}

// dot product
inline __host__ __device__ float dot(float3 a, float3 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

// cross product
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
}

// length
inline __host__ __device__ float length(float3 v)
{
    return sqrtf(dot(v, v));
}

// normalize
// No zero-length guard: dot(v, v) == 0 makes invLen a division by zero.
inline __host__ __device__ float3 normalize(float3 v)
{
    float invLen = 1.0f / sqrtf(dot(v, v));
    return v * invLen;
}

// floor
inline __host__ __device__ float3 floor(const float3 v)
{
    return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
}

// validate
// Exact equality of every component; no tolerance.
inline __host__ __device__ bool validate(const float3 v0, const float3 v1)
{
    return (v0.x == v1.x && v0.y == v1.y && v0.z == v1.z);
}

// float4 functions
////////////////////////////////////////////////////////////////////////////////

// additional constructors
inline __host__ __device__ float4 make_float4(float s)
{
    return make_float4(s, s, s, s);
}

inline __host__ __device__ float4 make_float4(float3 a)
{
    return make_float4(a.x, a.y, a.z, 0.0f);
}

inline __host__ __device__ float4 make_float4(float3 a, float w)
{
    return make_float4(a.x, a.y, a.z, w);
}

inline __host__ __device__ float4 make_float4(int4 a)
{
    return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
}

// min
static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
{
    return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
}

// max
static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
    return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
}

// addition
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}

inline __host__ __device__ void operator+=(float4 &a, float4 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    a.w += b.w;
}

// subtract
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}

inline __host__ __device__ void operator-=(float4 &a, float4 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
    a.w -= b.w;
}

// multiply
inline __host__ __device__ float4 operator*(float4 a, float s)
{
    return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}

inline __host__ __device__ float4 operator*(float s, float4 a)
{
    return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}

inline __host__ __device__ void operator*=(float4 &a, float s)
{
    a.x *= s;
    a.y *= s;
    a.z *= s;
    a.w *= s;
}

// divide
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
    return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}

// vector / scalar: one reciprocal, then a multiply per component.
inline __host__ __device__ float4 operator/(float4 a, float s)
{
    float inv = 1.0f / s;
    return a * inv;
}

// scalar / vector: s divided by each component. The previous implementation
// returned a * (1 / s), which is a / s, not s / a.
inline __host__ __device__ float4 operator/(float s, float4 a)
{
    return make_float4(s / a.x, s / a.y, s / a.z, s / a.w);
}

inline __host__ __device__ void operator/=(float4 &a, float s)
{
    float inv = 1.0f / s;
    a *= inv;
}

// lerp
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
    return a + t*(b-a);
}

// clamp
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
}

inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
}

// dot product
inline __host__ __device__ float dot(float4 a, float4 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}

// length
inline __host__ __device__ float length(float4 r)
{
    return sqrtf(dot(r, r));
}

// normalize
// No zero-length guard: dot(v, v) == 0 makes invLen a division by zero.
inline __host__ __device__ float4 normalize(float4 v)
{
    float invLen = 1.0f / sqrtf(dot(v, v));
    return v * invLen;
}

// floor
inline __host__ __device__ float4 floor(const float4 v)
{
    return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
}

// validate
// Exact equality of every component; no tolerance.
inline __host__ __device__ bool validate(const float4 v0, const float4 v1)
{
    return (v0.x == v1.x && v0.y == v1.y && v0.z == v1.z && v0.w == v1.w);
}

// int3 functions
////////////////////////////////////////////////////////////////////////////////

// additional constructors
inline __host__ __device__ int3 make_int3(int s)
{
    return make_int3(s, s, s);
}

// Each component goes through a float -> int conversion (truncation toward zero).
inline __host__ __device__ int3 make_int3(float3 a)
{
    return make_int3(int(a.x), int(a.y), int(a.z));
}

// min
inline __host__ __device__ int3 min(int3 a, int3 b)
{
    return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}

// max
inline __host__ __device__ int3 max(int3 a, int3 b)
{
    return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}

// addition
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
    return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}

inline __host__ __device__ void operator+=(int3 &a, int3 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// subtract
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
    return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
}

inline __host__ __device__ void operator-=(int3 &a, int3 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// multiply
// int3 multiply (component-wise for vector * vector)
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
    return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
}

inline __host__ __device__ int3 operator*(int3 a, int s)
{
    return make_int3(a.x * s, a.y * s, a.z * s);
}

inline __host__ __device__ int3 operator*(int s, int3 a)
{
    return make_int3(a.x * s, a.y * s, a.z * s);
}

inline __host__ __device__ void operator*=(int3 &a, int s)
{
    a.x *= s;
    a.y *= s;
    a.z *= s;
}

// divide (integer division; a zero divisor is not checked)
inline __host__ __device__ int3 operator/(int3 a, int3 b)
{
    return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
}

inline __host__ __device__ int3 operator/(int3 a, int s)
{
    return make_int3(a.x / s, a.y / s, a.z / s);
}

// scalar / vector: s divided by each component. The previous implementation
// returned a / s, i.e. the operands were swapped.
inline __host__ __device__ int3 operator/(int s, int3 a)
{
    return make_int3(s / a.x, s / a.y, s / a.z);
}

inline __host__ __device__ void operator/=(int3 &a, int s)
{
    a.x /= s;
    a.y /= s;
    a.z /= s;
}

// clamp
// Limits f to [a, b]. The upper bound is applied first, so a wins when a > b.
inline __device__ __host__ int clamp(int f, int a, int b)
{
    return max(a, min(f, b));
}

inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
    return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}

inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
    return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}

// validate
// True only when all three components are equal.
inline __host__ __device__ bool validate(const int3 v0, const int3 v1)
{
    return (v0.x == v1.x && v0.y == v1.y && v0.z == v1.z);
}

// uint3 functions
////////////////////////////////////////////////////////////////////////////////

// additional constructors
inline __host__ __device__ uint3 make_uint3(uint s)
{
    return make_uint3(s, s, s);
}

inline __host__ __device__ uint4 make_uint4(uint s)
{
    return make_uint4(s, s, s, s);
}

// Each component goes through a float -> uint conversion.
inline __host__ __device__ uint3 make_uint3(float3 a)
{
    return make_uint3(uint(a.x), uint(a.y), uint(a.z));
}

// min
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
    return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}

// max
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
    return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}

// addition
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
    return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
}

inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// subtract (unsigned arithmetic: wraps around when b > a)
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
    return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
}

inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// multiply (component-wise for vector * vector)
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
}

inline __host__ __device__ uint3 operator*(uint3 a, uint s)
{
    return make_uint3(a.x * s, a.y * s, a.z * s);
}

inline __host__ __device__ uint3 operator*(uint s, uint3 a)
{
    return make_uint3(a.x * s, a.y * s, a.z * s);
}

inline __host__ __device__ void operator*=(uint3 &a, uint s)
{
    a.x *= s;
    a.y *= s;
    a.z *= s;
}

// divide (integer division; a zero divisor is not checked)
inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
{
    return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
}

inline __host__ __device__ uint3 operator/(uint3 a, uint s)
{
    return make_uint3(a.x / s, a.y / s, a.z / s);
}

// scalar / vector: s divided by each component. The previous implementation
// returned a / s, i.e. the operands were swapped.
inline __host__ __device__ uint3 operator/(uint s, uint3 a)
{
    return make_uint3(s / a.x, s / a.y, s / a.z);
}

inline __host__ __device__ void operator/=(uint3 &a, uint s)
{
    a.x /= s;
    a.y /= s;
    a.z /= s;
}

// clamp
// Limits f to [a, b]. The upper bound is applied first, so a wins when a > b.
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
    return max(a, min(f, b));
}

inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
    return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}

inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
    return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}

// validate
// True only when all three components are equal.
inline __host__ __device__ bool validate(const uint3 v0, const uint3 v1)
{
    return (v0.x == v1.x && v0.y == v1.y && v0.z == v1.z);
}

// other
// Bit packing helpers: split a 32-bit value into two 16-bit halves (and a
// 16-bit value into two bytes), or merge the halves back together.

// Upper 16 bits of val.
inline __device__ __host__ ushort u32High(uint val)
{
    return ushort(val >> 16);
}

// Lower 16 bits of val.
inline __device__ __host__ ushort u32Low(uint val)
{
    return ushort(val & ((1 << 16u) - 1));
}

// Builds a 32-bit value with hi in bits 31..16 and lo in bits 15..0.
inline __device__ __host__ uint merge(ushort hi, ushort lo)
{
    return uint((uint(hi) << 16) | uint(lo));
}

// Upper 8 bits of val.
inline __device__ __host__ uchar u16High(ushort val)
{
    return uchar(val >> 8);
}

// Lower 8 bits of val.
inline __device__ __host__ uchar u16Low(ushort val)
{
    return uchar(val & ((1 << 8u) - 1));
}

// Builds a 16-bit value with hi in bits 15..8 and lo in bits 7..0.
inline __device__ __host__ ushort merge(uchar hi, uchar lo)
{
    return ushort((ushort(hi) << 8) | ushort(lo));
}

#endif