// UVec4 component constructor: sets the vector to (inX, inY, inZ, inW)
#if defined(JPH_USE_SSE)
    mValue = _mm_set_epi32(int(inW), int(inZ), int(inY), int(inX));
#elif defined(JPH_USE_NEON)
    uint32x2_t xy = vcreate_u32(static_cast<uint64>(inX) | (static_cast<uint64>(inY) << 32));
    uint32x2_t zw = vcreate_u32(static_cast<uint64>(inZ) | (static_cast<uint64>(inW) << 32));
    mValue = vcombine_u32(xy, zw);
// UVec4::Swizzle<SwizzleX, SwizzleY, SwizzleZ, SwizzleW>(): each template argument selects the source component for the corresponding output lane
template <uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
    static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
    static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
    static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
    static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");

#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
#elif defined(JPH_USE_NEON)
    return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
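A minimal usage sketch for Swizzle (not part of UVec4.inl; it assumes Jolt's math headers are included and the JPH namespace is in scope, and the values are purely illustrative):

    UVec4 v(1, 2, 3, 4);
    // Lane i of the result takes the component named by the i-th template argument, so this reverses the order.
    UVec4 reversed = v.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>(); // (4, 3, 2, 1)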
// UVec4::sZero()
#if defined(JPH_USE_SSE)
    return _mm_setzero_si128();
#elif defined(JPH_USE_NEON)
    return vdupq_n_u32(0);
#else
    return UVec4(0, 0, 0, 0);
#endif
// UVec4::sReplicate(inV)
#if defined(JPH_USE_SSE)
    return _mm_set1_epi32(int(inV));
#elif defined(JPH_USE_NEON)
    return vdupq_n_u32(inV);
#else
    return UVec4(inV, inV, inV, inV);
#endif
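A short usage sketch for sZero and sReplicate (same assumed context as above, values illustrative):

    UVec4 zero = UVec4::sZero();          // (0, 0, 0, 0)
    UVec4 sevens = UVec4::sReplicate(7);  // (7, 7, 7, 7)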
// UVec4::sLoadInt(inV)
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_load_ss(reinterpret_cast<const float *>(inV)));
#elif defined(JPH_USE_NEON)
    return vsetq_lane_u32(*inV, vdupq_n_u32(0), 0);
#else
    return UVec4(*inV, 0, 0, 0);
#endif
// UVec4::sLoadInt4(inV)
#if defined(JPH_USE_SSE)
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(inV));
#elif defined(JPH_USE_NEON)
    return vld1q_u32(inV);
#else
    return UVec4(inV[0], inV[1], inV[2], inV[3]);
#endif
// UVec4::sLoadInt4Aligned(inV) - inV must be 16-byte aligned
#if defined(JPH_USE_SSE)
    return _mm_load_si128(reinterpret_cast<const __m128i *>(inV));
#elif defined(JPH_USE_NEON)
    return vld1q_u32(inV);
#else
    return UVec4(inV[0], inV[1], inV[2], inV[3]);
#endif
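A sketch of the three load variants (same assumed context as above):

    alignas(16) uint32 data[4] = { 1, 2, 3, 4 };
    UVec4 a = UVec4::sLoadInt4Aligned(data); // (1, 2, 3, 4), pointer must be 16-byte aligned
    UVec4 b = UVec4::sLoadInt4(data);        // (1, 2, 3, 4), no alignment requirement
    UVec4 c = UVec4::sLoadInt(data);         // (1, 0, 0, 0), only the X component is loaded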
// UVec4::sGatherInt4<Scale>(inBase, inOffsets)
template <const int Scale>
#if defined(JPH_USE_AVX2)
    return _mm_i32gather_epi32(reinterpret_cast<const int *>(inBase), inOffsets.mValue, Scale);
#else
    return Vec4::sGatherFloat4<Scale>(reinterpret_cast<const float *>(inBase), inOffsets).ReinterpretAsInt();
#endif
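An illustrative sketch of the gather semantics (same assumed context as above). Per the description in the reference list, lane i reads the int at inBase + inOffsets[i] * Scale, where Scale is assumed to be a byte multiplier as with _mm_i32gather_epi32:

    uint32 table[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
    UVec4 offsets(0, 2, 4, 6);
    // Scale = 4 bytes per step, so the addresses are table + 0, 8, 16 and 24 bytes.
    UVec4 gathered = UVec4::sGatherInt4<4>(table, offsets); // (10, 12, 14, 16)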
// UVec4::sMin(inV1, inV2)
#if defined(JPH_USE_SSE4_1)
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; i++)
// UVec4::sMax(inV1, inV2)
#if defined(JPH_USE_SSE4_1)
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; i++)
// UVec4::sEquals(inV1, inV2)
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)
#else
    return UVec4(inV1.mU32[0] == inV2.mU32[0]? 0xffffffffu : 0,
                 inV1.mU32[1] == inV2.mU32[1]? 0xffffffffu : 0,
                 inV1.mU32[2] == inV2.mU32[2]? 0xffffffffu : 0,
                 inV1.mU32[3] == inV2.mU32[3]? 0xffffffffu : 0);
#endif
// UVec4::sSelect(inV1, inV2, inControl)
#if defined(JPH_USE_SSE4_1)
    return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(inV1.mValue), _mm_castsi128_ps(inV2.mValue), _mm_castsi128_ps(inControl.mValue)));
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; i++)
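A usage sketch combining sEquals and sSelect (same assumed context as above). sEquals produces an all-ones mask per matching lane, and sSelect picks inV2 wherever the control mask has its high bit set:

    UVec4 a(1, 2, 3, 4);
    UVec4 b(1, 9, 3, 9);
    UVec4 mask = UVec4::sEquals(a, b);                          // (0xffffffff, 0, 0xffffffff, 0)
    UVec4 r = UVec4::sSelect(UVec4::sReplicate(100), a, mask);  // (1, 100, 3, 100)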
// UVec4::sOr(inV1, inV2)
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)

// UVec4::sXor(inV1, inV2)
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)

// UVec4::sAnd(inV1, inV2)
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)
// UVec4::sNot(inV1)
#if defined(JPH_USE_AVX512)
#elif defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)
    return vmvnq_u32(inV1.mValue);
// UVec4::operator *(inV2)
#if defined(JPH_USE_SSE4_1)
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; i++)
// UVec4::operator +(inV2)
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)
// UVec4::operator +=(inV2)
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; ++i)
// UVec4::SplatX()
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(0, 0, 0, 0));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 0);

// UVec4::SplatY()
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(1, 1, 1, 1));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 1);

// UVec4::SplatZ()
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(2, 2, 2, 2));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 2);

// UVec4::SplatW()
#if defined(JPH_USE_SSE)
    return _mm_shuffle_epi32(mValue, _MM_SHUFFLE(3, 3, 3, 3));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_u32(mValue, 3);
// UVec4::ToFloat()
#if defined(JPH_USE_SSE)
    return _mm_cvtepi32_ps(mValue);
#elif defined(JPH_USE_NEON)
    return vcvtq_f32_s32(mValue);
// UVec4::ReinterpretAsFloat()
#if defined(JPH_USE_SSE)
#elif defined(JPH_USE_NEON)
    return vreinterpretq_f32_s32(mValue);
#else
    return *reinterpret_cast<const Vec4 *>(this);
#endif
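To illustrate the difference between the two conversions above (same assumed context as before): ToFloat performs a numeric int-to-float conversion, while ReinterpretAsFloat leaves the 128 bits untouched and merely retypes them.

    UVec4 v(1, 2, 3, 4);
    Vec4 converted = v.ToFloat();           // (1.0f, 2.0f, 3.0f, 4.0f)
    Vec4 sameBits = v.ReinterpretAsFloat(); // the integer bit patterns viewed as floats (tiny denormals here)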
// UVec4::StoreInt4(outV)
#if defined(JPH_USE_SSE)
    _mm_storeu_si128(reinterpret_cast<__m128i *>(outV), mValue);
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; ++i)
// UVec4::StoreInt4Aligned(outV) - outV must be 16-byte aligned
#if defined(JPH_USE_SSE)
    _mm_store_si128(reinterpret_cast<__m128i *>(outV), mValue);
#elif defined(JPH_USE_NEON)
#else
    for (int i = 0; i < 4; ++i)
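A store round trip as a sketch (same assumed context as before):

    UVec4 v(5, 6, 7, 8);
    uint32 unaligned[4];
    v.StoreInt4(unaligned);          // { 5, 6, 7, 8 }, no alignment requirement
    alignas(16) uint32 aligned[4];
    v.StoreInt4Aligned(aligned);     // same values, but the destination must be 16-byte aligned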
// UVec4::CountTrues()
#if defined(JPH_USE_SSE)
    return CountBits(_mm_movemask_ps(_mm_castsi128_ps(mValue)));
#elif defined(JPH_USE_NEON)
    return vaddvq_u32(vshrq_n_u32(mValue, 31));
#else
    return (mU32[0] >> 31) + (mU32[1] >> 31) + (mU32[2] >> 31) + (mU32[3] >> 31);
#endif
// UVec4::GetTrues()
#if defined(JPH_USE_SSE)
    return _mm_movemask_ps(_mm_castsi128_ps(mValue));
#elif defined(JPH_USE_NEON)
    int32x4_t shift = JPH_NEON_INT32x4(0, 1, 2, 3);
    return vaddvq_u32(vshlq_u32(vshrq_n_u32(mValue, 31), shift));
#else
    return (mU32[0] >> 31) | ((mU32[1] >> 31) << 1) | ((mU32[2] >> 31) << 2) | ((mU32[3] >> 31) << 3);
#endif
// UVec4::TestAllXYZTrue()
    return (GetTrues() & 0b111) == 0b111;
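A sketch of the boolean-style queries (same assumed context as before; "true" means the high bit of a component is set):

    UVec4 mask(0x80000000u, 0, 0xffffffffu, 0);
    int bits = mask.GetTrues();        // 0b0101: X is true (bit 0) and Z is true (bit 2)
    int count = mask.CountTrues();     // 2
    bool xyz = mask.TestAllXYZTrue();  // false, because Y is not true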
// UVec4::LogicalShiftLeft<Count>()
template <const uint32 Count>
    static_assert(Count <= 31, "Invalid shift");

#if defined(JPH_USE_SSE)
    return _mm_slli_epi32(mValue, Count);
#elif defined(JPH_USE_NEON)
    return vshlq_n_u32(mValue, Count);

// UVec4::LogicalShiftRight<Count>()
template <const uint32 Count>
    static_assert(Count <= 31, "Invalid shift");

#if defined(JPH_USE_SSE)
    return _mm_srli_epi32(mValue, Count);
#elif defined(JPH_USE_NEON)
    return vshrq_n_u32(mValue, Count);

// UVec4::ArithmeticShiftRight<Count>()
template <const uint32 Count>
    static_assert(Count <= 31, "Invalid shift");

#if defined(JPH_USE_SSE)
    return _mm_srai_epi32(mValue, Count);
#elif defined(JPH_USE_NEON)
    return vshrq_n_s32(mValue, Count);
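A sketch showing the three shift flavours on the same input (same assumed context as before):

    UVec4 v = UVec4::sReplicate(0x80000001u);
    UVec4 shl = v.LogicalShiftLeft<1>();     // 0x00000002 in every lane, the high bit is shifted out
    UVec4 shr = v.LogicalShiftRight<1>();    // 0x40000000 in every lane, zeros shifted in at the top
    UVec4 sar = v.ArithmeticShiftRight<1>(); // 0xC0000000 in every lane, the sign bit is replicated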
// UVec4::Expand4Uint16Lo()
#if defined(JPH_USE_SSE)
    return _mm_unpacklo_epi16(mValue, _mm_castps_si128(_mm_setzero_ps()));
#elif defined(JPH_USE_NEON)
    int16x4_t value = vget_low_s16(mValue);
    int16x4_t zero = vdup_n_s16(0);
    return vcombine_s16(vzip1_s16(value, zero), vzip2_s16(value, zero));
#else
    return UVec4(mU32[0] & 0xffff,
                 (mU32[0] >> 16) & 0xffff,
                 mU32[1] & 0xffff,
                 (mU32[1] >> 16) & 0xffff);
#endif
// UVec4::Expand4Uint16Hi()
#if defined(JPH_USE_SSE)
    return _mm_unpackhi_epi16(mValue, _mm_castps_si128(_mm_setzero_ps()));
#elif defined(JPH_USE_NEON)
    int16x4_t value = vget_high_s16(mValue);
    int16x4_t zero = vdup_n_s16(0);
    return vcombine_s16(vzip1_s16(value, zero), vzip2_s16(value, zero));
#else
    return UVec4(mU32[2] & 0xffff,
                 (mU32[2] >> 16) & 0xffff,
                 mU32[3] & 0xffff,
                 (mU32[3] >> 16) & 0xffff);
#endif
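A sketch of the 16-bit expansion (same assumed context as before). The vector is treated as eight packed 16-bit values; Lo expands the lower four, Hi the upper four:

    UVec4 packed(0x00020001u, 0x00040003u, 0x00060005u, 0x00080007u);
    UVec4 lo = packed.Expand4Uint16Lo(); // (1, 2, 3, 4)
    UVec4 hi = packed.Expand4Uint16Hi(); // (5, 6, 7, 8)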
// UVec4::Expand4Byte0()
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff03), int(0xffffff02), int(0xffffff01), int(0xffffff00)));
#elif defined(JPH_USE_NEON)
    int8x16_t idx = JPH_NEON_INT8x16(0x00, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, 0x7f, 0x7f, 0x02, 0x7f, 0x7f, 0x7f, 0x03, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[0] >> (i * 8)) & 0xff;
    return result;
#endif
// UVec4::Expand4Byte4()
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff07), int(0xffffff06), int(0xffffff05), int(0xffffff04)));
#elif defined(JPH_USE_NEON)
    int8x16_t idx = JPH_NEON_INT8x16(0x04, 0x7f, 0x7f, 0x7f, 0x05, 0x7f, 0x7f, 0x7f, 0x06, 0x7f, 0x7f, 0x7f, 0x07, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[1] >> (i * 8)) & 0xff;
    return result;
#endif
// UVec4::Expand4Byte8()
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff0b), int(0xffffff0a), int(0xffffff09), int(0xffffff08)));
#elif defined(JPH_USE_NEON)
    int8x16_t idx = JPH_NEON_INT8x16(0x08, 0x7f, 0x7f, 0x7f, 0x09, 0x7f, 0x7f, 0x7f, 0x0a, 0x7f, 0x7f, 0x7f, 0x0b, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[2] >> (i * 8)) & 0xff;
    return result;
#endif
// UVec4::Expand4Byte12()
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, _mm_set_epi32(int(0xffffff0f), int(0xffffff0e), int(0xffffff0d), int(0xffffff0c)));
#elif defined(JPH_USE_NEON)
    int8x16_t idx = JPH_NEON_INT8x16(0x0c, 0x7f, 0x7f, 0x7f, 0x0d, 0x7f, 0x7f, 0x7f, 0x0e, 0x7f, 0x7f, 0x7f, 0x0f, 0x7f, 0x7f, 0x7f);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result;
    for (int i = 0; i < 4; i++)
        result.mU32[i] = (mU32[3] >> (i * 8)) & 0xff;
    return result;
#endif
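A sketch of the byte expansion (same assumed context as before). Each Expand4ByteN variant zero-extends four consecutive bytes of the 128-bit value into the four 32-bit lanes:

    UVec4 bytes(0x04030201u, 0x08070605u, 0x0c0b0a09u, 0x100f0e0du);
    UVec4 b0 = bytes.Expand4Byte0();   // bytes 0..3   -> (1, 2, 3, 4)
    UVec4 b12 = bytes.Expand4Byte12(); // bytes 12..15 -> (13, 14, 15, 16)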
// UVec4::ShiftComponents4Minus(inCount)
#if defined(JPH_USE_SSE4_1)
    return _mm_shuffle_epi8(mValue, sFourMinusXShuffle[inCount].mValue);
#elif defined(JPH_USE_NEON)
    uint8x16_t idx = vreinterpretq_u8_u32(sFourMinusXShuffle[inCount].mValue);
    return vreinterpretq_u32_s8(vqtbl1q_s8(vreinterpretq_s8_u32(mValue), idx));
#else
    UVec4 result = sZero();
    for (int i = 0; i < inCount; i++)
        result.mU32[i] = mU32[i + 4 - inCount];
    return result;
#endif
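A sketch of ShiftComponents4Minus (same assumed context as before). The last inCount components move to the front and the remaining lanes become zero:

    UVec4 v(1, 2, 3, 4);
    UVec4 one = v.ShiftComponents4Minus(1);   // (4, 0, 0, 0)
    UVec4 three = v.ShiftComponents4Minus(3); // (2, 3, 4, 0)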
uint32_t uint32
Definition Core.h:312
#define JPH_NAMESPACE_END
Definition Core.h:240
uint64_t uint64
Definition Core.h:313
#define JPH_NAMESPACE_BEGIN
Definition Core.h:234
uint CountBits(uint32 inValue)
Count the number of 1 bits in a value.
Definition Math.h:155
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_W
Use the W component.
Definition Swizzle.h:15
@ SWIZZLE_X
Use the X component.
Definition Swizzle.h:12
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
JPH_INLINE UVec4 Swizzle() const
Swizzle the elements in inV.
static JPH_INLINE UVec4 sNot(UVec4Arg inV1)
Logical not (component wise)
Definition UVec4.inl:209
static JPH_INLINE UVec4 sMin(UVec4Arg inV1, UVec4Arg inV2)
Return the minimum value of each of the components.
Definition UVec4.inl:110
JPH_INLINE UVec4 LogicalShiftLeft() const
Shift all components by Count bits to the left (filling with zeros from the right)
JPH_INLINE int CountTrues() const
Count the number of components that are true (true is when highest bit of component is set)
Definition UVec4.inl:367
JPH_INLINE UVec4 SplatY() const
Replicate the Y component to all components.
Definition UVec4.inl:288
static JPH_INLINE UVec4 sLoadInt(const uint32 *inV)
Load 1 int from memory and place it in the X component, zeros Y, Z and W.
Definition UVec4.inl:67
JPH_INLINE UVec4 Expand4Uint16Lo() const
Takes the lower four 16-bit values and expands them to X, Y, Z and W.
Definition UVec4.inl:455
static JPH_INLINE UVec4 sSort4True(UVec4Arg inValue, UVec4Arg inIndex)
Definition UVec4.inl:222
JPH_INLINE UVec4 LogicalShiftRight() const
Shift all components by Count bits to the right (filling with zeros from the left)
static JPH_INLINE UVec4 sReplicate(uint32 inV)
Replicate int inV across all components.
Definition UVec4.inl:56
JPH_INLINE UVec4 SplatX() const
Replicate the X component to all components.
Definition UVec4.inl:277
JPH_INLINE UVec4 Expand4Byte4() const
Takes byte 4 .. 7 and expands them to X, Y, Z and W.
Definition UVec4.inl:502
JPH_INLINE bool TestAllTrue() const
Test if all components are true (true is when highest bit of component is set)
Definition UVec4.inl:400
JPH_INLINE UVec4 Expand4Byte0() const
Takes byte 0 .. 3 and expands them to X, Y, Z and W.
Definition UVec4.inl:487
JPH_INLINE int GetTrues() const
Store if X is true in bit 0, Y in bit 1, Z in bit 2 and W in bit 3 (true is when highest bit of component is set)
Definition UVec4.inl:378
JPH_INLINE bool TestAnyXYZTrue() const
Test if any of X, Y or Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:395
JPH_INLINE UVec4 & operator+=(UVec4Arg inV2)
Add two integer vectors (component wise)
Definition UVec4.inl:264
static JPH_INLINE UVec4 sGatherInt4(const uint32 *inBase, UVec4Arg inOffsets)
Gather 4 ints from memory at inBase + inOffsets[i] * Scale.
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:194
static JPH_INLINE UVec4 sEquals(UVec4Arg inV1, UVec4Arg inV2)
Equals (component wise)
Definition UVec4.inl:138
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:166
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:405
JPH_INLINE UVec4 ShiftComponents4Minus(int inCount) const
Shift vector components by 4 - Count positions to the left, so if Count = 1 the resulting vector is (W, 0, 0, 0).
Definition UVec4.inl:547
JPH_INLINE bool operator==(UVec4Arg inV2) const
Comparison.
Definition UVec4.inl:23
static JPH_INLINE UVec4 sMax(UVec4Arg inV1, UVec4Arg inV2)
Return the maximum of each of the components.
Definition UVec4.inl:124
JPH_INLINE UVec4 SplatZ() const
Replicate the Z component to all components.
Definition UVec4.inl:299
Type mValue
Definition UVec4.h:210
JPH_INLINE UVec4 SplatW() const
Replicate the W component to all components.
Definition UVec4.inl:310
JPH_INLINE void StoreInt4(uint32 *outV) const
Store 4 ints to memory.
Definition UVec4.inl:343
JPH_INLINE UVec4 Expand4Byte8() const
Takes byte 8 .. 11 and expands them to X, Y, Z and W.
Definition UVec4.inl:517
static JPH_INLINE UVec4 sLoadInt4Aligned(const uint32 *inV)
Load 4 ints from memory, aligned to 16 bytes.
Definition UVec4.inl:89
static JPH_INLINE UVec4 sLoadInt4(const uint32 *inV)
Load 4 ints from memory.
Definition UVec4.inl:78
JPH_INLINE UVec4 Expand4Byte12() const
Takes byte 12 .. 15 and expands them to X, Y, Z and W.
Definition UVec4.inl:532
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:180
JPH_INLINE UVec4 Expand4Uint16Hi() const
Takes the upper four 16-bit values and expands them to X, Y, Z and W.
Definition UVec4.inl:471
static JPH_INLINE UVec4 sZero()
Vector with all zeros.
Definition UVec4.inl:45
JPH_INLINE UVec4 operator+(UVec4Arg inV2)
Adds two integer vectors (component wise, discards any overflow)
Definition UVec4.inl:250
JPH_INLINE UVec4 ArithmeticShiftRight() const
Shift all components by Count bits to the right (shifting in the value of the highest bit)
UVec4() = default
Constructor.
JPH_INLINE UVec4 operator*(UVec4Arg inV2) const
Multiplies two integer vectors (component wise, discards any overflow)
Definition UVec4.inl:236
static JPH_INLINE UVec4 sSelect(UVec4Arg inV1, UVec4Arg inV2, UVec4Arg inControl)
Component wise select, returns inV1 when highest bit of inControl = 0 and inV2 when highest bit of inControl = 1
Definition UVec4.inl:152
JPH_INLINE Vec4 ToFloat() const
Convert each component from an int to a float.
Definition UVec4.inl:321
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:332
JPH_INLINE void StoreInt4Aligned(uint32 *outV) const
Store 4 ints to memory, aligned to 16 bytes.
Definition UVec4.inl:355
JPH_INLINE bool TestAnyTrue() const
Test if any of the components are true (true is when highest bit of component is set)
Definition UVec4.inl:390
uint32 mU32[4]
Definition UVec4.h:211