Jolt Physics
A multi core friendly Game Physics Engine
Loading...
Searching...
No Matches
Vec3.inl
Go to the documentation of this file.
1// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3// SPDX-License-Identifier: MIT
4
5#include <Jolt/Math/Vec4.h>
6#include <Jolt/Math/UVec4.h>
8
10#include <random>
12
13// Create a std::hash for Vec3
14JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
15
17
/// Internal helper: asserts that the W lane holds the same bit pattern as the Z lane.
/// Vec3 is stored in a 4-wide SIMD register; keeping W == Z prevents operations on the
/// unused 4th lane (e.g. divides) from raising floating point exceptions.
void Vec3::CheckW() const
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	// Compare the raw bits rather than the float values so the assert also passes
	// when Z and W are both NaN (a float compare of NaN == NaN is false)
	JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}
25
/// Internal helper that returns inValue with its W lane replaced by a copy of the Z lane.
/// Maintaining the W == Z invariant guarantees that SIMD operations touching the otherwise
/// unused 4th lane cannot trigger floating point exceptions.
JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	#if defined(JPH_USE_SSE)
		// Permute to (x, y, z, z): duplicate Z into W
		return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
	#elif defined(JPH_USE_NEON)
		// Same (x, y, z, z) permutation via the NEON shuffle helper macro
		return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
	#else
		// Scalar fallback: copy X, Y, Z and replicate Z into W
		Type value;
		value.mData[0] = inValue.mData[0];
		value.mData[1] = inValue.mData[1];
		value.mData[2] = inValue.mData[2];
		value.mData[3] = inValue.mData[2];
		return value;
	#endif
#else
	// FP exception checking disabled: W may hold anything, no fixup required
	return inValue;
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}
45
47 mValue(sFixW(inRHS.mValue))
48{
49}
50
/// Construct from a Float3 storage struct. The Z value is replicated into the W lane
/// so that SIMD operations on the unused 4th component cannot raise FP exceptions.
Vec3::Vec3(const Float3 &inV)
{
#if defined(JPH_USE_SSE)
	Type x = _mm_load_ss(&inV.x);
	Type y = _mm_load_ss(&inV.y);
	Type z = _mm_load_ss(&inV.z);
	Type xy = _mm_unpacklo_ps(x, y); // (x, y, 0, 0)
	mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // (x, y, z, z): assure Z and W are the same
#elif defined(JPH_USE_NEON)
	float32x2_t xy = vld1_f32(&inV.x); // Loads x and y in one go (adjacent members of Float3)
	float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
	mValue = vcombine_f32(xy, zz);
#else
	mF32[0] = inV[0];
	mF32[1] = inV[1];
	mF32[2] = inV[2];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = inV[2]; // Mirror Z into W only when FP exception checking is enabled
	#endif
#endif
}
72
/// Construct from individual components. W receives a copy of Z so that SIMD
/// operations on the unused 4th lane cannot raise floating point exceptions.
Vec3::Vec3(float inX, float inY, float inZ)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_set_ps(inZ, inZ, inY, inX); // (x, y, z, z): W = Z
#elif defined(JPH_USE_NEON)
	// Pack the raw float bit patterns into two 64-bit halves: (x | y) and (z | z).
	// NOTE(review): reading the floats through reinterpret_cast<uint32 *> violates
	// strict aliasing, and vcreate_f32 returns float32x2_t while the variables are
	// declared uint32x2_t — confirm this path builds on NEON toolchains as written.
	uint32x2_t xy = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32 *>(&inX)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inY)) << 32));
	uint32x2_t zz = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32* >(&inZ)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inZ)) << 32));
	mValue = vcombine_f32(xy, zz);
#else
	mF32[0] = inX;
	mF32[1] = inY;
	mF32[2] = inZ;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = inZ; // Mirror Z into W only when FP exception checking is enabled
	#endif
#endif
}
90
91template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
93{
94 static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
95 static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
96 static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
97
98#if defined(JPH_USE_SSE)
99 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
100#elif defined(JPH_USE_NEON)
101 return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
102#else
103 return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
104#endif
105}
106
108{
109#if defined(JPH_USE_SSE)
110 return _mm_setzero_ps();
111#elif defined(JPH_USE_NEON)
112 return vdupq_n_f32(0);
113#else
114 return Vec3(0, 0, 0);
115#endif
116}
117
119{
120#if defined(JPH_USE_SSE)
121 return _mm_set1_ps(inV);
122#elif defined(JPH_USE_NEON)
123 return vdupq_n_f32(inV);
124#else
125 return Vec3(inV, inV, inV);
126#endif
127}
128
130{
131 return sReplicate(numeric_limits<float>::quiet_NaN());
132}
133
135{
136#if defined(JPH_USE_SSE)
137 Type v = _mm_loadu_ps(&inV.x);
138#elif defined(JPH_USE_NEON)
139 Type v = vld1q_f32(&inV.x);
140#else
141 Type v = { inV.x, inV.y, inV.z };
142#endif
143 return sFixW(v);
144}
145
147{
148#if defined(JPH_USE_SSE)
149 return _mm_min_ps(inV1.mValue, inV2.mValue);
150#elif defined(JPH_USE_NEON)
151 return vminq_f32(inV1.mValue, inV2.mValue);
152#else
153 return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
154 min(inV1.mF32[1], inV2.mF32[1]),
155 min(inV1.mF32[2], inV2.mF32[2]));
156#endif
157}
158
160{
161#if defined(JPH_USE_SSE)
162 return _mm_max_ps(inV1.mValue, inV2.mValue);
163#elif defined(JPH_USE_NEON)
164 return vmaxq_f32(inV1.mValue, inV2.mValue);
165#else
166 return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
167 max(inV1.mF32[1], inV2.mF32[1]),
168 max(inV1.mF32[2], inV2.mF32[2]));
169#endif
170}
171
173{
174 return sMax(sMin(inV, inMax), inMin);
175}
176
178{
179#if defined(JPH_USE_SSE)
180 return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
181#elif defined(JPH_USE_NEON)
182 return vceqq_f32(inV1.mValue, inV2.mValue);
183#else
184 uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
185 return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
186 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
187 z,
188 z);
189#endif
190}
191
193{
194#if defined(JPH_USE_SSE)
195 return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
196#elif defined(JPH_USE_NEON)
197 return vcltq_f32(inV1.mValue, inV2.mValue);
198#else
199 uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
200 return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
201 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
202 z,
203 z);
204#endif
205}
206
208{
209#if defined(JPH_USE_SSE)
210 return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
211#elif defined(JPH_USE_NEON)
212 return vcleq_f32(inV1.mValue, inV2.mValue);
213#else
214 uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
215 return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
216 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
217 z,
218 z);
219#endif
220}
221
223{
224#if defined(JPH_USE_SSE)
225 return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
226#elif defined(JPH_USE_NEON)
227 return vcgtq_f32(inV1.mValue, inV2.mValue);
228#else
229 uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
230 return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
231 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
232 z,
233 z);
234#endif
235}
236
238{
239#if defined(JPH_USE_SSE)
240 return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
241#elif defined(JPH_USE_NEON)
242 return vcgeq_f32(inV1.mValue, inV2.mValue);
243#else
244 uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
245 return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
246 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
247 z,
248 z);
249#endif
250}
251
253{
254#if defined(JPH_USE_SSE)
255 #ifdef JPH_USE_FMADD
256 return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
257 #else
258 return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
259 #endif
260#elif defined(JPH_USE_NEON)
261 return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
262#else
263 return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
264 inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
265 inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
266#endif
267}
268
270{
271#if defined(JPH_USE_SSE4_1)
272 Type v = _mm_blendv_ps(inV1.mValue, inV2.mValue, _mm_castsi128_ps(inControl.mValue));
273 return sFixW(v);
274#elif defined(JPH_USE_NEON)
275 Type v = vbslq_f32(vshrq_n_s32(inControl.mValue, 31), inV2.mValue, inV1.mValue);
276 return sFixW(v);
277#else
278 Vec3 result;
279 for (int i = 0; i < 3; i++)
280 result.mF32[i] = inControl.mU32[i] ? inV2.mF32[i] : inV1.mF32[i];
281#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
282 result.mF32[3] = result.mF32[2];
283#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
284 return result;
285#endif
286}
287
289{
290#if defined(JPH_USE_SSE)
291 return _mm_or_ps(inV1.mValue, inV2.mValue);
292#elif defined(JPH_USE_NEON)
293 return vorrq_s32(inV1.mValue, inV2.mValue);
294#else
296#endif
297}
298
300{
301#if defined(JPH_USE_SSE)
302 return _mm_xor_ps(inV1.mValue, inV2.mValue);
303#elif defined(JPH_USE_NEON)
304 return veorq_s32(inV1.mValue, inV2.mValue);
305#else
307#endif
308}
309
311{
312#if defined(JPH_USE_SSE)
313 return _mm_and_ps(inV1.mValue, inV2.mValue);
314#elif defined(JPH_USE_NEON)
315 return vandq_s32(inV1.mValue, inV2.mValue);
316#else
318#endif
319}
320
321Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
322{
323 Vec4 s, c;
324 Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
325 return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
326}
327
328template <class Random>
329Vec3 Vec3::sRandom(Random &inRandom)
330{
331 std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
332 float theta = JPH_PI * zero_to_one(inRandom);
333 float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
334 return sUnitSpherical(theta, phi);
335}
336
338{
339 return sEquals(*this, inV2).TestAllXYZTrue();
340}
341
342bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
343{
344 return (inV2 - *this).LengthSq() <= inMaxDistSq;
345}
346
347bool Vec3::IsNearZero(float inMaxDistSq) const
348{
349 return LengthSq() <= inMaxDistSq;
350}
351
353{
354#if defined(JPH_USE_SSE)
355 return _mm_mul_ps(mValue, inV2.mValue);
356#elif defined(JPH_USE_NEON)
357 return vmulq_f32(mValue, inV2.mValue);
358#else
359 return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
360#endif
361}
362
/// Multiply each of the X, Y and Z components by scalar inV2.
Vec3 Vec3::operator * (float inV2) const
{
#if defined(JPH_USE_SSE)
	// Broadcast the scalar and multiply all 4 lanes; since W == Z the invariant is preserved
	return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
	return vmulq_n_f32(mValue, inV2);
#else
	return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
#endif
}
373
/// Multiply scalar inV1 with each of the X, Y and Z components of inV2 (scalar-on-the-left form).
Vec3 operator * (float inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	// Broadcast the scalar and multiply all 4 lanes; since W == Z the invariant is preserved
	return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vmulq_n_f32(inV2.mValue, inV1);
#else
	return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
#endif
}
384
/// Divide each of the X, Y and Z components by scalar inV2.
/// No zero check is performed; dividing by 0 follows the platform's FP behavior.
Vec3 Vec3::operator / (float inV2) const
{
#if defined(JPH_USE_SSE)
	// The W lane is divided as well; since W == Z this cannot introduce a spurious FP exception
	return _mm_div_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
	return vdivq_f32(mValue, vdupq_n_f32(inV2));
#else
	return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
#endif
}
395
397{
398#if defined(JPH_USE_SSE)
399 mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
400#elif defined(JPH_USE_NEON)
401 mValue = vmulq_n_f32(mValue, inV2);
402#else
403 for (int i = 0; i < 3; ++i)
404 mF32[i] *= inV2;
405 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
406 mF32[3] = mF32[2];
407 #endif
408#endif
409 return *this;
410}
411
413{
414#if defined(JPH_USE_SSE)
415 mValue = _mm_mul_ps(mValue, inV2.mValue);
416#elif defined(JPH_USE_NEON)
417 mValue = vmulq_f32(mValue, inV2.mValue);
418#else
419 for (int i = 0; i < 3; ++i)
420 mF32[i] *= inV2.mF32[i];
421 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
422 mF32[3] = mF32[2];
423 #endif
424#endif
425 return *this;
426}
427
429{
430#if defined(JPH_USE_SSE)
431 mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
432#elif defined(JPH_USE_NEON)
433 mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
434#else
435 for (int i = 0; i < 3; ++i)
436 mF32[i] /= inV2;
437 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
438 mF32[3] = mF32[2];
439 #endif
440#endif
441 return *this;
442}
443
445{
446#if defined(JPH_USE_SSE)
447 return _mm_add_ps(mValue, inV2.mValue);
448#elif defined(JPH_USE_NEON)
449 return vaddq_f32(mValue, inV2.mValue);
450#else
451 return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
452#endif
453}
454
456{
457#if defined(JPH_USE_SSE)
458 mValue = _mm_add_ps(mValue, inV2.mValue);
459#elif defined(JPH_USE_NEON)
460 mValue = vaddq_f32(mValue, inV2.mValue);
461#else
462 for (int i = 0; i < 3; ++i)
463 mF32[i] += inV2.mF32[i];
464 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
465 mF32[3] = mF32[2];
466 #endif
467#endif
468 return *this;
469}
470
472{
473#if defined(JPH_USE_SSE)
474 return _mm_sub_ps(_mm_setzero_ps(), mValue);
475#elif defined(JPH_USE_NEON)
476 return vnegq_f32(mValue);
477#else
478 return Vec3(-mF32[0], -mF32[1], -mF32[2]);
479#endif
480}
481
483{
484#if defined(JPH_USE_SSE)
485 return _mm_sub_ps(mValue, inV2.mValue);
486#elif defined(JPH_USE_NEON)
487 return vsubq_f32(mValue, inV2.mValue);
488#else
489 return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
490#endif
491}
492
494{
495#if defined(JPH_USE_SSE)
496 mValue = _mm_sub_ps(mValue, inV2.mValue);
497#elif defined(JPH_USE_NEON)
498 mValue = vsubq_f32(mValue, inV2.mValue);
499#else
500 for (int i = 0; i < 3; ++i)
501 mF32[i] -= inV2.mF32[i];
502 #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
503 mF32[3] = mF32[2];
504 #endif
505#endif
506 return *this;
507}
508
510{
511 inV2.CheckW(); // Check W equals Z to avoid div by zero
512#if defined(JPH_USE_SSE)
513 return _mm_div_ps(mValue, inV2.mValue);
514#elif defined(JPH_USE_NEON)
515 return vdivq_f32(mValue, inV2.mValue);
516#else
517 return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
518#endif
519}
520
522{
523#if defined(JPH_USE_SSE)
524 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
525#elif defined(JPH_USE_NEON)
526 return vdupq_laneq_f32(mValue, 0);
527#else
528 return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
529#endif
530}
531
533{
534#if defined(JPH_USE_SSE)
535 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
536#elif defined(JPH_USE_NEON)
537 return vdupq_laneq_f32(mValue, 1);
538#else
539 return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
540#endif
541}
542
544{
545#if defined(JPH_USE_SSE)
546 return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
547#elif defined(JPH_USE_NEON)
548 return vdupq_laneq_f32(mValue, 2);
549#else
550 return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
551#endif
552}
553
555{
556 return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
557}
558
560{
561 return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
562}
563
565{
566#if defined(JPH_USE_AVX512)
567 return _mm_range_ps(mValue, mValue, 0b1000);
568#elif defined(JPH_USE_SSE)
569 return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
570#elif defined(JPH_USE_NEON)
571 return vabsq_f32(mValue);
572#else
573 return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
574#endif
575}
576
578{
579 return sReplicate(1.0f) / mValue;
580}
581
583{
584#if defined(JPH_USE_SSE)
585 Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
586 t1 = _mm_mul_ps(t1, mValue);
587 Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
588 t2 = _mm_mul_ps(t2, inV2.mValue);
589 Type t3 = _mm_sub_ps(t1, t2);
590 return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
591#elif defined(JPH_USE_NEON)
592 Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
593 t1 = vmulq_f32(t1, mValue);
594 Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
595 t2 = vmulq_f32(t2, inV2.mValue);
596 Type t3 = vsubq_f32(t1, t2);
597 return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
598#else
599 return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
600 mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
601 mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
602#endif
603}
604
606{
607#if defined(JPH_USE_SSE4_1)
608 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
609#elif defined(JPH_USE_NEON)
610 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
611 mul = vsetq_lane_f32(0, mul, 3);
612 return vdupq_n_f32(vaddvq_f32(mul));
613#else
614 float dot = 0.0f;
615 for (int i = 0; i < 3; i++)
616 dot += mF32[i] * inV2.mF32[i];
617 return Vec3::sReplicate(dot);
618#endif
619}
620
622{
623#if defined(JPH_USE_SSE4_1)
624 return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
625#elif defined(JPH_USE_NEON)
626 float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
627 mul = vsetq_lane_f32(0, mul, 3);
628 return vdupq_n_f32(vaddvq_f32(mul));
629#else
630 float dot = 0.0f;
631 for (int i = 0; i < 3; i++)
632 dot += mF32[i] * inV2.mF32[i];
633 return Vec4::sReplicate(dot);
634#endif
635}
636
/// Dot product of this vector with inV2, using only the X, Y and Z components.
float Vec3::Dot(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
	// Mask 0x7f: multiply and sum lanes x, y, z (high nibble 0111), broadcast the
	// sum to all result lanes (low nibble 1111); extract lane 0 as the scalar
	return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
	mul = vsetq_lane_f32(0, mul, 3); // Zero the W lane so it doesn't contribute
	return vaddvq_f32(mul); // Horizontal add across all 4 lanes
#else
	// Scalar fallback: accumulate the x, y and z products
	float dot = 0.0f;
	for (int i = 0; i < 3; i++)
		dot += mF32[i] * inV2.mF32[i];
	return dot;
#endif
}
652
/// Squared length of the vector (dot product with itself), using X, Y and Z only.
float Vec3::LengthSq() const
{
#if defined(JPH_USE_SSE4_1)
	// Mask 0x7f: sum the squares of lanes x, y, z and broadcast; extract lane 0
	return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	mul = vsetq_lane_f32(0, mul, 3); // Zero the W lane so it doesn't contribute
	return vaddvq_f32(mul); // Horizontal add across all 4 lanes
#else
	// Scalar fallback: accumulate the squared components
	float len_sq = 0.0f;
	for (int i = 0; i < 3; i++)
		len_sq += mF32[i] * mF32[i];
	return len_sq;
#endif
}
668
/// Length (magnitude) of the vector: sqrt(x^2 + y^2 + z^2).
float Vec3::Length() const
{
#if defined(JPH_USE_SSE4_1)
	// Dot product of xyz with itself (mask 0x7f), then scalar sqrt of lane 0
	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	mul = vsetq_lane_f32(0, mul, 3); // Zero the W lane so it doesn't contribute
	float32x2_t sum = vdup_n_f32(vaddvq_f32(mul)); // Horizontal add, splat into a 2-lane vector
	return vget_lane_f32(vsqrt_f32(sum), 0);
#else
	return sqrt(LengthSq());
#endif
}
682
684{
685#if defined(JPH_USE_SSE)
686 return _mm_sqrt_ps(mValue);
687#elif defined(JPH_USE_NEON)
688 return vsqrtq_f32(mValue);
689#else
690 return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
691#endif
692}
693
695{
696#if defined(JPH_USE_SSE4_1)
697 return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
698#elif defined(JPH_USE_NEON)
699 float32x4_t mul = vmulq_f32(mValue, mValue);
700 mul = vsetq_lane_f32(0, mul, 3);
701 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
702 return vdivq_f32(mValue, vsqrtq_f32(sum));
703#else
704 return *this / Length();
705#endif
706}
707
709{
710#if defined(JPH_USE_SSE4_1)
711 Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
712 Type is_zero = _mm_cmpeq_ps(len_sq, _mm_setzero_ps());
713#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
714 if (_mm_movemask_ps(is_zero) == 0xf)
715 return inZeroValue;
716 else
717 return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
718#else
719 return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
720#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
721#elif defined(JPH_USE_NEON)
722 float32x4_t mul = vmulq_f32(mValue, mValue);
723 mul = vsetq_lane_f32(0, mul, 3);
724 float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
725 float32x4_t len = vsqrtq_f32(sum);
726 float32x4_t is_zero = vceqq_f32(len, vdupq_n_f32(0));
727 return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, len));
728#else
729 float len_sq = LengthSq();
730 if (len_sq == 0.0f)
731 return inZeroValue;
732 else
733 return *this / sqrt(len_sq);
734#endif
735}
736
737bool Vec3::IsNormalized(float inTolerance) const
738{
739 return abs(LengthSq() - 1.0f) <= inTolerance;
740}
741
/// Test whether any of the X, Y or Z components is NaN (the W lane is ignored).
bool Vec3::IsNaN() const
{
#if defined(JPH_USE_AVX512)
	// fpclass mask 0b10000001 selects QNaN (bit 0) and SNaN (bit 7); & 0x7 keeps lanes x, y, z
	return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
#elif defined(JPH_USE_SSE)
	// cmpunord sets a lane to all-ones when either operand is NaN; test lanes x, y, z only
	return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
#elif defined(JPH_USE_NEON)
	uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0); // Exclude the W lane from the count
	uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
	// Each non-NaN masked lane contributes 1 to the sum; all of x, y, z must be non-NaN for 3
	return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
#else
	return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
#endif
}
756
757void Vec3::StoreFloat3(Float3 *outV) const
758{
759#if defined(JPH_USE_SSE)
760 _mm_store_ss(&outV->x, mValue);
761 Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
762 _mm_store_ss(&outV->y, t.mValue);
764 _mm_store_ss(&outV->z, t.mValue);
765#elif defined(JPH_USE_NEON)
766 float32x2_t xy = vget_low_f32(mValue);
767 vst1_f32(&outV->x, xy);
768 vst1q_lane_f32(&outV->z, mValue, 2);
769#else
770 outV->x = mF32[0];
771 outV->y = mF32[1];
772 outV->z = mF32[2];
773#endif
774}
775
777{
778#if defined(JPH_USE_SSE)
779 return _mm_cvttps_epi32(mValue);
780#elif defined(JPH_USE_NEON)
781 return vcvtq_u32_f32(mValue);
782#else
783 return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
784#endif
785}
786
788{
789#if defined(JPH_USE_SSE)
790 return UVec4(_mm_castps_si128(mValue));
791#elif defined(JPH_USE_NEON)
792 return vreinterpretq_u32_f32(mValue);
793#else
794 return *reinterpret_cast<const UVec4 *>(this);
795#endif
796}
797
798float Vec3::ReduceMin() const
799{
800 Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
802 return v.GetX();
803}
804
805float Vec3::ReduceMax() const
806{
807 Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
809 return v.GetX();
810}
811
813{
814 if (abs(mF32[0]) > abs(mF32[1]))
815 {
816 float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
817 return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
818 }
819 else
820 {
821 float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
822 return Vec3(0.0f, mF32[2], -mF32[1]) / len;
823 }
824}
825
827{
828#if defined(JPH_USE_AVX512)
829 return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
830#elif defined(JPH_USE_SSE)
831 Type minus_one = _mm_set1_ps(-1.0f);
832 Type one = _mm_set1_ps(1.0f);
833 return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
834#elif defined(JPH_USE_NEON)
835 Type minus_one = vdupq_n_f32(-1.0f);
836 Type one = vdupq_n_f32(1.0f);
837 return vorrq_s32(vandq_s32(mValue, minus_one), one);
838#else
839 return Vec3(signbit(mF32[0])? -1.0f : 1.0f,
840 signbit(mF32[1])? -1.0f : 1.0f,
841 signbit(mF32[2])? -1.0f : 1.0f);
842#endif
843}
844
#define JPH_SUPPRESS_WARNINGS_STD_BEGIN
Definition Core.h:359
#define JPH_SUPPRESS_WARNINGS_STD_END
Definition Core.h:371
std::uint64_t uint64
Definition Core.h:430
#define JPH_NAMESPACE_END
Definition Core.h:354
std::uint32_t uint32
Definition Core.h:429
#define JPH_NAMESPACE_BEGIN
Definition Core.h:348
#define JPH_MAKE_HASHABLE(type,...)
Definition HashCombine.h:87
#define JPH_ASSERT(...)
Definition IssueReporting.h:33
@ SWIZZLE_Z
Use the Z component.
Definition Swizzle.h:14
@ SWIZZLE_UNUSED
We always use the Z component when we don't specifically want to initialize a value,...
Definition Swizzle.h:16
@ SWIZZLE_Y
Use the Y component.
Definition Swizzle.h:13
Vec3 operator*(float inV1, Vec3Arg inV2)
Definition Vec3.inl:374
Class that holds 3 floats. Used as a storage class. Convert to Vec3 for calculations.
Definition Float3.h:13
float y
Definition Float3.h:39
float z
Definition Float3.h:40
float x
Definition Float3.h:38
Definition UVec4.h:12
static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2)
Logical and (component wise)
Definition UVec4.inl:194
static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2)
Logical or (component wise)
Definition UVec4.inl:166
JPH_INLINE bool TestAllXYZTrue() const
Test if X, Y and Z components are true (true is when highest bit of component is set)
Definition UVec4.inl:405
Type mValue
Definition UVec4.h:211
static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2)
Logical xor (component wise)
Definition UVec4.inl:180
JPH_INLINE Vec4 ReinterpretAsFloat() const
Reinterpret UVec4 as a Vec4 (doesn't change the bits)
Definition UVec4.inl:332
uint32 mU32[4]
Definition UVec4.h:212
Definition Vec3.h:16
JPH_INLINE bool IsClose(Vec3Arg inV2, float inMaxDistSq=1.0e-12f) const
Test if two vectors are close.
Definition Vec3.inl:342
static JPH_INLINE Vec3 sMax(Vec3Arg inV1, Vec3Arg inV2)
Return the maximum of each of the components.
Definition Vec3.inl:159
JPH_INLINE float Dot(Vec3Arg inV2) const
Dot product.
Definition Vec3.inl:637
JPH_INLINE Vec3 Normalized() const
Normalize vector.
Definition Vec3.inl:694
static JPH_INLINE Type sFixW(Type inValue)
Internal helper function that ensures that the Z component is replicated to the W component to preven...
Vec4::Type Type
Definition Vec3.h:26
JPH_INLINE bool operator==(Vec3Arg inV2) const
Comparison.
Definition Vec3.inl:337
JPH_INLINE Vec4 SplatX() const
Replicate the X component to all components.
Definition Vec3.inl:521
static JPH_INLINE Vec3 sMin(Vec3Arg inV1, Vec3Arg inV2)
Return the minimum value of each of the components.
Definition Vec3.inl:146
JPH_INLINE Vec3 Cross(Vec3Arg inV2) const
Cross product.
Definition Vec3.inl:582
JPH_INLINE Vec3 GetNormalizedPerpendicular() const
Get normalized vector that is perpendicular to this vector.
Definition Vec3.inl:812
static Vec3 sRandom(Random &inRandom)
Get random unit vector.
Definition Vec3.inl:329
JPH_INLINE float GetX() const
Get individual components.
Definition Vec3.h:123
JPH_INLINE bool IsNormalized(float inTolerance=1.0e-6f) const
Test if vector is normalized.
Definition Vec3.inl:737
static JPH_INLINE Vec3 sXor(Vec3Arg inV1, Vec3Arg inV2)
Logical xor (component wise)
Definition Vec3.inl:299
JPH_INLINE float Length() const
Length of vector.
Definition Vec3.inl:669
static JPH_INLINE UVec4 sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Greater than or equal (component wise)
Definition Vec3.inl:237
JPH_INLINE float ReduceMin() const
Get the minimum of X, Y and Z.
Definition Vec3.inl:798
JPH_INLINE Vec3 & operator-=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:493
JPH_INLINE float ReduceMax() const
Get the maximum of X, Y and Z.
Definition Vec3.inl:805
static JPH_INLINE UVec4 sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
Less than or equal (component wise)
Definition Vec3.inl:207
JPH_INLINE Vec3 operator/(float inV2) const
Divide vector by float.
Definition Vec3.inl:385
friend JPH_INLINE Vec3 operator*(float inV1, Vec3Arg inV2)
Multiply vector with float.
Definition Vec3.inl:374
JPH_INLINE int GetLowestComponentIndex() const
Get index of component with lowest value.
Definition Vec3.inl:554
JPH_INLINE Vec3 & operator/=(float inV2)
Divide vector by float.
Definition Vec3.inl:428
JPH_INLINE Vec4 DotV4(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y, Z and W components.
Definition Vec3.inl:621
JPH_INLINE Vec3 Abs() const
Return the absolute value of each of the components.
Definition Vec3.inl:564
JPH_INLINE Vec3 Reciprocal() const
Reciprocal vector (1 / value) for each of the components.
Definition Vec3.inl:577
JPH_INLINE Vec3 NormalizedOr(Vec3Arg inZeroValue) const
Normalize vector or return inZeroValue if the length of the vector is zero.
Definition Vec3.inl:708
JPH_INLINE Vec3 operator+(Vec3Arg inV2) const
Add two float vectors (component wise)
Definition Vec3.inl:444
JPH_INLINE Vec4 SplatZ() const
Replicate the Z component to all components.
Definition Vec3.inl:543
static JPH_INLINE Vec3 sOr(Vec3Arg inV1, Vec3Arg inV2)
Logical or (component wise)
Definition Vec3.inl:288
static JPH_INLINE UVec4 sGreater(Vec3Arg inV1, Vec3Arg inV2)
Greater than (component wise)
Definition Vec3.inl:222
static JPH_INLINE Vec3 sAnd(Vec3Arg inV1, Vec3Arg inV2)
Logical and (component wise)
Definition Vec3.inl:310
JPH_INLINE void CheckW() const
Internal helper function that checks that W is equal to Z, so e.g. dividing by it should not generate...
static JPH_INLINE Vec3 sSelect(Vec3Arg inV1, Vec3Arg inV2, UVec4Arg inControl)
Component wise select, returns inV1 when highest bit of inControl = 0 and inV2 when highest bit of in...
Definition Vec3.inl:269
static JPH_INLINE Vec3 sUnitSpherical(float inTheta, float inPhi)
Definition Vec3.inl:321
JPH_INLINE UVec4 ToInt() const
Convert each component from a float to an int.
Definition Vec3.inl:776
Type mValue
Definition Vec3.h:285
JPH_INLINE float GetY() const
Definition Vec3.h:124
JPH_INLINE Vec4 SplatY() const
Replicate the Y component to all components.
Definition Vec3.inl:532
JPH_INLINE Vec3 operator-() const
Negate.
Definition Vec3.inl:471
JPH_INLINE void StoreFloat3(Float3 *outV) const
Store 3 floats to memory.
Definition Vec3.inl:757
JPH_INLINE float LengthSq() const
Squared length of vector.
Definition Vec3.inl:653
float mF32[4]
Definition Vec3.h:286
static JPH_INLINE UVec4 sEquals(Vec3Arg inV1, Vec3Arg inV2)
Equals (component wise)
Definition Vec3.inl:177
JPH_INLINE bool IsNearZero(float inMaxDistSq=1.0e-12f) const
Test if vector is near zero.
Definition Vec3.inl:347
static JPH_INLINE Vec3 sZero()
Vector with all zeros.
Definition Vec3.inl:107
static JPH_INLINE UVec4 sLess(Vec3Arg inV1, Vec3Arg inV2)
Less than (component wise)
Definition Vec3.inl:192
static JPH_INLINE Vec3 sReplicate(float inV)
Replicate inV across all components.
Definition Vec3.inl:118
static JPH_INLINE Vec3 sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
Clamp a vector between min and max (component wise)
Definition Vec3.inl:172
JPH_INLINE Vec3 & operator*=(float inV2)
Multiply vector with float.
Definition Vec3.inl:396
JPH_INLINE Vec3 & operator+=(Vec3Arg inV2)
Add two float vectors (component wise)
Definition Vec3.inl:455
JPH_INLINE bool IsNaN() const
Test if vector contains NaN elements.
Definition Vec3.inl:742
JPH_INLINE Vec3 Sqrt() const
Component wise square root.
Definition Vec3.inl:683
JPH_INLINE UVec4 ReinterpretAsInt() const
Reinterpret Vec3 as a UVec4 (doesn't change the bits)
Definition Vec3.inl:787
JPH_INLINE Vec3 DotV(Vec3Arg inV2) const
Dot product, returns the dot product in X, Y and Z components.
Definition Vec3.inl:605
static JPH_INLINE Vec3 sLoadFloat3Unsafe(const Float3 &inV)
Load 3 floats from memory (reads 32 bits extra which it doesn't use)
Definition Vec3.inl:134
JPH_INLINE float GetZ() const
Definition Vec3.h:125
JPH_INLINE Vec3 GetSign() const
Get vector that contains the sign of each element (returns 1.0f if positive, -1.0f if negative)
Definition Vec3.inl:826
static JPH_INLINE Vec3 sNaN()
Vector with all NaN's.
Definition Vec3.inl:129
Vec3()=default
Constructor.
JPH_INLINE int GetHighestComponentIndex() const
Get index of component with highest value.
Definition Vec3.inl:559
static JPH_INLINE Vec3 sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
Calculates inMul1 * inMul2 + inAdd.
Definition Vec3.inl:252
JPH_INLINE Vec3 Swizzle() const
Swizzle the elements in inV.
Definition Vec4.h:14
JPH_INLINE float GetX() const
Get individual components.
Definition Vec4.h:113
JPH_INLINE float GetY() const
Definition Vec4.h:114
static JPH_INLINE Vec4 sReplicate(float inV)
Replicate inV across all components.
Definition Vec4.inl:74
void SinCos(Vec4 &outSin, Vec4 &outCos) const
Calculate the sine and cosine for each element of this vector (input in radians)
Definition Vec4.inl:767