// vector3.h
// Forked from pelletier/vector3.
#pragma once
#include <smmintrin.h>
#include <iostream>
#include <cstdlib>
#if __APPLE__
# include <stdlib.h>
#else
# include <malloc.h>
#endif
// Mask with only the sign bit (bit 31) set in each of the four 32-bit lanes.
// XOR-ing a __m128 with it flips the sign of every lane.
static const __m128 SIGNMASK =
    _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(0x80000000u)));
/**
** Allocates a memory chunk aligned on a 16-byte boundary.
** \param size Size of the memory chunk to allocate, in bytes.
** \return A pointer to the newly allocated, aligned memory, or nullptr
** (presumably when the allocation fails).
** \note Only the declaration lives in this header. Memory obtained here is
** intended to be released with free_simd.
*/
void* malloc_simd(const size_t size);
/**
** Frees a 16-byte aligned memory chunk.
** \param v Pointer to the memory to free; it must have been allocated with
** malloc_simd.
** \note Only the declaration lives in this header.
*/
void free_simd(void* v);
/**
** 3D single-precision floating-point mathematical vector class, stored in a
** single 128-bit SSE register.
**
** The x, y and z components occupy the three low lanes of the register; the
** fourth lane is padding. It is zero after the default and (x, y, z)
** constructors, but the lane-wise operators do not preserve that: the float
** overloads, for instance, broadcast the scalar to all four lanes. Every
** reduction in this class (operator==, dot, length, normalize) therefore only
** reads the three low lanes.
*/
#ifdef __GNUC__
class __attribute__((aligned (16))) Vector3
#else
_MM_ALIGN16 class Vector3
#endif
{
public:
    /// Constructors
    /// Builds the zero vector (all four lanes cleared).
    inline Vector3() : mmvalue(_mm_setzero_ps()) {}
    /// Builds (x, y, z); the padding lane is set to 0.
    /// _mm_set_ps takes its arguments from the highest lane to the lowest.
    inline Vector3(float x, float y, float z) : mmvalue(_mm_set_ps(0, z, y, x)) {}
    /// Wraps a raw register as-is. Intentionally implicit: the operators below
    /// return __m128 values and rely on this conversion.
    inline Vector3(__m128 m) : mmvalue(m) {}

    /// Arithmetic operators with Vector3 (component-wise)
    inline Vector3 operator+(const Vector3& b) const { return _mm_add_ps(mmvalue, b.mmvalue); }
    inline Vector3 operator-(const Vector3& b) const { return _mm_sub_ps(mmvalue, b.mmvalue); }
    inline Vector3 operator*(const Vector3& b) const { return _mm_mul_ps(mmvalue, b.mmvalue); }
    inline Vector3 operator/(const Vector3& b) const { return _mm_div_ps(mmvalue, b.mmvalue); }

    /// Assignment and arithmetic operators with Vector3 (component-wise)
    inline Vector3& operator+=(const Vector3& b) { mmvalue = _mm_add_ps(mmvalue, b.mmvalue); return *this; }
    inline Vector3& operator-=(const Vector3& b) { mmvalue = _mm_sub_ps(mmvalue, b.mmvalue); return *this; }
    inline Vector3& operator*=(const Vector3& b) { mmvalue = _mm_mul_ps(mmvalue, b.mmvalue); return *this; }
    inline Vector3& operator/=(const Vector3& b) { mmvalue = _mm_div_ps(mmvalue, b.mmvalue); return *this; }

    /// Arithmetic operators with floats. The scalar is broadcast to all four
    /// lanes, so the padding lane is modified as well.
    inline Vector3 operator+(float b) const { return _mm_add_ps(mmvalue, _mm_set1_ps(b)); }
    inline Vector3 operator-(float b) const { return _mm_sub_ps(mmvalue, _mm_set1_ps(b)); }
    inline Vector3 operator*(float b) const { return _mm_mul_ps(mmvalue, _mm_set1_ps(b)); }
    inline Vector3 operator/(float b) const { return _mm_div_ps(mmvalue, _mm_set1_ps(b)); }

    /// Assignment and arithmetic operators with float (same broadcast as above)
    inline Vector3& operator+=(float b) { mmvalue = _mm_add_ps(mmvalue, _mm_set1_ps(b)); return *this; }
    inline Vector3& operator-=(float b) { mmvalue = _mm_sub_ps(mmvalue, _mm_set1_ps(b)); return *this; }
    inline Vector3& operator*=(float b) { mmvalue = _mm_mul_ps(mmvalue, _mm_set1_ps(b)); return *this; }
    inline Vector3& operator/=(float b) { mmvalue = _mm_div_ps(mmvalue, _mm_set1_ps(b)); return *this; }

    /// Equality operators
    /// Exact lane-wise comparison of x, y and z. The `& 0x7` keeps the three
    /// low comparison bits only, so the padding lane never takes part.
    inline bool operator==(const Vector3& b) const {
        return (((_mm_movemask_ps(_mm_cmpeq_ps(mmvalue, b.mmvalue))) & 0x7) == 0x7);
    }
    inline bool operator!=(const Vector3& b) const { return !(*this == b); }

    /// Unary minus operator (flips the sign bit of every lane)
    inline Vector3 operator-() const { return _mm_xor_ps(mmvalue, SIGNMASK); }

    /// Subscript operator
    // Note: there is no bounds checking here. Index 0 yields x, index 1
    // yields y, and every other index yields z.
    inline const float& operator[](const int i) const {
        return i == 0 ? this->x : (i == 1 ? this->y : this->z);
    }
    inline float& operator[](const int i) {
        return i == 0 ? this->x : (i == 1 ? this->y : this->z);
    }

    /// Cross product
    /// Computed as a.yzx * b.zxy - a.zxy * b.yzx; both shuffles keep the
    /// padding lane in place.
    inline Vector3 cross(const Vector3& b) const {
        return _mm_sub_ps(
            _mm_mul_ps(_mm_shuffle_ps(mmvalue, mmvalue, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(b.mmvalue, b.mmvalue, _MM_SHUFFLE(3, 1, 0, 2))),
            _mm_mul_ps(_mm_shuffle_ps(mmvalue, mmvalue, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(b.mmvalue, b.mmvalue, _MM_SHUFFLE(3, 0, 2, 1)))
        );
    }

    /// Dot product
    /// Mask 0x71: multiply and sum lanes x, y, z (high nibble 0x7) and store
    /// the result in the lowest lane (low nibble 0x1).
    inline float dot(const Vector3& b) const { return _mm_cvtss_f32(_mm_dp_ps(mmvalue, b.mmvalue, 0x71)); }

    /// Length of the vector (square root of the dot product with itself)
    inline float length() const { return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mmvalue, mmvalue, 0x71))); }

    /// Returns the normalized vector
    /// Every lane is divided by the length; there is no special handling of a
    /// zero-length vector.
    inline Vector3 normalize() const {
        // multiplying by rsqrt does not yield an accurate enough result, so we
        // divide by sqrt instead.
        // Mask 0x7F: sum the x, y, z products only (high nibble 0x7) and
        // broadcast the squared length to all four lanes (low nibble 0xF).
        // Summing the padding lane too (0xFF) would skew the length whenever
        // that lane is non-zero, e.g. after `v + 1.0f`.
        return _mm_div_ps(mmvalue, _mm_sqrt_ps(_mm_dp_ps(mmvalue, mmvalue, 0x7F)));
    }

    /// Overloaded operators that ensure alignment
    /// Arrays of Vector3 are allocated and released through malloc_simd /
    /// free_simd.
    // NOTE(review): malloc_simd is documented as possibly returning nullptr,
    // and that result is forwarded unchecked here - confirm how allocation
    // failure is meant to be reported to the caller.
    inline void* operator new[](size_t x) { return malloc_simd(x); }
    inline void operator delete[](void* x) { if (x) free_simd(x); }

    /// Textual representation (declared here only; defined outside this class)
    friend std::ostream& operator<<(std::ostream& os, const Vector3& t);

    /// Direct access member variables.
    /// x, y and z overlay the three low lanes of mmvalue.
    union
    {
        struct { float x, y, z; };
        __m128 mmvalue;
    };
};
/**
** Free operators for expressions with the scalar on the left-hand side
** (float op Vector3). The scalar is broadcast to all four lanes and the
** operation is applied lane by lane, keeping the operand order of the
** Vector3 member operators.
*/
inline Vector3 operator+(float a, const Vector3& b)
{
    return Vector3(_mm_add_ps(b.mmvalue, _mm_set1_ps(a)));
}
inline Vector3 operator-(float a, const Vector3& b)
{
    return Vector3(_mm_sub_ps(_mm_set1_ps(a), b.mmvalue));
}
inline Vector3 operator*(float a, const Vector3& b)
{
    return Vector3(_mm_mul_ps(b.mmvalue, _mm_set1_ps(a)));
}
inline Vector3 operator/(float a, const Vector3& b)
{
    return Vector3(_mm_div_ps(_mm_set1_ps(a), b.mmvalue));
}