MatrixProductCommon.h
Go to the documentation of this file.
1 //#define EIGEN_POWER_USE_PREFETCH // Use prefetching in gemm routines
// EIGEN_POWER_PREFETCH(p) expands to prefetch(p) only when
// EIGEN_POWER_USE_PREFETCH is defined; otherwise it expands to nothing, so
// call sites compile unchanged with prefetching disabled.
2 #ifdef EIGEN_POWER_USE_PREFETCH
3 #define EIGEN_POWER_PREFETCH(p) prefetch(p)
4 #else
5 #define EIGEN_POWER_PREFETCH(p)
6 #endif
7 
8 namespace Eigen {
9 
10 namespace internal {
11 
// Forward declaration only; no body is given in this header.
// The line carrying the return type and function name is not present in this
// listing. The symbol index at the end of the page pairs this exact parameter
// list with `EIGEN_STRONG_INLINE void gemm_extra_col`.
// NOTE(review): judging by remaining_rows/remaining_cols this presumably
// processes leftover columns of the product - confirm against the definition.
12 template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows>
14  const DataMapper& res,
15  const Scalar* lhs_base,
16  const Scalar* rhs_base,
17  Index depth,
18  Index strideA,
19  Index offsetA,
20  Index row,
21  Index col,
22  Index remaining_rows,
23  Index remaining_cols,
24  const Packet& pAlpha);
25 
// Forward declaration only; no body is given in this header.
// The return-type/name line is not present in this listing. The symbol index
// at the end of the page pairs this parameter list with
// `EIGEN_STRONG_INLINE void gemm_extra_row`.
// Unlike gemm_extra_col it also receives the full rows/cols extents and a
// pMask packet.
// NOTE(review): pMask is presumably the packet produced by bmask() - confirm.
26 template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accRows, const Index accCols>
28  const DataMapper& res,
29  const Scalar* lhs_base,
30  const Scalar* rhs_base,
31  Index depth,
32  Index strideA,
33  Index offsetA,
34  Index row,
35  Index col,
36  Index rows,
37  Index cols,
38  Index remaining_rows,
39  const Packet& pAlpha,
40  const Packet& pMask);
41 
// Forward declaration only; no body is given in this header.
// The return-type/name line is not present in this listing. The symbol index
// at the end of the page pairs this parameter list with
// `EIGEN_STRONG_INLINE void gemm_unrolled_col`.
// `row` is taken by non-const reference, so the callee can modify the
// caller's row index.
42 template<typename Scalar, typename Packet, typename DataMapper, typename Index, const Index accCols>
44  const DataMapper& res,
45  const Scalar* lhs_base,
46  const Scalar* rhs_base,
47  Index depth,
48  Index strideA,
49  Index offsetA,
50  Index& row,
51  Index rows,
52  Index col,
53  Index remaining_cols,
54  const Packet& pAlpha);
55 
// Forward declaration: returns a Packet computed from `remaining_rows`.
// NOTE(review): presumably a per-lane mask for a partial row block (compare
// the pMask parameter of gemm_extra_row) - confirm against the definition.
56 template<typename Packet>
57 EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows);
58 
// Forward declaration only; no body is given in this header.
// The return-type/name line is not present in this listing. The symbol index
// at the end of the page pairs this parameter list with
// `EIGEN_STRONG_INLINE void gemm_complex_extra_col`.
// Compared with gemm_extra_col this adds a strideB argument, splits alpha into
// pAlphaReal/pAlphaImag, and adds the ConjugateLhs/ConjugateRhs and
// LhsIsReal/RhsIsReal compile-time flags.
59 template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
61  const DataMapper& res,
62  const Scalar* lhs_base,
63  const Scalar* rhs_base,
64  Index depth,
65  Index strideA,
66  Index offsetA,
67  Index strideB,
68  Index row,
69  Index col,
70  Index remaining_rows,
71  Index remaining_cols,
72  const Packet& pAlphaReal,
73  const Packet& pAlphaImag);
74 
// Forward declaration only; no body is given in this header.
// The return-type/name line is not present in this listing. The symbol index
// at the end of the page pairs this parameter list with
// `EIGEN_STRONG_INLINE void gemm_complex_extra_row`.
// Complex counterpart of gemm_extra_row: same rows/cols/remaining_rows/pMask
// arguments, plus strideB and separate real/imaginary alpha packets.
75 template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
77  const DataMapper& res,
78  const Scalar* lhs_base,
79  const Scalar* rhs_base,
80  Index depth,
81  Index strideA,
82  Index offsetA,
83  Index strideB,
84  Index row,
85  Index col,
86  Index rows,
87  Index cols,
88  Index remaining_rows,
89  const Packet& pAlphaReal,
90  const Packet& pAlphaImag,
91  const Packet& pMask);
92 
// Forward declaration only; no body is given in this header.
// The return-type/name line is not present in this listing. The symbol index
// at the end of the page pairs this parameter list with
// `EIGEN_STRONG_INLINE void gemm_complex_unrolled_col`.
// Complex counterpart of gemm_unrolled_col; `row` is again a non-const
// reference, so the callee can modify the caller's row index.
93 template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
95  const DataMapper& res,
96  const Scalar* lhs_base,
97  const Scalar* rhs_base,
98  Index depth,
99  Index strideA,
100  Index offsetA,
101  Index strideB,
102  Index& row,
103  Index rows,
104  Index col,
105  Index remaining_cols,
106  const Packet& pAlphaReal,
107  const Packet& pAlphaImag);
108 
109 template<typename Scalar, typename Packet>
111 
// Forward declaration of the bload overload that fills a 4-packet block `acc`.
// It takes the result mapper `res` and a (row, col) position; accCols, N and
// StorageOrder are compile-time parameters.
// NOTE(review): presumably loads existing result values from `res` - confirm
// against the definition, which is not in this header.
112 template<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>
113 EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,4>& acc, const DataMapper& res, Index row, Index col);
114 
// Forward declaration of the bload overload that fills an 8-packet block
// `acc`; template and function parameters otherwise match the 4-packet
// overload declared just above.
115 template<typename DataMapper, typename Packet, typename Index, const Index accCols, int N, int StorageOrder>
116 EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,8>& acc, const DataMapper& res, Index row, Index col);
117 
// Forward declaration of bscale. Both 4-packet blocks are passed by non-const
// reference, together with a read-only alpha packet.
// NOTE(review): presumably acc += pAlpha * accZ per packet - confirm against
// the definition, which is not in this header.
118 template<typename Packet>
119 EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,4>& acc, PacketBlock<Packet,4>& accZ, const Packet& pAlpha);
120 
// Forward declaration of bscalec, the complex-valued scaling helper. It takes
// N-packet real/imaginary blocks (aReal, aImag), real/imaginary scalar packets
// (bReal, bImag), and N-packet real/imaginary blocks (cReal, cImag).
// NOTE(review): presumably c = a * b in split real/imaginary form - confirm
// against the definition, which is not in this header.
121 template<typename Packet, int N>
122 EIGEN_ALWAYS_INLINE void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag, const Packet& bReal, const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag);
123 
// Byte-selection pattern passed as the control (third) argument of vec_perm in
// bcouple_common, with the real accumulator as first operand and the imaginary
// accumulator as second. Indices 0-15 address bytes of the first operand and
// 16-31 bytes of the second, so this picks real bytes 0-3, imag bytes 0-3,
// real bytes 4-7, imag bytes 4-7: the first two 32-bit (real, imag) pairs.
124 const static Packet16uc p16uc_SETCOMPLEX32_FIRST = { 0, 1, 2, 3,
125  16, 17, 18, 19,
126  4, 5, 6, 7,
127  20, 21, 22, 23};
128 
// Companion of p16uc_SETCOMPLEX32_FIRST for the other half of the inputs:
// as a vec_perm control it picks real bytes 8-11, imag bytes 8-11 (indices
// 24-27), real bytes 12-15, imag bytes 12-15 (indices 28-31), i.e. the last
// two 32-bit (real, imag) pairs.
129 const static Packet16uc p16uc_SETCOMPLEX32_SECOND = { 8, 9, 10, 11,
130  24, 25, 26, 27,
131  12, 13, 14, 15,
132  28, 29, 30, 31};
133 //[a,b],[ai,bi] = [a,ai] - This is equivalent to p16uc_GETREAL64
// As a vec_perm control: bytes 0-7 of the first operand followed by bytes 0-7
// of the second operand (indices 16-23), i.e. one 64-bit value from each.
134 const static Packet16uc p16uc_SETCOMPLEX64_FIRST = { 0, 1, 2, 3, 4, 5, 6, 7,
135  16, 17, 18, 19, 20, 21, 22, 23};
136 
137 //[a,b],[ai,bi] = [b,bi] - This is equivalent to p16uc_GETIMAG64
// As a vec_perm control: bytes 8-15 of the first operand followed by bytes
// 8-15 of the second operand (indices 24-31).
138 const static Packet16uc p16uc_SETCOMPLEX64_SECOND = { 8, 9, 10, 11, 12, 13, 14, 15,
139  24, 25, 26, 27, 28, 29, 30, 31};
140 
141 
142 // Take two decoupled (separate real and imaginary) PacketBlocks and return two coupled (interleaved real/imaginary pairs) PacketBlocks.
// bcouple_common, 4-packet form. The signature line is absent from this
// listing; the symbol index at the end of the page gives it as
//   EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock<Packet,4>& taccReal,
//       PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc,4>& acc1,
//       PacketBlock<Packetc,4>& acc2)
// For each packet index i in 0..3 it permutes taccReal.packet[i] with
// taccImag.packet[i] and writes the result into the `.v` member of the
// matching complex packet.
143 template<typename Packet, typename Packetc>
145 {
// acc1: selection made by the SETCOMPLEX32_FIRST pattern.
146  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);
147  acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_FIRST);
148  acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_FIRST);
149  acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_FIRST);
150 
// acc2: selection made by the SETCOMPLEX32_SECOND pattern, same inputs.
151  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);
152  acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX32_SECOND);
153  acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX32_SECOND);
154  acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX32_SECOND);
155 }
156 
// bcouple, 4-packet form. The signature line is absent from this listing; the
// symbol index at the end of the page gives it as
//   EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,4>& taccReal,
//       PacketBlock<Packet,4>& taccImag, PacketBlock<Packetc,8>& tRes,
//       PacketBlock<Packetc,4>& acc1, PacketBlock<Packetc,4>& acc2)
// Couples the real/imaginary accumulators into acc1/acc2 and then adds the
// eight packets of tRes to them.
157 template<typename Packet, typename Packetc>
159 {
160  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);
161 
// tRes.packet[0..3] are added to acc1.
162  acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
163  acc1.packet[1] = padd<Packetc>(tRes.packet[1], acc1.packet[1]);
164  acc1.packet[2] = padd<Packetc>(tRes.packet[2], acc1.packet[2]);
165  acc1.packet[3] = padd<Packetc>(tRes.packet[3], acc1.packet[3]);
166 
// tRes.packet[4..7] are added to acc2.
167  acc2.packet[0] = padd<Packetc>(tRes.packet[4], acc2.packet[0]);
168  acc2.packet[1] = padd<Packetc>(tRes.packet[5], acc2.packet[1]);
169  acc2.packet[2] = padd<Packetc>(tRes.packet[6], acc2.packet[2]);
170  acc2.packet[3] = padd<Packetc>(tRes.packet[7], acc2.packet[3]);
171 }
172 
// Single-packet coupling: only packet[0] of each block is read and written,
// using the same SETCOMPLEX32 patterns as the 4-packet bcouple_common.
// NOTE(review): the signature line is absent from this listing; this is
// presumably the PacketBlock<...,1> overload of bcouple_common - confirm.
173 template<typename Packet, typename Packetc>
175 {
176  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_FIRST);
177 
178  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX32_SECOND);
179 }
180 
// Single-packet couple-and-accumulate: calls bcouple_common, then adds
// tRes.packet[0] to acc1.packet[0] and tRes.packet[1] to acc2.packet[0].
// NOTE(review): the signature line is absent from this listing; this is
// presumably the single-packet overload of bcouple - confirm.
181 template<typename Packet, typename Packetc>
183 {
184  bcouple_common<Packet, Packetc>(taccReal, taccImag, acc1, acc2);
185 
186  acc1.packet[0] = padd<Packetc>(tRes.packet[0], acc1.packet[0]);
187 
188  acc2.packet[0] = padd<Packetc>(tRes.packet[1], acc2.packet[0]);
189 }
190 
// Full specialization of the 4-packet bcouple_common. The signature line is
// absent from this listing; the symbol index at the end of the page gives it
// as bcouple_common<Packet2d, Packet1cd>(PacketBlock<Packet2d,4>& taccReal,
//   PacketBlock<Packet2d,4>& taccImag, PacketBlock<Packet1cd,4>& acc1,
//   PacketBlock<Packet1cd,4>& acc2).
// Same structure as the generic version, but it permutes with the
// SETCOMPLEX64 patterns instead of the SETCOMPLEX32 ones.
191 template<>
193 {
194  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);
195  acc1.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_FIRST);
196  acc1.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_FIRST);
197  acc1.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_FIRST);
198 
199  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);
200  acc2.packet[1].v = vec_perm(taccReal.packet[1], taccImag.packet[1], p16uc_SETCOMPLEX64_SECOND);
201  acc2.packet[2].v = vec_perm(taccReal.packet[2], taccImag.packet[2], p16uc_SETCOMPLEX64_SECOND);
202  acc2.packet[3].v = vec_perm(taccReal.packet[3], taccImag.packet[3], p16uc_SETCOMPLEX64_SECOND);
203 }
204 
// Single-packet full specialization using the SETCOMPLEX64 patterns: only
// packet[0] of each block is read and written.
// NOTE(review): the signature line is absent from this listing; this is
// presumably the single-packet bcouple_common<Packet2d, Packet1cd> - confirm.
205 template<>
207 {
208  acc1.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_FIRST);
209 
210  acc2.packet[0].v = vec_perm(taccReal.packet[0], taccImag.packet[0], p16uc_SETCOMPLEX64_SECOND);
211 }
212 
213 // This is necessary because ploadRhs for double returns a pair of vectors when MMA is enabled.
// ploadRhs. The signature line is absent from this listing; the symbol index
// at the end of the page gives it as
//   EIGEN_ALWAYS_INLINE Packet ploadRhs(const Scalar* rhs)
// Returns one Packet read from `rhs` through ploadu<Packet>.
214 template<typename Scalar, typename Packet>
216 {
217  return ploadu<Packet>(rhs);
218 }
219 
220 } // end namespace internal
221 } // end namespace Eigen
EIGEN_STRONG_INLINE void gemm_extra_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index row, Index col, Index remaining_rows, Index remaining_cols, const Packet &pAlpha)
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:932
SCALAR Scalar
Definition: bench_gemm.cpp:46
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
__vector unsigned char Packet16uc
static const Packet16uc p16uc_SETCOMPLEX32_SECOND
EIGEN_ALWAYS_INLINE Packet ploadRhs(const Scalar *rhs)
static double depth
static const Packet16uc p16uc_SETCOMPLEX64_SECOND
static const Packet16uc p16uc_SETCOMPLEX32_FIRST
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock< Packet, N > &aReal, PacketBlock< Packet, N > &aImag, const Packet &bReal, const Packet &bImag, PacketBlock< Packet, N > &cReal, PacketBlock< Packet, N > &cImag)
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
EIGEN_ALWAYS_INLINE void bcouple_common< Packet2d, Packet1cd >(PacketBlock< Packet2d, 4 > &taccReal, PacketBlock< Packet2d, 4 > &taccImag, PacketBlock< Packet1cd, 4 > &acc1, PacketBlock< Packet1cd, 4 > &acc2)
EIGEN_STRONG_INLINE void gemm_complex_extra_row(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index col, Index rows, Index cols, Index remaining_rows, const Packet &pAlphaReal, const Packet &pAlphaImag, const Packet &pMask)
EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
EIGEN_ALWAYS_INLINE void bcouple_common(PacketBlock< Packet, 4 > &taccReal, PacketBlock< Packet, 4 > &taccImag, PacketBlock< Packetc, 4 > &acc1, PacketBlock< Packetc, 4 > &acc2)
m row(1)
EIGEN_STRONG_INLINE void gemm_extra_row(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index row, Index col, Index rows, Index cols, Index remaining_rows, const Packet &pAlpha, const Packet &pMask)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
EIGEN_STRONG_INLINE void gemm_complex_extra_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index col, Index remaining_rows, Index remaining_cols, const Packet &pAlphaReal, const Packet &pAlphaImag)
static const Packet16uc p16uc_SETCOMPLEX64_FIRST
EIGEN_ALWAYS_INLINE void bload(PacketBlock< Packet, 4 > &acc, const DataMapper &res, Index row, Index col)
EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar *lhs)
EIGEN_STRONG_INLINE void gemm_complex_unrolled_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index &row, Index rows, Index col, Index remaining_cols, const Packet &pAlphaReal, const Packet &pAlphaImag)
EIGEN_ALWAYS_INLINE void bscale(PacketBlock< Packet, 4 > &acc, PacketBlock< Packet, 4 > &accZ, const Packet &pAlpha)
m col(1)
EIGEN_ALWAYS_INLINE void bcouple(PacketBlock< Packet, 4 > &taccReal, PacketBlock< Packet, 4 > &taccImag, PacketBlock< Packetc, 8 > &tRes, PacketBlock< Packetc, 4 > &acc1, PacketBlock< Packetc, 4 > &acc2)
EIGEN_STRONG_INLINE void gemm_unrolled_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index &row, Index rows, Index col, Index remaining_cols, const Packet &pAlpha)


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:34:54