product_notemporary.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #define TEST_ENABLE_TEMPORARY_TRACKING
11 
12 #include "main.h"
13 
// Innermost step of the scalar-multiple sweep.
//
// For the product A * B this runs, in order, the three no-alias updates
//   dst  = A * B,   dst += A * B,   dst -= A * B
// Each update is expected to be evaluated with an evaluation count of 0, and
// after each one dst is compared against a reference built from explicitly
// evaluated operands: (A*B), then 2*(A*B), then (A*B) again.
//
// dst is overwritten by the first assignment, so its previous content is
// irrelevant.
template<typename Dst, typename Lhs, typename Rhs>
void check_scalar_multiple3(Dst &dst, const Lhs& A, const Rhs& B)
{
  // dst = A*B
  VERIFY_EVALUATION_COUNT( (dst.noalias() = A * B), 0);
  VERIFY_IS_APPROX( dst, (A.eval() * B.eval()).eval() );

  // dst = A*B + A*B
  VERIFY_EVALUATION_COUNT( (dst.noalias() += A * B), 0);
  VERIFY_IS_APPROX( dst, 2*(A.eval() * B.eval()).eval() );

  // dst = 2*A*B - A*B
  VERIFY_EVALUATION_COUNT( (dst.noalias() -= A * B), 0);
  VERIFY_IS_APPROX( dst, (A.eval() * B.eval()).eval() );
}
24 
// Middle step of the scalar-multiple sweep: keeps the left operand A fixed and
// runs check_scalar_multiple3 for every variant of the right operand:
//   B, -B, s2*B, B*s2 and conj(B*s2).
//
// The plain `B` case comes first so that the sweep mirrors
// check_scalar_multiple1 (which also starts with the unmodified operand) and
// the unscaled product A * B is actually covered.
template<typename Dst, typename Lhs, typename Rhs, typename S2>
void check_scalar_multiple2(Dst &dst, const Lhs& A, const Rhs& B, S2 s2)
{
  CALL_SUBTEST( check_scalar_multiple3(dst, A, B) );
  CALL_SUBTEST( check_scalar_multiple3(dst, A, -B) );
  CALL_SUBTEST( check_scalar_multiple3(dst, A, s2*B) );
  CALL_SUBTEST( check_scalar_multiple3(dst, A, B*s2) );
  CALL_SUBTEST( check_scalar_multiple3(dst, A, (B*s2).conjugate()) );
}
34 
// Outermost step of the scalar-multiple sweep: varies the left operand and
// delegates the right-operand variants to check_scalar_multiple2.
//
// Left-operand variants, in order: A, -A, s1*A, A*s1 and conj(A*s1).
// s2 is only forwarded; it is the factor applied to B further down.
template<typename Dst, typename Lhs, typename Rhs, typename S1, typename S2>
void check_scalar_multiple1(Dst &dst, const Lhs& A, const Rhs& B, S1 s1, S2 s2)
{
  // unmodified and negated left operand
  CALL_SUBTEST( check_scalar_multiple2(dst, A, B, s2) );
  CALL_SUBTEST( check_scalar_multiple2(dst, -A, B, s2) );

  // scalar on either side of the left operand
  CALL_SUBTEST( check_scalar_multiple2(dst, s1*A, B, s2) );
  CALL_SUBTEST( check_scalar_multiple2(dst, A*s1, B, s2) );

  // scaled and conjugated left operand
  CALL_SUBTEST( check_scalar_multiple2(dst, (A*s1).conjugate(), B, s2) );
}
44 
45 template<typename MatrixType> void product_notemporary(const MatrixType& m)
46 {
47  /* This test checks the number of temporaries created
48  * during the evaluation of a complex expression */
49  typedef typename MatrixType::Scalar Scalar;
50  typedef typename MatrixType::RealScalar RealScalar;
51  typedef Matrix<Scalar, 1, Dynamic> RowVectorType;
52  typedef Matrix<Scalar, Dynamic, 1> ColVectorType;
53  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> ColMajorMatrixType;
54  typedef Matrix<Scalar, Dynamic, Dynamic, RowMajor> RowMajorMatrixType;
55 
56  Index rows = m.rows();
57  Index cols = m.cols();
58 
59  ColMajorMatrixType m1 = MatrixType::Random(rows, cols),
60  m2 = MatrixType::Random(rows, cols),
61  m3(rows, cols);
62  RowVectorType rv1 = RowVectorType::Random(rows), rvres(rows);
63  ColVectorType cv1 = ColVectorType::Random(cols), cvres(cols);
64  RowMajorMatrixType rm3(rows, cols);
65 
66  Scalar s1 = internal::random<Scalar>(),
67  s2 = internal::random<Scalar>(),
68  s3 = internal::random<Scalar>();
69 
70  Index c0 = internal::random<Index>(4,cols-8),
71  c1 = internal::random<Index>(8,cols-c0),
72  r0 = internal::random<Index>(4,cols-8),
73  r1 = internal::random<Index>(8,rows-r0);
74 
75  VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()), 1);
76  VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()).transpose(), 1);
77  VERIFY_EVALUATION_COUNT( m3.noalias() = m1 * m2.adjoint(), 0);
78 
79  VERIFY_EVALUATION_COUNT( m3 = s1 * (m1 * m2.transpose()), 1);
80 // VERIFY_EVALUATION_COUNT( m3 = m3 + s1 * (m1 * m2.transpose()), 1);
81  VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0);
82 
83  VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()), 1);
84  VERIFY_EVALUATION_COUNT( m3 = m3 - (m1 * m2.adjoint()), 1);
85 
86  VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()).transpose(), 1);
87  VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0);
88  VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0);
89  VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0);
90  VERIFY_EVALUATION_COUNT( m3.noalias() = m3 - m1 * m2.transpose(), 0);
91  VERIFY_EVALUATION_COUNT( m3.noalias() += m3 - m1 * m2.transpose(), 0);
92  VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 - m1 * m2.transpose(), 0);
93 
94  VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0);
95  VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1);
96  VERIFY_EVALUATION_COUNT( m3.noalias() = (s1 * m1).adjoint() * s2 * m2, 0);
97  VERIFY_EVALUATION_COUNT( m3.noalias() += s1 * (-m1*s3).adjoint() * (s2 * m2 * s3), 0);
98  VERIFY_EVALUATION_COUNT( m3.noalias() -= s1 * (m1.transpose() * m2), 0);
99 
100  VERIFY_EVALUATION_COUNT(( m3.block(r0,r0,r1,r1).noalias() += -m1.block(r0,c0,r1,c1) * (s2*m2.block(r0,c0,r1,c1)).adjoint() ), 0);
101  VERIFY_EVALUATION_COUNT(( m3.block(r0,r0,r1,r1).noalias() -= s1 * m1.block(r0,c0,r1,c1) * m2.block(c0,r0,c1,r1) ), 0);
102 
103  // NOTE this is because the Block expression is not handled yet by our expression analyser
104  VERIFY_EVALUATION_COUNT(( m3.block(r0,r0,r1,r1).noalias() = s1 * m1.block(r0,c0,r1,c1) * (s1*m2).block(c0,r0,c1,r1) ), 1);
105 
106  VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).template triangularView<Lower>() * m2, 0);
107  VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView<Upper>() * (m2+m2), 1);
108  VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView<UnitUpper>() * m2.adjoint(), 0);
109 
110  VERIFY_EVALUATION_COUNT( m3.template triangularView<Upper>() = (m1 * m2.adjoint()), 0);
111  VERIFY_EVALUATION_COUNT( m3.template triangularView<Upper>() -= (m1 * m2.adjoint()), 0);
112 
113  // NOTE this is because the blas_traits require innerstride==1 to avoid a temporary, but that doesn't seem to be actually needed for the triangular products
114  VERIFY_EVALUATION_COUNT( rm3.col(c0).noalias() = (s1 * m1.adjoint()).template triangularView<UnitUpper>() * (s2*m2.row(c0)).adjoint(), 1);
115 
116  VERIFY_EVALUATION_COUNT( m1.template triangularView<Lower>().solveInPlace(m3), 0);
117  VERIFY_EVALUATION_COUNT( m1.adjoint().template triangularView<Lower>().solveInPlace(m3.transpose()), 0);
118 
119  VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).adjoint().template selfadjointView<Lower>() * (-m2*s3).adjoint(), 0);
120  VERIFY_EVALUATION_COUNT( m3.noalias() = s2 * m2.adjoint() * (s1 * m1.adjoint()).template selfadjointView<Upper>(), 0);
121  VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template selfadjointView<Lower>() * m2.adjoint(), 0);
122 
123  // NOTE this is because the blas_traits require innerstride==1 to avoid a temporary, but that doesn't seem to be actually needed for the triangular products
124  VERIFY_EVALUATION_COUNT( m3.col(c0).noalias() = (s1 * m1).adjoint().template selfadjointView<Lower>() * (-m2.row(c0)*s3).adjoint(), 1);
125  VERIFY_EVALUATION_COUNT( m3.col(c0).noalias() -= (s1 * m1).adjoint().template selfadjointView<Upper>() * (-m2.row(c0)*s3).adjoint(), 1);
126 
127  VERIFY_EVALUATION_COUNT( m3.block(r0,c0,r1,c1).noalias() += m1.block(r0,r0,r1,r1).template selfadjointView<Upper>() * (s1*m2.block(r0,c0,r1,c1)), 0);
128  VERIFY_EVALUATION_COUNT( m3.block(r0,c0,r1,c1).noalias() = m1.block(r0,r0,r1,r1).template selfadjointView<Upper>() * m2.block(r0,c0,r1,c1), 0);
129 
130  VERIFY_EVALUATION_COUNT( m3.template selfadjointView<Lower>().rankUpdate(m2.adjoint()), 0);
131 
132  // Here we will get 1 temporary for each resize operation of the lhs operator; resize(r1,c1) would lead to zero temporaries
133  m3.resize(1,1);
134  VERIFY_EVALUATION_COUNT( m3.noalias() = m1.block(r0,r0,r1,r1).template selfadjointView<Lower>() * m2.block(r0,c0,r1,c1), 1);
135  m3.resize(1,1);
136  VERIFY_EVALUATION_COUNT( m3.noalias() = m1.block(r0,r0,r1,r1).template triangularView<UnitUpper>() * m2.block(r0,c0,r1,c1), 1);
137 
138  // Zero temporaries for lazy products ...
139  m3.setRandom(rows,cols);
140  VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose().lazyProduct(m3)).diagonal().sum(), 0 );
141  VERIFY_EVALUATION_COUNT( m3.noalias() = m1.conjugate().lazyProduct(m2.conjugate()), 0);
142 
143  // ... and even no temporary for even deeply (>=2) nested products
144  VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose() * m3).diagonal().sum(), 0 );
145  VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose() * m3).diagonal().array().abs().sum(), 0 );
146 
147  // Zero temporaries for ... CoeffBasedProductMode
148  VERIFY_EVALUATION_COUNT( m3.col(0).template head<5>() * m3.col(0).transpose() + m3.col(0).template head<5>() * m3.col(0).transpose(), 0 );
149 
150  // Check matrix * vectors
151  VERIFY_EVALUATION_COUNT( cvres.noalias() = m1 * cv1, 0 );
152  VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * cv1, 0 );
153  VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.col(0), 0 );
154  VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * rv1.adjoint(), 0 );
155  VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.row(0).transpose(), 0 );
156 
157  VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * cv1, 0 );
158  VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 );
159  VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 );
160  VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 );
161 
162  // Check outer products
163  #ifdef EIGEN_ALLOCA
164  bool temp_via_alloca = m3.rows()*sizeof(Scalar) <= EIGEN_STACK_ALLOCATION_LIMIT;
165  #else
166  bool temp_via_alloca = false;
167  #endif
168  m3 = cv1 * rv1;
169  VERIFY_EVALUATION_COUNT( m3.noalias() = cv1 * rv1, 0 );
170  VERIFY_EVALUATION_COUNT( m3.noalias() = (cv1+cv1) * (rv1+rv1), temp_via_alloca ? 0 : 1 );
171  VERIFY_EVALUATION_COUNT( m3.noalias() = (m1*cv1) * (rv1), 1 );
172  VERIFY_EVALUATION_COUNT( m3.noalias() += (m1*cv1) * (rv1), 1 );
173  rm3 = cv1 * rv1;
174  VERIFY_EVALUATION_COUNT( rm3.noalias() = cv1 * rv1, 0 );
175  VERIFY_EVALUATION_COUNT( rm3.noalias() = (cv1+cv1) * (rv1+rv1), temp_via_alloca ? 0 : 1 );
176  VERIFY_EVALUATION_COUNT( rm3.noalias() = (cv1) * (rv1 * m1), 1 );
177  VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 );
178  VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 );
179  VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 );
180 
181  // Check nested products
182  VERIFY_EVALUATION_COUNT( cvres.noalias() = m1.adjoint() * m1 * cv1, 1 );
183  VERIFY_EVALUATION_COUNT( rvres.noalias() = rv1 * (m1 * m2.adjoint()), 1 );
184 
185  // exhaustively check all scalar multiple combinations:
186  {
187  // Generic path:
188  check_scalar_multiple1(m3, m1, m2, s1, s2);
189  // Force fall back to coeff-based:
190  typename ColMajorMatrixType::BlockXpr m3_blck = m3.block(r0,r0,1,1);
191  check_scalar_multiple1(m3_blck, m1.block(r0,c0,1,1), m2.block(c0,r0,1,1), s1, s2);
192  }
193 }
194 
196 {
197  int s;
198  for(int i = 0; i < g_repeat; i++) {
199  s = internal::random<int>(16,EIGEN_TEST_MAX_SIZE);
200  CALL_SUBTEST_1( product_notemporary(MatrixXf(s, s)) );
201  CALL_SUBTEST_2( product_notemporary(MatrixXd(s, s)) );
203 
204  s = internal::random<int>(16,EIGEN_TEST_MAX_SIZE/2);
205  CALL_SUBTEST_3( product_notemporary(MatrixXcf(s,s)) );
206  CALL_SUBTEST_4( product_notemporary(MatrixXcd(s,s)) );
208  }
209 }
Matrix< SCALARB, Dynamic, Dynamic, opt_B > B
Definition: bench_gemm.cpp:49
Matrix3f m
SCALAR Scalar
Definition: bench_gemm.cpp:46
void product_notemporary(const MatrixType &m)
#define CALL_SUBTEST_4(FUNC)
m m block(1, 0, 2, 2)<< 4
void adjoint(const MatrixType &m)
Definition: adjoint.cpp:67
Symmetric< 2 > S2
Definition: testGroup.cpp:80
#define CALL_SUBTEST_3(FUNC)
MatrixType m2(n_dims)
void check_scalar_multiple3(Dst &dst, const Lhs &A, const Rhs &B)
void diagonal(const MatrixType &m)
Definition: diagonal.cpp:12
EIGEN_DECLARE_TEST(product_notemporary)
Block< Derived > BlockXpr
Definition: BlockMethods.h:32
MatrixXf MatrixType
Matrix< SCALARA, Dynamic, Dynamic, opt_A > A
Definition: bench_gemm.cpp:48
#define VERIFY_EVALUATION_COUNT(XPR, N)
#define VERIFY_IS_APPROX(a, b)
void check_scalar_multiple1(Dst &dst, const Lhs &A, const Rhs &B, S1 s1, S2 s2)
#define CALL_SUBTEST_1(FUNC)
Matrix3d m1
Definition: IOFormat.cpp:2
static int g_repeat
Definition: main.h:169
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
RealScalar s
EIGEN_DEVICE_FUNC ConjugateReturnType conjugate() const
NumTraits< Scalar >::Real RealScalar
Definition: bench_gemm.cpp:47
void check_scalar_multiple2(Dst &dst, const Lhs &A, const Rhs &B, S2 s2)
#define TEST_SET_BUT_UNUSED_VARIABLE(X)
Definition: main.h:121
static const double r1
#define CALL_SUBTEST(FUNC)
Definition: main.h:399
#define EIGEN_TEST_MAX_SIZE
A triangularView< Lower >().adjoint().solveInPlace(B)
#define EIGEN_STACK_ALLOCATION_LIMIT
Definition: Macros.h:54
#define CALL_SUBTEST_2(FUNC)
internal::nested_eval< T, 1 >::type eval(const T &xpr)
The matrix class, also used for vectors and row-vectors.


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:35:18