Skip to content

Commit

Permalink
Removed path to g++ compiler
Browse files Browse the repository at this point in the history
Improved the unit tests to prevent g++ from skipping loop iterations and performing only the final iteration, which skewed the timing comparisons the tests were intended to show.
Added "best case" and "worst case" versions of some of the tests to show that the fully inlined version of ref performs OK, but that in the worst case, when all matrices are passed as parameters to a function, it does not perform well.

	modified:   .gitignore
	modified:   unittest/InverseSxSH.cpp
	modified:   unittest/Makefile
	modified:   unittest/PartialPrecomputedSxSHxRxT.cpp
	modified:   unittest/PrecomputedSxSHxRxT.cpp
	modified:   unittest/StandardSxSHxPSxPSHxRxIPSHxIPSxTxP.cpp
	modified:   unittest/StandardSxSHxRxT.cpp
	modified:   unittest/TestValues.cpp
	modified:   unittest/TestValues.h
	modified:   unittest/ref/Matrix4x4.h
  • Loading branch information
AlexMWells committed Sep 8, 2015
1 parent 7198de3 commit 17cc398
Show file tree
Hide file tree
Showing 10 changed files with 361 additions and 53 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.o
*.d
*~
*.*~
testXbb
16 changes: 6 additions & 10 deletions unittest/InverseSxSH.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,10 @@ static const char * sInverseSxSHName = "Inverse S*SH";
TEST_CASE(sInverseSxSHName, "")
{
// time inverse S*SH with reference vs XBB
volatile TestValues tv = TestValues::get();

volatile const TestValues tv = TestValues::get();
const int repeatCount = 1000000;

ref::Matrix4x4 refFinalTransform;
// Compose a transform using traditional OO approach
double refTime = 0.0;
{
Expand All @@ -62,8 +61,7 @@ TEST_CASE(sInverseSxSHName, "")
ref::Matrix4x4 SH;
SH.makeShear3(tv.shearX, tv.shearY, tv.shearZ);

refFinalTransform = (S*SH).inverse();

*gRefFinalTransform = (S*SH).inverse();
}
}
}
Expand All @@ -76,21 +74,19 @@ TEST_CASE(sInverseSxSHName, "")
XBB_INLINE_BLOCK
{
for (int rep=0; rep < repeatCount; ++rep) {

xbb::Scale S(tv.scaleX, tv.scaleY, tv.scaleZ);
xbb::Shear3 SH(tv.shearX, tv.shearY, tv.shearZ);

(S*SH).inverse().to(xbbFinalTransform);

(S*SH).inverse().to(*gXbbFinalTransform);
}
}
}


std::cout << sInverseSxSHName << ":"<< std::endl;
// std::cout << "Ref Transform " << refFinalTransform << std::endl;
// std::cout << "xbb Transform " << xbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(xbbFinalTransform, refFinalTransform);
// std::cout << "Ref Transform " << gRefFinalTransform << std::endl;
// std::cout << "xbb Transform " << gXbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(*gXbbFinalTransform, *gRefFinalTransform);

std::cout << " ref Time " << refTime << std::endl;
std::cout << " xbb Time " << xbbTime << std::endl;
Expand Down
8 changes: 5 additions & 3 deletions unittest/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
#CXX=icpc
#LD=icpc

CXX=/opt/rh/devtoolset-2/root/usr/bin/g++
LD=/opt/rh/devtoolset-2/root/usr/bin/g++
#CXX=/opt/rh/devtoolset-2/root/usr/bin/g++
#LD=/opt/rh/devtoolset-2/root/usr/bin/g++
CXX=g++
LD=g++

RM=rm -rf
OUT_DIR=bin
Expand Down Expand Up @@ -121,4 +123,4 @@ clean:
-include $(AUTO_DEPS)


.DEFAULT_GOAL=all
.DEFAULT_GOAL=all
12 changes: 5 additions & 7 deletions unittest/PartialPrecomputedSxSHxRxT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ TEST_CASE(sPartialPrecomputedSxSHxRxTName, "")

const int repeatCount = 10000000;

ref::Matrix4x4 refFinalTransform;
// Compose a transform using traditional OO approach
double refTime = 0.0;
{
Expand Down Expand Up @@ -77,13 +76,12 @@ TEST_CASE(sPartialPrecomputedSxSHxRxTName, "")
ref::Matrix4x4 T;
T.makeTranslation(tv.translateX, tv.translateY, tv.translateZ);

refFinalTransform = S*SH*Rz*Ry*Rx*T;
*gRefFinalTransform = S*SH*Rz*Ry*Rx*T;
}
}
}

// Compose a transform using XBB
xbb::Matrix4x3 xbbFinalTransform;
double xbbTime = 0.0;
{

Expand All @@ -107,16 +105,16 @@ TEST_CASE(sPartialPrecomputedSxSHxRxTName, "")
xbb::Shear3 SH(tv.shearX, tv.shearY, tv.shearZ);
xbb::Translation T(tv.translateX, tv.translateY, tv.translateZ);

(S*SH*Rz*Ry*Rx*T).to(xbbFinalTransform);
(S*SH*Rz*Ry*Rx*T).to(*gXbbFinalTransform);
}
}
}


std::cout << sPartialPrecomputedSxSHxRxTName << ":"<< std::endl;
//std::cout << "Ref Transform " << refFinalTransform << std::endl;
//std::cout << "xbb Transform " << xbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(xbbFinalTransform, refFinalTransform);
//std::cout << "Ref Transform " << *gRefFinalTransform << std::endl;
//std::cout << "xbb Transform " << gXbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(*gXbbFinalTransform, *gRefFinalTransform);

std::cout << " ref Time " << refTime << std::endl;
std::cout << " xbb Time " << xbbTime << std::endl;
Expand Down
12 changes: 5 additions & 7 deletions unittest/PrecomputedSxSHxRxT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ TEST_CASE(sPrecomputedSxSHxRxTName, "")

const int repeatCount = 10000000;

ref::Matrix4x4 refFinalTransform;
// Compose a transform using traditional OO approach
// Precompute the individual components
// and just time how long it takes to concatenate them
Expand All @@ -116,14 +115,13 @@ TEST_CASE(sPrecomputedSxSHxRxTName, "")

for (int rep=0; rep < repeatCount; ++rep) {

refFinalTransform = (*S)*(*SH)*(*Rz)*(*Ry)*(*Rx)*(*T);
*gRefFinalTransform = (*S)*(*SH)*(*Rz)*(*Ry)*(*Rx)*(*T);

}
}
}

// Compose a transform using XBB
xbb::Matrix4x3 xbbFinalTransform;
double xbbTime = 0.0;
{

Expand All @@ -147,16 +145,16 @@ TEST_CASE(sPrecomputedSxSHxRxTName, "")
xbb::Shear3 SH(tv.shearX, tv.shearY, tv.shearZ);
xbb::Translation T(tv.translateX, tv.translateY, tv.translateZ);

(S*SH*Rz*Ry*Rx*T).to(xbbFinalTransform);
(S*SH*Rz*Ry*Rx*T).to(*gXbbFinalTransform);

}
}
}

std::cout << sPrecomputedSxSHxRxTName << ":"<< std::endl;
//std::cout << "Ref Transform " << refFinalTransform << std::endl;
//std::cout << "xbb Transform " << xbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(xbbFinalTransform, refFinalTransform);
//std::cout << "Ref Transform " << *gRefFinalTransform << std::endl;
//std::cout << "xbb Transform " << *gXbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(*gXbbFinalTransform, *gRefFinalTransform);

std::cout << " ref Time " << refTime << std::endl;
std::cout << " xbb Time " << xbbTime << std::endl;
Expand Down
190 changes: 177 additions & 13 deletions unittest/StandardSxSHxPSxPSHxRxIPSHxIPSxTxP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
using namespace xbb;


static const char * sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPName = "Standard S*SH*PS*PSH*R*IPSH*IPS*T*P";
TEST_CASE(sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPName, "")
static const char * sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPBestCaseName = "Standard S*SH*PS*PSH*R*IPSH*IPS*T*P (best case)";
TEST_CASE(sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPBestCaseName, "")
{
// time composing S*SH*R*T with reference vs XBB
volatile TestValues tv = TestValues::get();

const int repeatCount = 10000000;
//const int repeatCount = 1;

ref::Matrix4x4 refFinalTransform;
// Compose a transform using traditional OO approach
double refTime = 0.0;
{
Expand Down Expand Up @@ -82,20 +81,23 @@ TEST_CASE(sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPName, "")
ref::Matrix4x4 PSH;
PSH.makeShear3(tv.parentShearX, tv.parentShearY, tv.parentShearZ);

// const_cast is to remove the volatile
const ref::Matrix4x4 & P = const_cast<const ref::Matrix4x4 &>(tv.parentWorld);

// NOTE: if fully inlined, a compiler could do a pretty decent job
// taking advantage of compile time constants from the "make???" routines.

// Small optimization so we only have to call inverse once
ref::Matrix4x4 PSPSH = PS*PSH;
ref::Matrix4x4 IPSPSH = PSPSH.inverse();

ref::Matrix4x4 & P = *const_cast<ref::Matrix4x4 *>(&tv.parentWorld);

refFinalTransform = S*SH*PSPSH*Rz*Ry*Rx*IPSPSH*T*P;
//refFinalTransform = S*SH*PSPSH*Rz*Ry*Rx*T;
*gRefFinalTransform = S*SH*PSPSH*Rz*Ry*Rx*IPSPSH*T*P;
}
}
}

// Compose a transform using XBB
xbb::Matrix4x3 xbbFinalTransform;
double xbbTime = 0.0;
{
ScopedTimer timer(xbbTime);
Expand All @@ -116,20 +118,182 @@ TEST_CASE(sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPName, "")
auto PSPSH = PS*PSH;
auto IPSPSH = PSPSH.inverse();

xbb::ProxyMatrix4x3<ref::Matrix4x4> P(*const_cast<ref::Matrix4x4 *>(&tv.parentWorld));
// const_cast is to remove the volatile
const ref::Matrix4x4 & refP = const_cast<const ref::Matrix4x4 &>(tv.parentWorld);
const xbb::ProxyMatrix4x3<ref::Matrix4x4> P(refP);


(S*SH*PSPSH*Rz*Ry*Rx*IPSPSH*T*P).to(*gXbbFinalTransform);
}
}
}


std::cout << sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPBestCaseName << ":"<< std::endl;
//std::cout << "Ref Transform " << *gRefFinalTransform << std::endl;
//std::cout << "xbb Transform " << *gXbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(*gXbbFinalTransform, *gRefFinalTransform);

std::cout << " ref Time " << refTime << std::endl;
std::cout << " xbb Time " << xbbTime << std::endl;
std::cout << " speedup " << refTime/xbbTime << "x" << std::endl << std::endl;
}




namespace {

// Reference (ref::Matrix4x4) flavour of the "worst case" composition.
//
// Writes S*SH*(PS*PSH)*Rz*Ry*Rx*inverse(PS*PSH)*T*P into `result`.
// Note that the trailing P is part of the product even though the
// function name stops at T.
//
// The function is marked noinline so that every operand reaches it as an
// opaque matrix reference rather than being folded into the caller.
__attribute__((noinline)) void multiplySxSHxPSxPSHxRzxRyxRxxIPSPSHxT(
    ref::Matrix4x4 & result,
    const ref::Matrix4x4 & S,
    const ref::Matrix4x4 & SH,
    const ref::Matrix4x4 & Rz,
    const ref::Matrix4x4 & Ry,
    const ref::Matrix4x4 & Rx,
    const ref::Matrix4x4 & T,
    const ref::Matrix4x4 & PS,
    const ref::Matrix4x4 & PSH,
    const ref::Matrix4x4 & P)
{
    // Build the parent scale*shear once so inverse() is only called a
    // single time; both the forward and inverse forms are used below.
    ref::Matrix4x4 parentScaleShear = PS*PSH;
    ref::Matrix4x4 invParentScaleShear = parentScaleShear.inverse();

    result = S*SH*parentScaleShear*Rz*Ry*Rx*invParentScaleShear*T*P;
}

// XBB flavour of the same composition, taking the strongly typed xbb
// building blocks instead of generic 4x4 matrices.
//
// Evaluates S*SH*(PS*PSH)*Rz*Ry*Rx*inverse(PS*PSH)*T*P and stores it in
// `result` through the expression's to() call.  Also marked noinline so it
// is timed under the same call boundary as the reference overload.
__attribute__((noinline)) void multiplySxSHxPSxPSHxRzxRyxRxxIPSPSHxT(
    xbb::Matrix4x3 & result,
    const xbb::Scale & S,
    const xbb::Shear3 & SH,
    const xbb::RotationZ & Rz,
    const xbb::RotationY & Ry,
    const xbb::RotationX & Rx,
    const xbb::Translation & T,
    const xbb::Scale & PS,
    const xbb::Shear3 & PSH,
    const xbb::ProxyMatrix4x3<ref::Matrix4x4> & P)
{
    // `auto` is deliberate: the product and its inverse keep whatever
    // types xbb's operators return.
    auto parentScaleShear = PS*PSH;
    auto invParentScaleShear = parentScaleShear.inverse();

    (S*SH*parentScaleShear*Rz*Ry*Rx*invParentScaleShear*T*P).to(result);
}

} // anonymous namespace


static const char * sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPWorstCaseName = "Standard S*SH*PS*PSH*R*IPSH*IPS*T*P (worst case)";
TEST_CASE(sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPWorstCaseName, "")
{
// time composing S*SH*R*T with reference vs XBB
volatile TestValues tv = TestValues::get();

const int repeatCount = 10000000;
//const int repeatCount = 1;

// Compose a transform using traditional OO approach
double refTime = 0.0;
{
ScopedTimer timer(refTime);
for (int rep=0; rep < repeatCount; ++rep) {
XBB_INLINE_BLOCK
{
ref::Matrix4x4 S;
S.makeScale(tv.scaleX, tv.scaleY, tv.scaleZ);

ref::Matrix4x4 SH;
SH.makeShear3(tv.shearX, tv.shearY, tv.shearZ);

ref::Matrix4x4 Rx;
Rx.makeRotationX(tv.rotX);

ref::Matrix4x4 Ry;
Ry.makeRotationY(tv.rotY);

ref::Matrix4x4 Rz;
Rz.makeRotationZ(tv.rotZ);

ref::Matrix4x4 T;
T.makeTranslation(tv.translateX, tv.translateY, tv.translateZ);

ref::Matrix4x4 PS;
PS.makeScale(tv.parentScaleX, tv.parentScaleY, tv.parentScaleZ);

ref::Matrix4x4 PSH;
PSH.makeShear3(tv.parentShearX, tv.parentShearY, tv.parentShearZ);

// const_cast is to remove the volatile
const ref::Matrix4x4 & P = const_cast<const ref::Matrix4x4 &>(tv.parentWorld);

// NOTE: what we found in practice is 4x4 matrices being passed to functions
// where the optimizer doesn't know the 0's or 1's.
// So this represents a worst case scenario.
multiplySxSHxPSxPSHxRzxRyxRxxIPSPSHxT(*gRefFinalTransform, S, SH, Rz, Ry, Rx, T, PS , PSH, P);
}
}
}

// Compose a transform using XBB
double xbbTime = 0.0;
{
ScopedTimer timer(xbbTime);
for (int rep=0; rep < repeatCount; ++rep) {

(S*SH*PSPSH*Rz*Ry*Rx*IPSPSH*T*P).to(xbbFinalTransform);
XBB_INLINE_BLOCK
{
xbb::Scale S(tv.scaleX, tv.scaleY, tv.scaleZ);
xbb::Shear3 SH(tv.shearX, tv.shearY, tv.shearZ);
xbb::RotationX Rx(tv.rotX);
xbb::RotationY Ry(tv.rotY);
xbb::RotationZ Rz(tv.rotZ);
xbb::Translation T(tv.translateX, tv.translateY, tv.translateZ);

xbb::Scale PS(tv.parentScaleX, tv.parentScaleY, tv.parentScaleZ);
xbb::Shear3 PSH(tv.parentShearX, tv.parentShearY, tv.parentShearZ);

// const_cast is to remove the volatile
const ref::Matrix4x4 & refP = const_cast<const ref::Matrix4x4 &>(tv.parentWorld);
const xbb::ProxyMatrix4x3<ref::Matrix4x4> P(refP);

multiplySxSHxPSxPSHxRzxRyxRxxIPSPSHxT(*gXbbFinalTransform, S, SH, Rz, Ry, Rx, T, PS , PSH, P);
}
}
}


std::cout << sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPName << ":"<< std::endl;
//std::cout << "Ref Transform " << refFinalTransform << std::endl;
//std::cout << "xbb Transform " << xbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(xbbFinalTransform, refFinalTransform);
std::cout << sStandardSxSHxPSxPSHxRxIPSHxIPSxTxPWorstCaseName << ":"<< std::endl;
//std::cout << "Ref Transform " << *gRefFinalTransform << std::endl;
//std::cout << "xbb Transform " << *gXbbFinalTransform << std::endl;
TRANSFORMS_ARE_CLOSE(*gXbbFinalTransform, *gRefFinalTransform);

std::cout << " ref Time " << refTime << std::endl;
std::cout << " xbb Time " << xbbTime << std::endl;
Expand Down
Loading

0 comments on commit 17cc398

Please sign in to comment.