[go: up one dir, main page]

Vector4f result = a1*v1 + a2*v2 compiled like ass by eigen3; much better with eigen2

Submitted by Benoit Jacob

Assigned to Gael Guennebaud @ggael

Link to original bugzilla bug (#203)
Operating system: Linux

Description

This test program:

#include <Eigen/Core>
using namespace Eigen;
void foo(float a1, const Vector4f& v1,
float a2, const Vector4f& v2,
Vector4f& result)
{
asm volatile("#begin");
result = a1*v1 + a2*v2;
asm volatile("#end");
}

compiled like this with eigen3 and gcc 4.4.5 x86-64 linux:
$ g++ -c -S -O2 -I eigen derf.cpp -DNDEBUG -o derf.s

gives this crappy assembly:

#APP
# 9 "derf.cpp" 1
    #begin
# 0 "" 2
#NO_APP
xorps %xmm2, %xmm2
movss %xmm1, %xmm2
pshufd $0, %xmm2, %xmm1
xorps %xmm2, %xmm2
mulps (%rsi), %xmm1
movss %xmm0, %xmm2
pshufd $0, %xmm2, %xmm0
mulps (%rdi), %xmm0
addps %xmm1, %xmm0
movaps %xmm0, (%rdx)
#APP
# 11 "derf.cpp" 1
    #end
# 0 "" 2
#NO_APP

while with eigen2, it gives this good assembly:

#APP
# 9 "derf.cpp" 1
    #begin
# 0 "" 2
#NO_APP
shufps $0, %xmm1, %xmm1
shufps $0, %xmm0, %xmm0
mulps (%rsi), %xmm1
mulps (%rdi), %xmm0
addps %xmm1, %xmm0
movaps %xmm0, (%rdx)
#APP
# 11 "derf.cpp" 1
    #end
# 0 "" 2
#NO_APP

Blocking

#25 (closed)

Edited by Eigen Bugzilla