From: Rich Felker
Date: Mon, 19 Mar 2012 13:00:30 +0000 (-0400)
Subject: optimize exponential asm for i386
X-Git-Tag: v0.8.8~93
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=02db27d9deaee71b244c91e720ec819c74dab150;p=musl

optimize exponential asm for i386

up to 30% faster exp2 by avoiding slow frndint and fscale functions.
expm1 also takes a much more direct path for small arguments (the
expected usage case).
---

diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index f4769d59..76ab4d64 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -1,3 +1,37 @@
+.global expm1f
+.type expm1f,@function
+expm1f:
+	flds 4(%esp)
+	jmp 1f
+
+.global expm1l
+.type expm1l,@function
+expm1l:
+	fldt 4(%esp)
+	jmp 1f
+
+.global expm1
+.type expm1,@function
+expm1:
+	fldl 4(%esp)
+1:	fldl2e
+	fmulp
+	fld1
+	fld %st(1)
+	fabs
+	fucom %st(1)
+	fnstsw %ax
+	fstp %st(0)
+	fstp %st(0)
+	sahf
+	ja 1f
+	f2xm1
+	ret
+1:	call 1f
+	fld1
+	fsubrp
+	ret
+
 .global exp2f
 .type exp2f,@function
 exp2f:
@@ -34,22 +68,53 @@ exp:
 .type exp2,@function
 exp2:
 	fldl 4(%esp)
-1:	fxam
-	fnstsw %ax
+1:	mov $0x47000000,%eax
+	push %eax
+	flds (%esp)
+	shl $7,%eax
+	push %eax
+	add %eax,%eax
+	push %eax
+	fld %st(1)
+	fabs
+	fucom %st(1)
+	fnstsw
 	sahf
-	jnp 1f
-	jnc 1f
-	fstps 4(%esp)
-	mov $0xfe,%al
-	and %al,7(%esp)
-	flds 4(%esp)
-1:	fld %st(0)
-	frndint
+	ja 2f
+	fstp %st(0)
+	fstp %st(0)
+	fld %st(0)
+	fistpl 8(%esp)
+	fildl 8(%esp)
 	fxch %st(1)
 	fsub %st(1)
+	mov $0x3fff,%eax
+	add %eax,8(%esp)
 	f2xm1
 	fld1
 	faddp
-	fscale
+	fldt (%esp)
+	fmulp
 	fstp %st(1)
+	add $12,%esp
+	ret
+
+2:	fstp %st(0)
+	fstp %st(0)
+	fsts 8(%esp)
+	mov 8(%esp),%eax
+	lea (%eax,%eax),%ecx
+	cmp $0xff000000,%ecx
+	ja 2f
+	fstp %st(0)
+	xor %ecx,%ecx
+	inc %ecx
+	add %eax,%eax
+	jc 1f
+	mov $0x7ffe,%ecx
+1:	mov %ecx,8(%esp)
+	fldt (%esp)
+	fld %st(0)
+	fmulp
+2:	add $12,%esp
 	ret
diff --git a/src/math/i386/expm1.s b/src/math/i386/expm1.s
index bbb5d12e..f335a3e5 100644
--- a/src/math/i386/expm1.s
+++ b/src/math/i386/expm1.s
@@ -1,47 +1 @@
-.global expm1f
-.type expm1f,@function
-expm1f:
-	flds 4(%esp)
-	jmp 1f
-
-.global expm1l
-.type expm1l,@function
-expm1l:
-	fldt 4(%esp)
-	jmp 1f
-
-.global expm1
-.type expm1,@function
-expm1:
-	fldl 4(%esp)
-1:	fxam
-	fnstsw %ax
-	sahf
-	jnp 1f
-	jnc 1f
-	fstps 4(%esp)
-	mov $0xfe,%al
-	and %al,7(%esp)
-	flds 4(%esp)
-1:	fldl2e
-	fmulp
-	fld %st(0)
-	frndint
-	fldz
-	fcomp
-	fnstsw %ax
-	sahf
-	jnz 1f
-	fstp %st(0)
-	f2xm1
-	ret
-1:	fxch %st(1)
-	fsub %st(1)
-	f2xm1
-	fld1
-	faddp
-	fscale
-	fld1
-	fsubrp
-	fstp %st(1)
-	ret
+# see exp.s