From 31c5fb80b9eae86f801be4f46025bc6532a554c5 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy
Date: Thu, 15 Aug 2013 10:56:57 +0000
Subject: [PATCH] math: fix x86 asin, atan, exp, log1p to raise underflow

underflow is raised by an inexact subnormal float store. since
subnormal operations are slow, check the underflow flag (mask 16 of
the x87 status word read with fnstsw) and skip the store if it is
already raised.

asin, atan and expm1 classify the argument from its high word:
doubling the word drops the sign bit, so a result below 0x01000000
(float) or 0x00200000 (double high word) means x is zero or
subnormal and x itself is returned. the float variants store x*x,
the double variants store x rounded to float.

expm1 additionally returns -1 directly once x*log2e is at or below
-65 (0xc2820000 is -65.0f), so that path does not raise underflow.

in log1p and log1pf x sits on top of a second x87 stack entry (the
one the store path has to pop before returning). that entry is
popped on the skip-store branch as well, so the function never
returns with anything but the result on the x87 stack. log1pf uses
the same x*x store as asinf, atanf and expm1f.
---
 src/math/i386/asin.s   | 23 ++++++++++++++++++++++-
 src/math/i386/atan.s   | 10 ++++++++++
 src/math/i386/atanf.s  | 12 ++++++++++++
 src/math/i386/exp.s    | 37 +++++++++++++++++++++++++++++++++++--
 src/math/i386/log1p.s  | 10 ++++++++++
 src/math/i386/log1pf.s | 12 ++++++++++++
 6 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
index 932c7542..a9f691bf 100644
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -2,7 +2,18 @@
 .type asinf,@function
 asinf:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+	# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
 
 .global asinl
 .type asinl,@function
@@ -14,6 +25,16 @@ asinl:
 .type asin,@function
 asin:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+	# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
index 7e28b395..d73137b2 100644
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -2,6 +2,16 @@
 .type atan,@function
 atan:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+	# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
index 3cd40233..8caddefa 100644
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -2,6 +2,18 @@
 .type atanf,@function
 atanf:
 	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+	# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index e3b42af5..e5f54588 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,18 @@
 .type expm1f,@function
 expm1f:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+	# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
 
 .global expm1l
 .type expm1l,@function
@@ -14,10 +25,32 @@ expm1l:
 .type expm1,@function
 expm1:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+	# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fldl2e
 	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
 	fld1
-	fld %st(1)
+	jb 1f
+	# x*log2e < -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
 	fabs
 	fucom %st(1)
 	fnstsw %ax
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
index 9971e53c..6b6929c7 100644
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -7,9 +7,19 @@ log1p:
 	fldl 4(%esp)
 	cmp $0x3fd28f00,%eax
 	ja 1f
+	cmp $0x00100000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+	# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	fsts 4(%esp)
+	# both branches: pop the entry below x so only x is returned
+1:	fstp %st(1)
+	ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
index 2680a8a6..c0bcd30f 100644
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -7,9 +7,21 @@ log1pf:
 	flds 4(%esp)
 	cmp $0x3e940000,%eax
 	ja 1f
+	cmp $0x00800000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+	# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+	# both branches: pop the entry below x so only x is returned
+1:	fstp %st(1)
+	ret
-- 
2.40.0