From ad2d2b963a4bf9e2631b345c898e8715b36b459e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 23 Mar 2012 01:52:49 -0400 Subject: [PATCH] asm for hypot and hypotf MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit special care is made to avoid any inexact computations when either arg is zero (in which case the exact absolute value of the other arg should be returned) and to support the special condition that hypot(±inf,nan) yields inf. hypotl is not yet implemented since avoiding overflow is nontrivial. --- src/math/i386/hypot.s | 45 ++++++++++++++++++++++++++++++++++++++++++ src/math/i386/hypotf.s | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/math/i386/hypot.s create mode 100644 src/math/i386/hypotf.s diff --git a/src/math/i386/hypot.s b/src/math/i386/hypot.s new file mode 100644 index 00000000..299c2e18 --- /dev/null +++ b/src/math/i386/hypot.s @@ -0,0 +1,45 @@ +.global hypot +.type hypot,@function +hypot: + mov 8(%esp),%eax + mov 16(%esp),%ecx + add %eax,%eax + add %ecx,%ecx + and %eax,%ecx + cmp $0xffe00000,%ecx + jae 2f + or 4(%esp),%eax + jnz 1f + fldl 12(%esp) + fabs + ret +1: mov 16(%esp),%eax + add %eax,%eax + or 12(%esp),%eax + jnz 1f + fldl 4(%esp) + fabs + ret +1: fldl 4(%esp) + fld %st(0) + fmulp + fldl 12(%esp) + fld %st(0) + fmulp + faddp + fsqrt + ret +2: sub $0xffe00000,%eax + or 4(%esp),%eax + jnz 1f + fldl 4(%esp) + fabs + ret +1: mov 16(%esp),%eax + add %eax,%eax + sub $0xffe00000,%eax + or 12(%esp),%eax + fldl 12(%esp) + jnz 1f + fabs +1: ret diff --git a/src/math/i386/hypotf.s b/src/math/i386/hypotf.s new file mode 100644 index 00000000..068935e2 --- /dev/null +++ b/src/math/i386/hypotf.s @@ -0,0 +1,42 @@ +.global hypotf +.type hypotf,@function +hypotf: + mov 4(%esp),%eax + mov 8(%esp),%ecx + add %eax,%eax + add %ecx,%ecx + and %eax,%ecx + cmp $0xff000000,%ecx + jae 2f + test %eax,%eax + jnz 1f + flds 8(%esp) + fabs + ret +1: mov 8(%esp),%eax + add %eax,%eax + jnz 1f + flds 4(%esp) + fabs + ret +1: flds 4(%esp) + fld %st(0) + fmulp + flds 8(%esp) + fld %st(0) + fmulp + faddp + fsqrt + ret +2: cmp $0xff000000,%eax + jnz 1f + flds 4(%esp) + fabs + ret +1: mov 8(%esp),%eax + add %eax,%eax + cmp $0xff000000,%eax + flds 8(%esp) + jnz 1f + fabs +1: ret -- 2.40.0