* [PATCH] MIPS: math-emu: Add madd/msub/nmadd/nmsub emulation for Loongson-3
@ 2019-02-06 12:03 Huacai Chen
0 siblings, 0 replies; only message in thread
From: Huacai Chen @ 2019-02-06 12:03 UTC (permalink / raw)
To: Paul Burton, Ralf Baechle, James Hogan
Cc: linux-mips, linux-mips, Fuxin Zhang, Zhangjin Wu, Huacai Chen,
Huacai Chen, Pei Huang
Add madd.s/madd.d/msub.s/msub.d/nmadd.s/nmadd.d/nmsub.s/nmsub.d
emulation for Loongson-3. MIPS R2 suggest these instructions be
unfused, but Loongson-3 suggest these instructions be fused, which
is similar to maddf/msubf in MIPS R6.
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Pei Huang <huangpei@loongson.cn>
---
arch/mips/math-emu/cp1emu.c | 32 +++++++++++++++++++++++++
arch/mips/math-emu/dp_maddf.c | 53 +++++++++++++++++++++++++++--------------
arch/mips/math-emu/ieee754.h | 16 +++++++++++++
arch/mips/math-emu/ieee754int.h | 1 +
arch/mips/math-emu/sp_maddf.c | 53 +++++++++++++++++++++++++++--------------
5 files changed, 119 insertions(+), 36 deletions(-)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index e60e290..2f99ec2 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1463,6 +1463,7 @@ static union ieee754sp fpemu_sp_rsqrt(union ieee754sp s)
return ieee754sp_div(ieee754sp_one(0), ieee754sp_sqrt(s));
}
+#ifndef CONFIG_CPU_LOONGSON3
DEF3OP(madd, sp, ieee754sp_mul, ieee754sp_add, );
DEF3OP(msub, sp, ieee754sp_mul, ieee754sp_sub, );
DEF3OP(nmadd, sp, ieee754sp_mul, ieee754sp_add, ieee754sp_neg);
@@ -1471,6 +1472,7 @@ DEF3OP(madd, dp, ieee754dp_mul, ieee754dp_add, );
DEF3OP(msub, dp, ieee754dp_mul, ieee754dp_sub, );
DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg);
DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg);
+#endif
static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
mips_instruction ir, void __user **fault_addr)
@@ -1525,6 +1527,20 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
}
break;
+#ifdef CONFIG_CPU_LOONGSON3
+ case madd_s_op:
+ handler = ieee754sp_madd;
+ goto scoptop;
+ case msub_s_op:
+ handler = ieee754sp_msub;
+ goto scoptop;
+ case nmadd_s_op:
+ handler = ieee754sp_nmadd;
+ goto scoptop;
+ case nmsub_s_op:
+ handler = ieee754sp_nmsub;
+ goto scoptop;
+#else
case madd_s_op:
handler = fpemu_sp_madd;
goto scoptop;
@@ -1537,6 +1553,7 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
case nmsub_s_op:
handler = fpemu_sp_nmsub;
goto scoptop;
+#endif
scoptop:
SPFROMREG(fr, MIPSInst_FR(ir));
@@ -1621,6 +1638,20 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
}
break;
+#ifdef CONFIG_CPU_LOONGSON3
+ case madd_d_op:
+ handler = ieee754dp_madd;
+ goto dcoptop;
+ case msub_d_op:
+ handler = ieee754dp_msub;
+ goto dcoptop;
+ case nmadd_d_op:
+ handler = ieee754dp_nmadd;
+ goto dcoptop;
+ case nmsub_d_op:
+ handler = ieee754dp_nmsub;
+ goto dcoptop;
+#else
case madd_d_op:
handler = fpemu_dp_madd;
goto dcoptop;
@@ -1633,6 +1664,7 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
case nmsub_d_op:
handler = fpemu_dp_nmsub;
goto dcoptop;
+#endif
dcoptop:
DPFROMREG(fr, MIPSInst_FR(ir));
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 7ea2f82..a9c2db4 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -71,6 +71,12 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
ieee754_clearcx();
+ rs = xs ^ ys;
+ if (flags & MADDF_NEGATE_PRODUCT)
+ rs ^= 1;
+ if (flags & MADDF_NEGATE_ADDITION)
+ zs ^= 1;
+
/*
* Handle the cases when at least one of x, y or z is a NaN.
* Order of precedence is sNaN, qNaN and z, x, y.
@@ -107,9 +113,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if ((zc == IEEE754_CLASS_INF) &&
- ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
/*
* Cases of addition of infinities with opposite signs
* or subtraction of infinities with same signs.
@@ -119,15 +123,10 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
}
/*
* z is here either not an infinity, or an infinity having the
- * same sign as product (x*y) (in case of MADDF.D instruction)
- * or product -(x*y) (in MSUBF.D case). The result must be an
- * infinity, and its sign is determined only by the value of
- * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ * same sign as product (x*y). The result must be an infinity,
+ * and its sign is determined only by the sign of product (x*y).
*/
- if (flags & MADDF_NEGATE_PRODUCT)
- return ieee754dp_inf(1 ^ (xs ^ ys));
- else
- return ieee754dp_inf(xs ^ ys);
+ return ieee754dp_inf(rs);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -138,10 +137,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
return ieee754dp_inf(zs);
if (zc == IEEE754_CLASS_ZERO) {
/* Handle cases +0 + (-0) and similar ones. */
- if ((!(flags & MADDF_NEGATE_PRODUCT)
- && (zs == (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT)
- && (zs != (xs ^ ys))))
+ if (zs == rs)
/*
* Cases of addition of zeros of equal signs
* or subtraction of zeroes of opposite signs.
@@ -190,9 +186,6 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
assert(ym & DP_HIDDEN_BIT);
re = xe + ye;
- rs = xs ^ ys;
- if (flags & MADDF_NEGATE_PRODUCT)
- rs ^= 1;
/* shunt to top of word */
xm <<= 64 - (DP_FBITS + 1);
@@ -343,3 +336,27 @@ union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
{
return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
+
+union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, 0);
+}
+
+union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
+}
+
+union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
+}
+
+union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
+}
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index e0eb7a9..e12bb9f 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -80,6 +80,14 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp y);
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
union ieee754sp y);
+union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
+union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
+union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
+union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y);
int ieee754sp_2008class(union ieee754sp x);
union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y);
union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y);
@@ -115,6 +123,14 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
union ieee754dp y);
union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
union ieee754dp y);
+union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
+union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
+union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
+union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y);
int ieee754dp_2008class(union ieee754dp x);
union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y);
union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y);
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 06ac0e2..302815c 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -28,6 +28,7 @@
enum maddf_flags {
MADDF_NEGATE_PRODUCT = 1 << 0,
+ MADDF_NEGATE_ADDITION = 1 << 1,
};
static inline void ieee754_clearcx(void)
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index 07ba675..d334181 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -39,6 +39,12 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
ieee754_clearcx();
+ rs = xs ^ ys;
+ if (flags & MADDF_NEGATE_PRODUCT)
+ rs ^= 1;
+ if (flags & MADDF_NEGATE_ADDITION)
+ zs ^= 1;
+
/*
* Handle the cases when at least one of x, y or z is a NaN.
* Order of precedence is sNaN, qNaN and z, x, y.
@@ -76,9 +82,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if ((zc == IEEE754_CLASS_INF) &&
- ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
/*
* Cases of addition of infinities with opposite signs
* or subtraction of infinities with same signs.
@@ -88,15 +92,10 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
}
/*
* z is here either not an infinity, or an infinity having the
- * same sign as product (x*y) (in case of MADDF.D instruction)
- * or product -(x*y) (in MSUBF.D case). The result must be an
- * infinity, and its sign is determined only by the value of
- * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ * same sign as product (x*y). The result must be an infinity,
+ * and its sign is determined only by the sign of product (x*y).
*/
- if (flags & MADDF_NEGATE_PRODUCT)
- return ieee754sp_inf(1 ^ (xs ^ ys));
- else
- return ieee754sp_inf(xs ^ ys);
+ return ieee754sp_inf(rs);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -107,10 +106,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
return ieee754sp_inf(zs);
if (zc == IEEE754_CLASS_ZERO) {
/* Handle cases +0 + (-0) and similar ones. */
- if ((!(flags & MADDF_NEGATE_PRODUCT)
- && (zs == (xs ^ ys))) ||
- ((flags & MADDF_NEGATE_PRODUCT)
- && (zs != (xs ^ ys))))
+ if (zs == rs)
/*
* Cases of addition of zeros of equal signs
* or subtraction of zeroes of opposite signs.
@@ -161,9 +157,6 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
assert(ym & SP_HIDDEN_BIT);
re = xe + ye;
- rs = xs ^ ys;
- if (flags & MADDF_NEGATE_PRODUCT)
- rs ^= 1;
/* Multiple 24 bit xm and ym to give 48 bit results */
rm64 = (uint64_t)xm * ym;
@@ -263,3 +256,27 @@ union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
{
return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
+
+union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, 0);
+}
+
+union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
+}
+
+union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
+}
+
+union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
+}
--
2.7.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-02-06 12:03 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-06 12:03 [PATCH] MIPS: math-emu: Add madd/msub/nmadd/nmsub emulation for Loongson-3 Huacai Chen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).