This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[AArch64][PATCH 3/3] Add floating-point FP16 instructions
- From: Matthew Wahab <matthew dot wahab at foss dot arm dot com>
- To: binutils at sourceware dot org
- Date: Tue, 24 Nov 2015 11:55:07 +0000
- Subject: [AArch64][PATCH 3/3] Add floating-point FP16 instructions
- Authentication-results: sourceware.org; auth=none
- References: <56544EB9 dot 9000909 at foss dot arm dot com>
Hello,
ARMv8.2 adds 16-bit floating point operations as an optional extension
to the ARMv8 FP support. This patch adds the new FP16 instructions,
making them available when the architecture extension +fp+fp16 is
specified.
The instructions added are:
- Comparisons and conditionals: FCCMP, FCCMPE, FCMP, FCMPE and FCSEL.
- Arithmetic: FABS, FNEG, FSQRT, FMUL, FDIV, FADD, FSUB, FMAX, FMIN,
FMAXNM, FMINNM, FNMUL, FMADD, FMSUB, FNMADD and FNMSUB.
- Rounding: FRINTN, FRINTP, FRINTM, FRINTZ, FRINTA, FRINTX and FRINTI.
- Conversions: SCVTF (fixed-point), SCVTF (integer), UCVTF (fixed-point),
UCVTF (integer), FCVTZS (fixed-point), FCVTZS (integer), FCVTZU
(fixed-point), FCVTZU (integer), FCVTNS, FCVTNU, FCVTAS, FCVTAU,
FCVTPS, FCVTPU, FCVTMS and FCVTMU.
- Scalar FMOV: immediate, general and register
Tested the series for aarch64-none-linux-gnu with cross-compiled
check-binutils and check-gas.
Ok for trunk?
Matthew
gas/testsuite/
2015-11-24 Matthew Wahab <matthew.wahab@arm.com>
* gas/aarch64/float-fp16.d: New.
* gas/aarch64/float-fp16.s: New.
opcodes/
2015-11-24 Matthew Wahab <matthew.wahab@arm.com>
* aarch64-asm-2.c: Regenerate.
* aarch64-dis-2.c: Regenerate.
* aarch64-opc-2.c: Regenerate.
* aarch64-tbl.h (QL_FIX2FP_H, QL_FP2FIX_H): New.
(QL_INT2FP_H, QL_FP2INT_H): New.
(QL_FP2_H, QL_FP3_H, QL_FP4_H): New.
(QL_DST_H): New.
(QL_FCCMP_H, QL_FP_COND_H): New.
(aarch64_opcode_table): Add 16-bit variants of scvtf, ucvtf,
fcvtzs, fcvtzu, fcvtns, fcvtnu, scvtf, ucvtf, fcvtas, fcvtau,
fmov, fcvtps, fcvtpu, fcvtms, fcvtmu, fcvtzs, fcvtzu, fccmp,
fccmpe, fcmp, fcmpe, fabs, fneg, fsqrt, frintn, frintp, frintm,
frintz, frinta, frintx, frinti, fmul, fdiv, fadd, fsub, fmax,
fmin, fmaxnm, fminnm, fnmul, fmadd, fmsub, fnmadd, fnmsub and
fcsel.
>From 3ab46e54742b143ddc98e1d2da2235cda16b06bc Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Thu, 24 Sep 2015 18:41:28 +0100
Subject: [PATCH 3/3] [AArch64][PATCH 3/3] Add floating-point FP16 instructions
Change-Id: I2c052266aee3dbf479e057055f799d1a6f44b49f
---
gas/testsuite/gas/aarch64/float-fp16.d | 172 ++++++
gas/testsuite/gas/aarch64/float-fp16.s | 151 +++++
opcodes/aarch64-asm-2.c | 422 +++++++-------
opcodes/aarch64-dis-2.c | 974 ++++++++++++++++++---------------
opcodes/aarch64-opc-2.c | 68 +--
opcodes/aarch64-tbl.h | 164 ++++++
6 files changed, 1269 insertions(+), 682 deletions(-)
create mode 100644 gas/testsuite/gas/aarch64/float-fp16.d
create mode 100644 gas/testsuite/gas/aarch64/float-fp16.s
diff --git a/gas/testsuite/gas/aarch64/float-fp16.d b/gas/testsuite/gas/aarch64/float-fp16.d
new file mode 100644
index 0000000..dc87981
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/float-fp16.d
@@ -0,0 +1,172 @@
+#as: -march=armv8.2-a+fp16
+#objdump: -dr
+
+.*: file format .*
+
+Disassembly of section \.text:
+
+0000000000000000 <.*>:
+ [0-9a-f]+: 1e200400 fccmp s0, s0, #0x0, eq
+ [0-9a-f]+: 1ee00400 fccmp h0, h0, #0x0, eq
+ [0-9a-f]+: 1e22d420 fccmp s1, s2, #0x0, le
+ [0-9a-f]+: 1ee2d420 fccmp h1, h2, #0x0, le
+ [0-9a-f]+: 1e200410 fccmpe s0, s0, #0x0, eq
+ [0-9a-f]+: 1ee00410 fccmpe h0, h0, #0x0, eq
+ [0-9a-f]+: 1e22d430 fccmpe s1, s2, #0x0, le
+ [0-9a-f]+: 1ee2d430 fccmpe h1, h2, #0x0, le
+ [0-9a-f]+: 1e202000 fcmp s0, s0
+ [0-9a-f]+: 1ee02000 fcmp h0, h0
+ [0-9a-f]+: 1e222020 fcmp s1, s2
+ [0-9a-f]+: 1ee22020 fcmp h1, h2
+ [0-9a-f]+: 1e202010 fcmpe s0, s0
+ [0-9a-f]+: 1ee02010 fcmpe h0, h0
+ [0-9a-f]+: 1e222030 fcmpe s1, s2
+ [0-9a-f]+: 1ee22030 fcmpe h1, h2
+ [0-9a-f]+: 1e202008 fcmp s0, #0\.0
+ [0-9a-f]+: 1ee02008 fcmp h0, #0\.0
+ [0-9a-f]+: 1e202018 fcmpe s0, #0\.0
+ [0-9a-f]+: 1ee02018 fcmpe h0, #0\.0
+ [0-9a-f]+: 1e210c00 fcsel s0, s0, s1, eq
+ [0-9a-f]+: 1ee10c00 fcsel h0, h0, h1, eq
+ [0-9a-f]+: 9ee60000 fmov x0, h0
+ [0-9a-f]+: 1ee60000 fmov w0, h0
+ [0-9a-f]+: 9ee70001 fmov h1, x0
+ [0-9a-f]+: 1ee70001 fmov h1, w0
+ [0-9a-f]+: 1ee0c020 fabs h0, h1
+ [0-9a-f]+: 1e20c020 fabs s0, s1
+ [0-9a-f]+: 1e60c020 fabs d0, d1
+ [0-9a-f]+: 1ee14020 fneg h0, h1
+ [0-9a-f]+: 1e214020 fneg s0, s1
+ [0-9a-f]+: 1e614020 fneg d0, d1
+ [0-9a-f]+: 1ee1c020 fsqrt h0, h1
+ [0-9a-f]+: 1e21c020 fsqrt s0, s1
+ [0-9a-f]+: 1e61c020 fsqrt d0, d1
+ [0-9a-f]+: 1ee44020 frintn h0, h1
+ [0-9a-f]+: 1e244020 frintn s0, s1
+ [0-9a-f]+: 1e644020 frintn d0, d1
+ [0-9a-f]+: 1ee4c020 frintp h0, h1
+ [0-9a-f]+: 1e24c020 frintp s0, s1
+ [0-9a-f]+: 1e64c020 frintp d0, d1
+ [0-9a-f]+: 1ee54020 frintm h0, h1
+ [0-9a-f]+: 1e254020 frintm s0, s1
+ [0-9a-f]+: 1e654020 frintm d0, d1
+ [0-9a-f]+: 1ee5c020 frintz h0, h1
+ [0-9a-f]+: 1e25c020 frintz s0, s1
+ [0-9a-f]+: 1e65c020 frintz d0, d1
+ [0-9a-f]+: 1ee64020 frinta h0, h1
+ [0-9a-f]+: 1e264020 frinta s0, s1
+ [0-9a-f]+: 1e664020 frinta d0, d1
+ [0-9a-f]+: 1ee74020 frintx h0, h1
+ [0-9a-f]+: 1e274020 frintx s0, s1
+ [0-9a-f]+: 1e674020 frintx d0, d1
+ [0-9a-f]+: 1ee7c020 frinti h0, h1
+ [0-9a-f]+: 1e27c020 frinti s0, s1
+ [0-9a-f]+: 1e67c020 frinti d0, d1
+ [0-9a-f]+: 1ee20820 fmul h0, h1, h2
+ [0-9a-f]+: 1e220820 fmul s0, s1, s2
+ [0-9a-f]+: 1e620820 fmul d0, d1, d2
+ [0-9a-f]+: 1ee21820 fdiv h0, h1, h2
+ [0-9a-f]+: 1e221820 fdiv s0, s1, s2
+ [0-9a-f]+: 1e621820 fdiv d0, d1, d2
+ [0-9a-f]+: 1ee22820 fadd h0, h1, h2
+ [0-9a-f]+: 1e222820 fadd s0, s1, s2
+ [0-9a-f]+: 1e622820 fadd d0, d1, d2
+ [0-9a-f]+: 1ee23820 fsub h0, h1, h2
+ [0-9a-f]+: 1e223820 fsub s0, s1, s2
+ [0-9a-f]+: 1e623820 fsub d0, d1, d2
+ [0-9a-f]+: 1ee24820 fmax h0, h1, h2
+ [0-9a-f]+: 1e224820 fmax s0, s1, s2
+ [0-9a-f]+: 1e624820 fmax d0, d1, d2
+ [0-9a-f]+: 1ee25820 fmin h0, h1, h2
+ [0-9a-f]+: 1e225820 fmin s0, s1, s2
+ [0-9a-f]+: 1e625820 fmin d0, d1, d2
+ [0-9a-f]+: 1ee26820 fmaxnm h0, h1, h2
+ [0-9a-f]+: 1e226820 fmaxnm s0, s1, s2
+ [0-9a-f]+: 1e626820 fmaxnm d0, d1, d2
+ [0-9a-f]+: 1ee27820 fminnm h0, h1, h2
+ [0-9a-f]+: 1e227820 fminnm s0, s1, s2
+ [0-9a-f]+: 1e627820 fminnm d0, d1, d2
+ [0-9a-f]+: 1ee28820 fnmul h0, h1, h2
+ [0-9a-f]+: 1e228820 fnmul s0, s1, s2
+ [0-9a-f]+: 1e628820 fnmul d0, d1, d2
+ [0-9a-f]+: 1fc20c20 fmadd h0, h1, h2, h3
+ [0-9a-f]+: 1f020c20 fmadd s0, s1, s2, s3
+ [0-9a-f]+: 1f420c20 fmadd d0, d1, d2, d3
+ [0-9a-f]+: 1fc28c20 fmsub h0, h1, h2, h3
+ [0-9a-f]+: 1f028c20 fmsub s0, s1, s2, s3
+ [0-9a-f]+: 1f428c20 fmsub d0, d1, d2, d3
+ [0-9a-f]+: 1fe20c20 fnmadd h0, h1, h2, h3
+ [0-9a-f]+: 1f220c20 fnmadd s0, s1, s2, s3
+ [0-9a-f]+: 1f620c20 fnmadd d0, d1, d2, d3
+ [0-9a-f]+: 1fe28c20 fnmsub h0, h1, h2, h3
+ [0-9a-f]+: 1f228c20 fnmsub s0, s1, s2, s3
+ [0-9a-f]+: 1f628c20 fnmsub d0, d1, d2, d3
+ [0-9a-f]+: 1e2e1000 fmov s0, #1\.000000000000000000e\+00
+ [0-9a-f]+: 1eee1000 fmov h0, #1\.000000000000000000e\+00
+ [0-9a-f]+: 1e02f820 scvtf s0, w1, #2
+ [0-9a-f]+: 9e02f420 scvtf s0, x1, #3
+ [0-9a-f]+: 1ec2f820 scvtf h0, w1, #2
+ [0-9a-f]+: 9ec2f420 scvtf h0, x1, #3
+ [0-9a-f]+: 1e03f820 ucvtf s0, w1, #2
+ [0-9a-f]+: 9e03f420 ucvtf s0, x1, #3
+ [0-9a-f]+: 1ec3f820 ucvtf h0, w1, #2
+ [0-9a-f]+: 9ec3f420 ucvtf h0, x1, #3
+ [0-9a-f]+: 1e58f801 fcvtzs w1, d0, #2
+ [0-9a-f]+: 9e58f401 fcvtzs x1, d0, #3
+ [0-9a-f]+: 1ed8f801 fcvtzs w1, h0, #2
+ [0-9a-f]+: 9ed8f401 fcvtzs x1, h0, #3
+ [0-9a-f]+: 1e59f801 fcvtzu w1, d0, #2
+ [0-9a-f]+: 9e59f401 fcvtzu x1, d0, #3
+ [0-9a-f]+: 1ed9f801 fcvtzu w1, h0, #2
+ [0-9a-f]+: 9ed9f401 fcvtzu x1, h0, #3
+ [0-9a-f]+: 1e200001 fcvtns w1, s0
+ [0-9a-f]+: 9e600001 fcvtns x1, d0
+ [0-9a-f]+: 1ee00001 fcvtns w1, h0
+ [0-9a-f]+: 9ee00001 fcvtns x1, h0
+ [0-9a-f]+: 1e210001 fcvtnu w1, s0
+ [0-9a-f]+: 9e610001 fcvtnu x1, d0
+ [0-9a-f]+: 1ee10001 fcvtnu w1, h0
+ [0-9a-f]+: 9ee10001 fcvtnu x1, h0
+ [0-9a-f]+: 1e250001 fcvtau w1, s0
+ [0-9a-f]+: 9e650001 fcvtau x1, d0
+ [0-9a-f]+: 1ee50001 fcvtau w1, h0
+ [0-9a-f]+: 9ee50001 fcvtau x1, h0
+ [0-9a-f]+: 1e240001 fcvtas w1, s0
+ [0-9a-f]+: 9e640001 fcvtas x1, d0
+ [0-9a-f]+: 1ee40001 fcvtas w1, h0
+ [0-9a-f]+: 9ee40001 fcvtas x1, h0
+ [0-9a-f]+: 1e280001 fcvtps w1, s0
+ [0-9a-f]+: 9e680001 fcvtps x1, d0
+ [0-9a-f]+: 1ee80001 fcvtps w1, h0
+ [0-9a-f]+: 9ee80001 fcvtps x1, h0
+ [0-9a-f]+: 1e290001 fcvtpu w1, s0
+ [0-9a-f]+: 9e690001 fcvtpu x1, d0
+ [0-9a-f]+: 1ee90001 fcvtpu w1, h0
+ [0-9a-f]+: 9ee90001 fcvtpu x1, h0
+ [0-9a-f]+: 1e300001 fcvtms w1, s0
+ [0-9a-f]+: 9e700001 fcvtms x1, d0
+ [0-9a-f]+: 1ef00001 fcvtms w1, h0
+ [0-9a-f]+: 9ef00001 fcvtms x1, h0
+ [0-9a-f]+: 1e310001 fcvtmu w1, s0
+ [0-9a-f]+: 9e710001 fcvtmu x1, d0
+ [0-9a-f]+: 1ef10001 fcvtmu w1, h0
+ [0-9a-f]+: 9ef10001 fcvtmu x1, h0
+ [0-9a-f]+: 1e220020 scvtf s0, w1
+ [0-9a-f]+: 9e620020 scvtf d0, x1
+ [0-9a-f]+: 1ee20020 scvtf h0, w1
+ [0-9a-f]+: 9ee20020 scvtf h0, x1
+ [0-9a-f]+: 1e230020 ucvtf s0, w1
+ [0-9a-f]+: 9e630020 ucvtf d0, x1
+ [0-9a-f]+: 1ee30020 ucvtf h0, w1
+ [0-9a-f]+: 9ee30020 ucvtf h0, x1
+ [0-9a-f]+: 1e604020 fmov d0, d1
+ [0-9a-f]+: 1e204020 fmov s0, s1
+ [0-9a-f]+: 1ee04020 fmov h0, h1
+ [0-9a-f]+: 9ee60020 fmov x0, h1
+ [0-9a-f]+: 1ee60020 fmov w0, h1
+ [0-9a-f]+: 9ee70001 fmov h1, x0
+ [0-9a-f]+: 1ee70001 fmov h1, w0
+ [0-9a-f]+: 1e260020 fmov w0, s1
+ [0-9a-f]+: 9e660020 fmov x0, d1
+ [0-9a-f]+: 1e270001 fmov s1, w0
+ [0-9a-f]+: 9e670001 fmov d1, x0
diff --git a/gas/testsuite/gas/aarch64/float-fp16.s b/gas/testsuite/gas/aarch64/float-fp16.s
new file mode 100644
index 0000000..0c30038
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/float-fp16.s
@@ -0,0 +1,151 @@
+/* Test file for AArch64 half-precision floating-point instructions. */
+
+ .text
+ fccmp s0, s0, #0, eq
+ fccmp h0, h0, #0, eq
+ fccmp s1, s2, #0, le
+ fccmp h1, h2, #0, le
+
+ fccmpe s0, s0, #0, eq
+ fccmpe h0, h0, #0, eq
+ fccmpe s1, s2, #0, le
+ fccmpe h1, h2, #0, le
+
+ fcmp s0, s0
+ fcmp h0, h0
+ fcmp s1, s2
+ fcmp h1, h2
+
+ fcmpe s0, s0
+ fcmpe h0, h0
+ fcmpe s1, s2
+ fcmpe h1, h2
+
+ fcmp s0, #0.0
+ fcmp h0, #0.0
+
+ fcmpe s0, #0.0
+ fcmpe h0, #0.0
+
+ fcsel s0, s0, s1, eq
+ fcsel h0, h0, h1, eq
+
+ fmov x0, h0
+ fmov w0, h0
+ fmov h1, x0
+ fmov h1, w0
+
+ /* Scalar data-processing with one source. */
+ .macro sdp1src op
+ \op h0, h1
+ \op s0, s1
+ \op d0, d1
+ .endm
+
+ .text
+ .irp op, fabs, fneg, fsqrt, frintn, frintp, frintm, frintz
+ sdp1src \op
+ .endr
+
+ .irp op, frinta, frintx, frinti
+ sdp1src \op
+ .endr
+
+ /* Scalar data-processing with two sources. */
+ .macro sdp2src op
+ \op h0, h1, h2
+ \op s0, s1, s2
+ \op d0, d1, d2
+ .endm
+
+ .text
+ .irp op, fmul, fdiv, fadd, fsub, fmax, fmin, fmaxnm, fminnm, fnmul
+ sdp2src \op
+ .endr
+
+ /* Scalar data-processing with three sources. */
+ .macro sdp3src op
+ \op h0, h1, h2, h3
+ \op s0, s1, s2, s3
+ \op d0, d1, d2, d3
+ .endm
+
+ .text
+ .irp op, fmadd, fmsub, fnmadd, fnmsub
+ sdp3src \op
+ .endr
+
+ /* Scalar conversion. */
+
+ .macro scvt_fix2fp op
+ \op s0, w1, #2
+ \op s0, x1, #3
+ \op h0, w1, #2
+ \op h0, x1, #3
+ .endm
+
+ .macro scvt_fp2fix op
+ \op w1, d0, #2
+ \op x1, d0, #3
+ \op w1, h0, #2
+ \op x1, h0, #3
+ .endm
+
+ .text
+
+ fmov s0, #1.0
+ fmov h0, #1.0
+
+ .irp op, scvtf, ucvtf
+ scvt_fix2fp \op
+ .endr
+
+ .irp op, fcvtzs, fcvtzu
+ scvt_fp2fix \op
+ .endr
+
+ .macro scvt_fp2int op
+ \op w1, s0
+ \op x1, d0
+ \op w1, h0
+ \op x1, h0
+ .endm
+
+ .macro scvt_int2fp op
+ \op s0, w1
+ \op d0, x1
+ \op h0, w1
+ \op h0, x1
+ .endm
+
+ .text
+ .irp op, fcvtns, fcvtnu, fcvtau, fcvtas
+ scvt_fp2int \op
+ .endr
+
+ .text
+ .irp op, fcvtps, fcvtpu, fcvtms, fcvtmu
+ scvt_fp2int \op
+ .endr
+
+ .irp op, scvtf, ucvtf
+ scvt_int2fp \op
+ .endr
+
+ /* FMOV. */
+
+ fmov d0, d1
+ fmov s0, s1
+ fmov h0, h1
+
+ fmov x0, h1
+ fmov w0, h1
+
+ fmov h1, x0
+ fmov h1, w0
+
+ fmov w0, s1
+ fmov x0, d1
+
+ fmov s1, w0
+ fmov d1, x0
diff --git a/opcodes/aarch64-asm-2.c b/opcodes/aarch64-asm-2.c
index 51a8860..64bc136 100644
diff --git a/opcodes/aarch64-dis-2.c b/opcodes/aarch64-dis-2.c
index 57630d5..9dfa7e4 100644
diff --git a/opcodes/aarch64-opc-2.c b/opcodes/aarch64-opc-2.c
index ca3f35b..968e99c 100644
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 6b77b36..50bbc2d 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -259,6 +259,13 @@
QLF3(S_S,X,imm_1_64), \
}
+/* e.g. SCVTF <Hd>, <Xn>, #<fbits>. */
+#define QL_FIX2FP_H \
+{ \
+ QLF3 (S_H, W, imm_1_32), \
+ QLF3 (S_H, X, imm_1_64), \
+}
+
/* e.g. FCVTZS <Wd>, <Dn>, #<fbits>. */
#define QL_FP2FIX \
{ \
@@ -268,6 +275,13 @@
QLF3(X,S_S,imm_1_64), \
}
+/* e.g. FCVTZS <Wd>, <Hn>, #<fbits>. */
+#define QL_FP2FIX_H \
+{ \
+ QLF3 (W, S_H, imm_1_32), \
+ QLF3 (X, S_H, imm_1_64), \
+}
+
/* e.g. SCVTF <Dd>, <Wn>. */
#define QL_INT2FP \
{ \
@@ -277,6 +291,13 @@
QLF2(S_S,X), \
}
+/* e.g. SCVTF <Hd>, <Wn>. */
+#define QL_INT2FP_H \
+{ \
+ QLF2 (S_H, W), \
+ QLF2 (S_H, X), \
+}
+
/* e.g. FCVTNS <Xd>, <Dn>. */
#define QL_FP2INT \
{ \
@@ -286,6 +307,13 @@
QLF2(X,S_S), \
}
+/* e.g. FCVTNS <Wd>, <Hn>. */
+#define QL_FP2INT_H \
+{ \
+ QLF2 (W, S_H), \
+ QLF2 (X, S_H), \
+}
+
/* e.g. FMOV <Xd>, <Vn>.D[1]. */
#define QL_XVD1 \
{ \
@@ -504,6 +532,12 @@
QLF2(S_D, S_D), \
}
+/* FMOV <Hd>, <Hn>. */
+#define QL_FP2_H \
+{ \
+ QLF2 (S_H, S_H), \
+}
+
/* e.g. SQADD <V><d>, <V><n>, <V><m>. */
#define QL_S_3SAME \
{ \
@@ -540,6 +574,12 @@
QLF3(S_D, S_D, S_D), \
}
+/* FMUL <Hd>, <Hn>, <Hm>. */
+#define QL_FP3_H \
+{ \
+ QLF3 (S_H, S_H, S_H), \
+}
+
/* FMADD <Dd>, <Dn>, <Dm>, <Da>. */
#define QL_FP4 \
{ \
@@ -547,6 +587,12 @@
QLF4(S_D, S_D, S_D, S_D), \
}
+/* FMADD <Hd>, <Hn>, <Hm>, <Ha>. */
+#define QL_FP4_H \
+{ \
+ QLF4 (S_H, S_H, S_H, S_H), \
+}
+
/* e.g. FCMP <Dn>, #0.0. */
#define QL_DST_SD \
{ \
@@ -554,6 +600,12 @@
QLF2(S_D, NIL), \
}
+/* e.g. FCMP <Hn>, #0.0. */
+#define QL_DST_H \
+{ \
+ QLF2 (S_H, NIL), \
+}
+
/* FCSEL <Sd>, <Sn>, <Sm>, <cond>. */
#define QL_FP_COND \
{ \
@@ -561,6 +613,12 @@
QLF4(S_D, S_D, S_D, NIL), \
}
+/* FCSEL <Hd>, <Hn>, <Hm>, <cond>. */
+#define QL_FP_COND_H \
+{ \
+ QLF4 (S_H, S_H, S_H, NIL), \
+}
+
/* e.g. CCMN <Xn>, <Xm>, #<nzcv>, <cond>. */
#define QL_CCMP \
{ \
@@ -582,6 +640,12 @@
QLF4(S_D, S_D, NIL, NIL), \
}
+/* e.g. FCCMP <Hn>, <Hm>, #<nzcv>, <cond>. */
+#define QL_FCCMP_H \
+{ \
+ QLF4 (S_H, S_H, NIL, NIL), \
+}
+
/* e.g. DUP <Vd>.<T>, <Vn>.<Ts>[<index>]. */
#define QL_DUP_VX \
{ \
@@ -1907,66 +1971,166 @@ struct aarch64_opcode aarch64_opcode_table[] =
{"ror", 0x13800000, 0x7fa00000, extract, OP_ROR_IMM, CORE, OP3 (Rd, Rm, IMMS), QL_SHIFT, F_ALIAS | F_CONV},
/* Floating-point<->fixed-point conversions. */
{"scvtf", 0x1e020000, 0x7f3f0000, float2fix, 0, FP, OP3 (Fd, Rn, FBITS), QL_FIX2FP, F_FPTYPE | F_SF},
+ {"scvtf", 0x1ec20000, 0x7f3f0000, float2fix, 0, FP_F16,
+ OP3 (Fd, Rn, FBITS), QL_FIX2FP_H, F_FPTYPE | F_SF},
{"ucvtf", 0x1e030000, 0x7f3f0000, float2fix, 0, FP, OP3 (Fd, Rn, FBITS), QL_FIX2FP, F_FPTYPE | F_SF},
+ {"ucvtf", 0x1ec30000, 0x7f3f0000, float2fix, 0, FP_F16,
+ OP3 (Fd, Rn, FBITS), QL_FIX2FP_H, F_FPTYPE | F_SF},
{"fcvtzs", 0x1e180000, 0x7f3f0000, float2fix, 0, FP, OP3 (Rd, Fn, FBITS), QL_FP2FIX, F_FPTYPE | F_SF},
+ {"fcvtzs", 0x1ed80000, 0x7f3f0000, float2fix, 0, FP_F16,
+ OP3 (Rd, Fn, FBITS), QL_FP2FIX_H, F_FPTYPE | F_SF},
{"fcvtzu", 0x1e190000, 0x7f3f0000, float2fix, 0, FP, OP3 (Rd, Fn, FBITS), QL_FP2FIX, F_FPTYPE | F_SF},
+ {"fcvtzu", 0x1ed90000, 0x7f3f0000, float2fix, 0, FP_F16,
+ OP3 (Rd, Fn, FBITS), QL_FP2FIX_H, F_FPTYPE | F_SF},
/* Floating-point<->integer conversions. */
{"fcvtns", 0x1e200000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtns", 0x1ee00000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtnu", 0x1e210000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtnu", 0x1ee10000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"scvtf", 0x1e220000, 0x7f3ffc00, float2int, 0, FP, OP2 (Fd, Rn), QL_INT2FP, F_FPTYPE | F_SF},
+ {"scvtf", 0x1ee20000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Fd, Rn), QL_INT2FP_H, F_FPTYPE | F_SF},
{"ucvtf", 0x1e230000, 0x7f3ffc00, float2int, 0, FP, OP2 (Fd, Rn), QL_INT2FP, F_FPTYPE | F_SF},
+ {"ucvtf", 0x1ee30000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Fd, Rn), QL_INT2FP_H, F_FPTYPE | F_SF},
{"fcvtas", 0x1e240000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtas", 0x1ee40000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtau", 0x1e250000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtau", 0x1ee50000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fmov", 0x1e260000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fmov", 0x1ee60000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fmov", 0x1e270000, 0x7f3ffc00, float2int, 0, FP, OP2 (Fd, Rn), QL_INT2FP, F_FPTYPE | F_SF},
+ {"fmov", 0x1ee70000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Fd, Rn), QL_INT2FP_H, F_FPTYPE | F_SF},
{"fcvtps", 0x1e280000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtps", 0x1ee80000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtpu", 0x1e290000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtpu", 0x1ee90000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtms", 0x1e300000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtms", 0x1ef00000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtmu", 0x1e310000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtmu", 0x1ef10000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtzs", 0x1e380000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtzs", 0x1ef80000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fcvtzu", 0x1e390000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+ {"fcvtzu", 0x1ef90000, 0x7f3ffc00, float2int, 0, FP_F16,
+ OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
{"fmov", 0x9eae0000, 0xfffffc00, float2int, 0, FP, OP2 (Rd, VnD1), QL_XVD1, 0},
{"fmov", 0x9eaf0000, 0xfffffc00, float2int, 0, FP, OP2 (VdD1, Rn), QL_VD1X, 0},
/* Floating-point conditional compare. */
{"fccmp", 0x1e200400, 0xff200c10, floatccmp, 0, FP, OP4 (Fn, Fm, NZCV, COND), QL_FCCMP, F_FPTYPE},
+ {"fccmp", 0x1ee00400, 0xff200c10, floatccmp, 0, FP_F16,
+ OP4 (Fn, Fm, NZCV, COND), QL_FCCMP_H, F_FPTYPE},
{"fccmpe", 0x1e200410, 0xff200c10, floatccmp, 0, FP, OP4 (Fn, Fm, NZCV, COND), QL_FCCMP, F_FPTYPE},
+ {"fccmpe", 0x1ee00410, 0xff200c10, floatccmp, 0, FP_F16,
+ OP4 (Fn, Fm, NZCV, COND), QL_FCCMP_H, F_FPTYPE},
/* Floating-point compare. */
{"fcmp", 0x1e202000, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, Fm), QL_FP2, F_FPTYPE},
+ {"fcmp", 0x1ee02000, 0xff20fc1f, floatcmp, 0, FP_F16,
+ OP2 (Fn, Fm), QL_FP2_H, F_FPTYPE},
{"fcmpe", 0x1e202010, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, Fm), QL_FP2, F_FPTYPE},
+ {"fcmpe", 0x1ee02010, 0xff20fc1f, floatcmp, 0, FP_F16,
+ OP2 (Fn, Fm), QL_FP2_H, F_FPTYPE},
{"fcmp", 0x1e202008, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, FPIMM0), QL_DST_SD, F_FPTYPE},
+ {"fcmp", 0x1ee02008, 0xff20fc1f, floatcmp, 0, FP_F16,
+ OP2 (Fn, FPIMM0), QL_FP2_H, F_FPTYPE},
{"fcmpe", 0x1e202018, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, FPIMM0), QL_DST_SD, F_FPTYPE},
+ {"fcmpe", 0x1ee02018, 0xff20fc1f, floatcmp, 0, FP_F16,
+ OP2 (Fn, FPIMM0), QL_FP2_H, F_FPTYPE},
/* Floating-point data-processing (1 source). */
{"fmov", 0x1e204000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"fmov", 0x1ee04000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"fabs", 0x1e20c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"fabs", 0x1ee0c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"fneg", 0x1e214000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"fneg", 0x1ee14000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"fsqrt", 0x1e21c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"fsqrt", 0x1ee1c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"fcvt", 0x1e224000, 0xff3e7c00, floatdp1, OP_FCVT, FP, OP2 (Fd, Fn), QL_FCVT, F_FPTYPE | F_MISC},
{"frintn", 0x1e244000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frintn", 0x1ee44000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"frintp", 0x1e24c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frintp", 0x1ee4c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"frintm", 0x1e254000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frintm", 0x1ee54000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"frintz", 0x1e25c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frintz", 0x1ee5c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"frinta", 0x1e264000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frinta", 0x1ee64000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"frintx", 0x1e274000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frintx", 0x1ee74000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
{"frinti", 0x1e27c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+ {"frinti", 0x1ee7c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+ OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
/* Floating-point data-processing (2 source). */
{"fmul", 0x1e200800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fmul", 0x1ee00800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fdiv", 0x1e201800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fdiv", 0x1ee01800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fadd", 0x1e202800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fadd", 0x1ee02800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fsub", 0x1e203800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fsub", 0x1ee03800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fmax", 0x1e204800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fmax", 0x1ee04800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fmin", 0x1e205800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fmin", 0x1ee05800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fmaxnm", 0x1e206800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fmaxnm", 0x1ee06800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fminnm", 0x1e207800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fminnm", 0x1ee07800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
{"fnmul", 0x1e208800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+ {"fnmul", 0x1ee08800, 0xff20fc00, floatdp2, 0, FP_F16,
+ OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
/* Floating-point data-processing (3 source). */
{"fmadd", 0x1f000000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+ {"fmadd", 0x1fc00000, 0xff208000, floatdp3, 0, FP_F16,
+ OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
{"fmsub", 0x1f008000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+ {"fmsub", 0x1fc08000, 0xff208000, floatdp3, 0, FP_F16,
+ OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
{"fnmadd", 0x1f200000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+ {"fnmadd", 0x1fe00000, 0xff208000, floatdp3, 0, FP_F16,
+ OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
{"fnmsub", 0x1f208000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+ {"fnmsub", 0x1fe08000, 0xff208000, floatdp3, 0, FP_F16,
+ OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
/* Floating-point immediate. */
{"fmov", 0x1e201000, 0xff201fe0, floatimm, 0, FP, OP2 (Fd, FPIMM), QL_DST_SD, F_FPTYPE},
+ {"fmov", 0x1ee01000, 0xff201fe0, floatimm, 0, FP_F16,
+ OP2 (Fd, FPIMM), QL_DST_H, F_FPTYPE},
/* Floating-point conditional select. */
{"fcsel", 0x1e200c00, 0xff200c00, floatsel, 0, FP, OP4 (Fd, Fn, Fm, COND), QL_FP_COND, F_FPTYPE},
+ {"fcsel", 0x1ee00c00, 0xff200c00, floatsel, 0, FP_F16,
+ OP4 (Fd, Fn, Fm, COND), QL_FP_COND_H, F_FPTYPE},
/* Load/store register (immediate indexed). */
{"strb", 0x38000400, 0xffe00400, ldst_imm9, 0, CORE, OP2 (Rt, ADDR_SIMM9), QL_LDST_W8, 0},
{"ldrb", 0x38400400, 0xffe00400, ldst_imm9, 0, CORE, OP2 (Rt, ADDR_SIMM9), QL_LDST_W8, 0},
--
2.1.4