This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[AArch64][PATCH 3/3] Add floating-point FP16 instructions

From: Matthew Wahab <matthew dot wahab at foss dot arm dot com>
To: binutils at sourceware dot org
Date: Tue, 24 Nov 2015 11:55:07 +0000
Subject: [AArch64][PATCH 3/3] Add floating-point FP16 instructions
Authentication-results: sourceware.org; auth=none
References: <56544EB9 dot 9000909 at foss dot arm dot com>

Hello,

ARMv8.2 adds 16-bit floating point operations as an optional extension
to the ARMv8 FP support. This patch adds the new FP16 instructions,
making them available when the architecture extension +fp+fp16 is
specified.

The instructions added are:

- Comparisons and conditionals: FCCMP, FCCMPE, FCMP, FCMPE and FCSEL.
- Arithmetic: FABS, FNEG, FSQRT, FMUL, FDIV, FADD, FSUB, FMADD, FMSUB,
  FNMADD and FNMSUB.
- Rounding: FRINTN, FRINTP, FRINTM, FRINTZ, FRINTA, FRINTX and FRINTI.
- Conversions: SCVTF (fixed-point), SCVTF (integer), UCVTF (fixed-point),
  UCVTF (integer), FCVTZS (fixed-point), FCVTZS (integer), FCVTZU
  (fixed-point), FCVTZU (integer), FCVTNS, FCVTNU, FCVTAS, FCVTAU,
  FCVTPS, FCVTPU, FCVTMS and FCVTMU.
- Scalar FMOV: immediate, general and register

Tested the series for aarch64-none-linux-gnu with cross-compiled
check-binutils and check-gas.

Ok for trunk?
Matthew

gas/testsuite/
2015-11-24  Matthew Wahab  <matthew.wahab@arm.com>

	* gas/aarch64/float-fp16.d: New.
	* gas/aarch64/float-fp16.s: New.

opcodes/
2015-11-24  Matthew Wahab  <matthew.wahab@arm.com>

	* aarch64-asm-2.c: Regenerate.
	* aarch64-dis-2.c: Regenerate.
	* aarch64-opc-2.c: Regenerate.
	* aarch64-tbl.h (QL_FIX2FP_H, QL_FP2FIX_H): New.
	(QL_INT2FP_H, QL_FP2INT_H): New.
	(QL_FP2_H, QL_FP3_H, QL_FP4_H): New.
	(QL_DST_H, QL_FP_COND_H): New.
	(QL_FCCMP_H): New.
	(aarch64_opcode_table): Add 16-bit variants of scvtf, ucvtf,
	fcvtzs, fcvtzu, fcvtns, fcvtnu, scvtf, ucvtf, fcvtas, fcvtau,
	fmov, fcvtps, fcvtpu, fcvtms, fcvtmu, fcvtzs, fcvtzu, fccmp,
	fccmpe, fcmp, fcmpe, fabs, fneg, fsqrt, frintn, frintp, frintm,
	frintz, frinta, frintx, frinti, fmul, fdiv, fadd, fsub, fmax,
	fmin, fmaxnm, fminnm, fnmul, fmadd, fmsub, fnmadd, fnmsub and
	fcsel.

>From 3ab46e54742b143ddc98e1d2da2235cda16b06bc Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Thu, 24 Sep 2015 18:41:28 +0100
Subject: [PATCH 3/3] [AArch64][PATCH 3/3] Add floating-point FP16 instructions

Change-Id: I2c052266aee3dbf479e057055f799d1a6f44b49f
---
 gas/testsuite/gas/aarch64/float-fp16.d | 172 ++++++
 gas/testsuite/gas/aarch64/float-fp16.s | 151 +++++
 opcodes/aarch64-asm-2.c                | 422 +++++++-------
 opcodes/aarch64-dis-2.c                | 974 ++++++++++++++++++---------------
 opcodes/aarch64-opc-2.c                |  68 +--
 opcodes/aarch64-tbl.h                  | 164 ++++++
 6 files changed, 1269 insertions(+), 682 deletions(-)
 create mode 100644 gas/testsuite/gas/aarch64/float-fp16.d
 create mode 100644 gas/testsuite/gas/aarch64/float-fp16.s

diff --git a/gas/testsuite/gas/aarch64/float-fp16.d b/gas/testsuite/gas/aarch64/float-fp16.d
new file mode 100644
index 0000000..dc87981
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/float-fp16.d
@@ -0,0 +1,172 @@
+#as: -march=armv8.2-a+fp16
+#objdump: -dr
+
+.*:     file format .*
+
+Disassembly of section \.text:
+
+0000000000000000 <.*>:
+   [0-9a-f]+:	1e200400 	fccmp	s0, s0, #0x0, eq
+   [0-9a-f]+:	1ee00400 	fccmp	h0, h0, #0x0, eq
+   [0-9a-f]+:	1e22d420 	fccmp	s1, s2, #0x0, le
+   [0-9a-f]+:	1ee2d420 	fccmp	h1, h2, #0x0, le
+  [0-9a-f]+:	1e200410 	fccmpe	s0, s0, #0x0, eq
+  [0-9a-f]+:	1ee00410 	fccmpe	h0, h0, #0x0, eq
+  [0-9a-f]+:	1e22d430 	fccmpe	s1, s2, #0x0, le
+  [0-9a-f]+:	1ee2d430 	fccmpe	h1, h2, #0x0, le
+  [0-9a-f]+:	1e202000 	fcmp	s0, s0
+  [0-9a-f]+:	1ee02000 	fcmp	h0, h0
+  [0-9a-f]+:	1e222020 	fcmp	s1, s2
+  [0-9a-f]+:	1ee22020 	fcmp	h1, h2
+  [0-9a-f]+:	1e202010 	fcmpe	s0, s0
+  [0-9a-f]+:	1ee02010 	fcmpe	h0, h0
+  [0-9a-f]+:	1e222030 	fcmpe	s1, s2
+  [0-9a-f]+:	1ee22030 	fcmpe	h1, h2
+  [0-9a-f]+:	1e202008 	fcmp	s0, #0\.0
+  [0-9a-f]+:	1ee02008 	fcmp	h0, #0\.0
+  [0-9a-f]+:	1e202018 	fcmpe	s0, #0\.0
+  [0-9a-f]+:	1ee02018 	fcmpe	h0, #0\.0
+  [0-9a-f]+:	1e210c00 	fcsel	s0, s0, s1, eq
+  [0-9a-f]+:	1ee10c00 	fcsel	h0, h0, h1, eq
+  [0-9a-f]+:	9ee60000 	fmov	x0, h0
+  [0-9a-f]+:	1ee60000 	fmov	w0, h0
+  [0-9a-f]+:	9ee70001 	fmov	h1, x0
+  [0-9a-f]+:	1ee70001 	fmov	h1, w0
+  [0-9a-f]+:	1ee0c020 	fabs	h0, h1
+  [0-9a-f]+:	1e20c020 	fabs	s0, s1
+  [0-9a-f]+:	1e60c020 	fabs	d0, d1
+  [0-9a-f]+:	1ee14020 	fneg	h0, h1
+  [0-9a-f]+:	1e214020 	fneg	s0, s1
+  [0-9a-f]+:	1e614020 	fneg	d0, d1
+  [0-9a-f]+:	1ee1c020 	fsqrt	h0, h1
+  [0-9a-f]+:	1e21c020 	fsqrt	s0, s1
+  [0-9a-f]+:	1e61c020 	fsqrt	d0, d1
+  [0-9a-f]+:	1ee44020 	frintn	h0, h1
+  [0-9a-f]+:	1e244020 	frintn	s0, s1
+  [0-9a-f]+:	1e644020 	frintn	d0, d1
+  [0-9a-f]+:	1ee4c020 	frintp	h0, h1
+  [0-9a-f]+:	1e24c020 	frintp	s0, s1
+  [0-9a-f]+:	1e64c020 	frintp	d0, d1
+  [0-9a-f]+:	1ee54020 	frintm	h0, h1
+  [0-9a-f]+:	1e254020 	frintm	s0, s1
+  [0-9a-f]+:	1e654020 	frintm	d0, d1
+  [0-9a-f]+:	1ee5c020 	frintz	h0, h1
+  [0-9a-f]+:	1e25c020 	frintz	s0, s1
+  [0-9a-f]+:	1e65c020 	frintz	d0, d1
+  [0-9a-f]+:	1ee64020 	frinta	h0, h1
+  [0-9a-f]+:	1e264020 	frinta	s0, s1
+  [0-9a-f]+:	1e664020 	frinta	d0, d1
+  [0-9a-f]+:	1ee74020 	frintx	h0, h1
+  [0-9a-f]+:	1e274020 	frintx	s0, s1
+  [0-9a-f]+:	1e674020 	frintx	d0, d1
+  [0-9a-f]+:	1ee7c020 	frinti	h0, h1
+  [0-9a-f]+:	1e27c020 	frinti	s0, s1
+  [0-9a-f]+:	1e67c020 	frinti	d0, d1
+  [0-9a-f]+:	1ee20820 	fmul	h0, h1, h2
+  [0-9a-f]+:	1e220820 	fmul	s0, s1, s2
+  [0-9a-f]+:	1e620820 	fmul	d0, d1, d2
+  [0-9a-f]+:	1ee21820 	fdiv	h0, h1, h2
+  [0-9a-f]+:	1e221820 	fdiv	s0, s1, s2
+  [0-9a-f]+:	1e621820 	fdiv	d0, d1, d2
+  [0-9a-f]+:	1ee22820 	fadd	h0, h1, h2
+  [0-9a-f]+:	1e222820 	fadd	s0, s1, s2
+ [0-9a-f]+:	1e622820 	fadd	d0, d1, d2
+ [0-9a-f]+:	1ee23820 	fsub	h0, h1, h2
+ [0-9a-f]+:	1e223820 	fsub	s0, s1, s2
+ [0-9a-f]+:	1e623820 	fsub	d0, d1, d2
+ [0-9a-f]+:	1ee24820 	fmax	h0, h1, h2
+ [0-9a-f]+:	1e224820 	fmax	s0, s1, s2
+ [0-9a-f]+:	1e624820 	fmax	d0, d1, d2
+ [0-9a-f]+:	1ee25820 	fmin	h0, h1, h2
+ [0-9a-f]+:	1e225820 	fmin	s0, s1, s2
+ [0-9a-f]+:	1e625820 	fmin	d0, d1, d2
+ [0-9a-f]+:	1ee26820 	fmaxnm	h0, h1, h2
+ [0-9a-f]+:	1e226820 	fmaxnm	s0, s1, s2
+ [0-9a-f]+:	1e626820 	fmaxnm	d0, d1, d2
+ [0-9a-f]+:	1ee27820 	fminnm	h0, h1, h2
+ [0-9a-f]+:	1e227820 	fminnm	s0, s1, s2
+ [0-9a-f]+:	1e627820 	fminnm	d0, d1, d2
+ [0-9a-f]+:	1ee28820 	fnmul	h0, h1, h2
+ [0-9a-f]+:	1e228820 	fnmul	s0, s1, s2
+ [0-9a-f]+:	1e628820 	fnmul	d0, d1, d2
+ [0-9a-f]+:	1fc20c20 	fmadd	h0, h1, h2, h3
+ [0-9a-f]+:	1f020c20 	fmadd	s0, s1, s2, s3
+ [0-9a-f]+:	1f420c20 	fmadd	d0, d1, d2, d3
+ [0-9a-f]+:	1fc28c20 	fmsub	h0, h1, h2, h3
+ [0-9a-f]+:	1f028c20 	fmsub	s0, s1, s2, s3
+ [0-9a-f]+:	1f428c20 	fmsub	d0, d1, d2, d3
+ [0-9a-f]+:	1fe20c20 	fnmadd	h0, h1, h2, h3
+ [0-9a-f]+:	1f220c20 	fnmadd	s0, s1, s2, s3
+ [0-9a-f]+:	1f620c20 	fnmadd	d0, d1, d2, d3
+ [0-9a-f]+:	1fe28c20 	fnmsub	h0, h1, h2, h3
+ [0-9a-f]+:	1f228c20 	fnmsub	s0, s1, s2, s3
+ [0-9a-f]+:	1f628c20 	fnmsub	d0, d1, d2, d3
+ [0-9a-f]+:	1e2e1000 	fmov	s0, #1\.000000000000000000e\+00
+ [0-9a-f]+:	1eee1000 	fmov	h0, #1\.000000000000000000e\+00
+ [0-9a-f]+:	1e02f820 	scvtf	s0, w1, #2
+ [0-9a-f]+:	9e02f420 	scvtf	s0, x1, #3
+ [0-9a-f]+:	1ec2f820 	scvtf	h0, w1, #2
+ [0-9a-f]+:	9ec2f420 	scvtf	h0, x1, #3
+ [0-9a-f]+:	1e03f820 	ucvtf	s0, w1, #2
+ [0-9a-f]+:	9e03f420 	ucvtf	s0, x1, #3
+ [0-9a-f]+:	1ec3f820 	ucvtf	h0, w1, #2
+ [0-9a-f]+:	9ec3f420 	ucvtf	h0, x1, #3
+ [0-9a-f]+:	1e58f801 	fcvtzs	w1, d0, #2
+ [0-9a-f]+:	9e58f401 	fcvtzs	x1, d0, #3
+ [0-9a-f]+:	1ed8f801 	fcvtzs	w1, h0, #2
+ [0-9a-f]+:	9ed8f401 	fcvtzs	x1, h0, #3
+ [0-9a-f]+:	1e59f801 	fcvtzu	w1, d0, #2
+ [0-9a-f]+:	9e59f401 	fcvtzu	x1, d0, #3
+ [0-9a-f]+:	1ed9f801 	fcvtzu	w1, h0, #2
+ [0-9a-f]+:	9ed9f401 	fcvtzu	x1, h0, #3
+ [0-9a-f]+:	1e200001 	fcvtns	w1, s0
+ [0-9a-f]+:	9e600001 	fcvtns	x1, d0
+ [0-9a-f]+:	1ee00001 	fcvtns	w1, h0
+ [0-9a-f]+:	9ee00001 	fcvtns	x1, h0
+ [0-9a-f]+:	1e210001 	fcvtnu	w1, s0
+ [0-9a-f]+:	9e610001 	fcvtnu	x1, d0
+ [0-9a-f]+:	1ee10001 	fcvtnu	w1, h0
+ [0-9a-f]+:	9ee10001 	fcvtnu	x1, h0
+ [0-9a-f]+:	1e250001 	fcvtau	w1, s0
+ [0-9a-f]+:	9e650001 	fcvtau	x1, d0
+ [0-9a-f]+:	1ee50001 	fcvtau	w1, h0
+ [0-9a-f]+:	9ee50001 	fcvtau	x1, h0
+ [0-9a-f]+:	1e240001 	fcvtas	w1, s0
+ [0-9a-f]+:	9e640001 	fcvtas	x1, d0
+ [0-9a-f]+:	1ee40001 	fcvtas	w1, h0
+ [0-9a-f]+:	9ee40001 	fcvtas	x1, h0
+ [0-9a-f]+:	1e280001 	fcvtps	w1, s0
+ [0-9a-f]+:	9e680001 	fcvtps	x1, d0
+ [0-9a-f]+:	1ee80001 	fcvtps	w1, h0
+ [0-9a-f]+:	9ee80001 	fcvtps	x1, h0
+ [0-9a-f]+:	1e290001 	fcvtpu	w1, s0
+ [0-9a-f]+:	9e690001 	fcvtpu	x1, d0
+ [0-9a-f]+:	1ee90001 	fcvtpu	w1, h0
+ [0-9a-f]+:	9ee90001 	fcvtpu	x1, h0
+ [0-9a-f]+:	1e300001 	fcvtms	w1, s0
+ [0-9a-f]+:	9e700001 	fcvtms	x1, d0
+ [0-9a-f]+:	1ef00001 	fcvtms	w1, h0
+ [0-9a-f]+:	9ef00001 	fcvtms	x1, h0
+ [0-9a-f]+:	1e310001 	fcvtmu	w1, s0
+ [0-9a-f]+:	9e710001 	fcvtmu	x1, d0
+ [0-9a-f]+:	1ef10001 	fcvtmu	w1, h0
+ [0-9a-f]+:	9ef10001 	fcvtmu	x1, h0
+ [0-9a-f]+:	1e220020 	scvtf	s0, w1
+ [0-9a-f]+:	9e620020 	scvtf	d0, x1
+ [0-9a-f]+:	1ee20020 	scvtf	h0, w1
+ [0-9a-f]+:	9ee20020 	scvtf	h0, x1
+ [0-9a-f]+:	1e230020 	ucvtf	s0, w1
+ [0-9a-f]+:	9e630020 	ucvtf	d0, x1
+ [0-9a-f]+:	1ee30020 	ucvtf	h0, w1
+ [0-9a-f]+:	9ee30020 	ucvtf	h0, x1
+ [0-9a-f]+:	1e604020 	fmov	d0, d1
+ [0-9a-f]+:	1e204020 	fmov	s0, s1
+ [0-9a-f]+:	1ee04020 	fmov	h0, h1
+ [0-9a-f]+:	9ee60020 	fmov	x0, h1
+ [0-9a-f]+:	1ee60020 	fmov	w0, h1
+ [0-9a-f]+:	9ee70001 	fmov	h1, x0
+ [0-9a-f]+:	1ee70001 	fmov	h1, w0
+ [0-9a-f]+:	1e260020 	fmov	w0, s1
+ [0-9a-f]+:	9e660020 	fmov	x0, d1
+ [0-9a-f]+:	1e270001 	fmov	s1, w0
+ [0-9a-f]+:	9e670001 	fmov	d1, x0
diff --git a/gas/testsuite/gas/aarch64/float-fp16.s b/gas/testsuite/gas/aarch64/float-fp16.s
new file mode 100644
index 0000000..0c30038
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/float-fp16.s
@@ -0,0 +1,151 @@
+/* Test file for AArch64 half-precision floating-point instructions.  */
+
+	.text
+	fccmp s0, s0, #0, eq
+	fccmp h0, h0, #0, eq
+	fccmp s1, s2, #0, le
+	fccmp h1, h2, #0, le
+
+	fccmpe s0, s0, #0, eq
+	fccmpe h0, h0, #0, eq
+	fccmpe s1, s2, #0, le
+	fccmpe h1, h2, #0, le
+
+	fcmp s0, s0
+	fcmp h0, h0
+	fcmp s1, s2
+	fcmp h1, h2
+
+	fcmpe s0, s0
+	fcmpe h0, h0
+	fcmpe s1, s2
+	fcmpe h1, h2
+
+	fcmp s0, #0.0
+	fcmp h0, #0.0
+
+	fcmpe s0, #0.0
+	fcmpe h0, #0.0
+
+	fcsel s0, s0, s1, eq
+	fcsel h0, h0, h1, eq
+
+	fmov x0, h0
+	fmov w0, h0
+	fmov h1, x0
+	fmov h1, w0
+
+	/* Scalar data-processing with one source.  */
+	.macro sdp1src op
+	\op     h0, h1
+	\op     s0, s1
+	\op     d0, d1
+	.endm
+
+	.text
+	.irp op, fabs, fneg, fsqrt, frintn, frintp, frintm, frintz
+	sdp1src \op
+	.endr
+
+	.irp op, frinta, frintx, frinti
+	sdp1src \op
+	.endr
+
+	/* Scalar data-processing with two sources.  */
+	.macro sdp2src op
+	\op     h0, h1, h2
+	\op     s0, s1, s2
+	\op     d0, d1, d2
+	.endm
+
+	.text
+	.irp op, fmul, fdiv, fadd, fsub, fmax, fmin, fmaxnm, fminnm, fnmul
+	sdp2src \op
+	.endr
+
+	/* Scalar data-processing with three sources.  */
+	.macro sdp3src op
+	\op     h0, h1, h2, h3
+	\op     s0, s1, s2, s3
+	\op     d0, d1, d2, d3
+	.endm
+
+	.text
+	.irp op, fmadd, fmsub, fnmadd, fnmsub
+	sdp3src \op
+	.endr
+
+	/* Scalar conversion.  */
+
+	.macro scvt_fix2fp op
+	\op     s0, w1, #2
+	\op     s0, x1, #3
+	\op     h0, w1, #2
+	\op     h0, x1, #3
+	.endm
+
+	.macro scvt_fp2fix op
+	\op     w1, d0, #2
+	\op     x1, d0, #3
+	\op     w1, h0, #2
+	\op     x1, h0, #3
+	.endm
+
+	.text
+
+	fmov s0, #1.0
+	fmov h0, #1.0
+
+	.irp op, scvtf, ucvtf
+	scvt_fix2fp \op
+	.endr
+
+	.irp op, fcvtzs, fcvtzu
+	scvt_fp2fix \op
+	.endr
+
+	.macro scvt_fp2int op
+	\op w1, s0
+	\op x1, d0
+	\op w1, h0
+	\op x1, h0
+	.endm
+
+	.macro scvt_int2fp op
+	\op s0, w1
+	\op d0, x1
+	\op h0, w1
+	\op h0, x1
+	.endm
+
+	.text
+	.irp op, fcvtns, fcvtnu, fcvtau, fcvtas
+	scvt_fp2int \op
+	.endr
+
+	.text
+	.irp op, fcvtps, fcvtpu, fcvtms, fcvtmu
+	scvt_fp2int \op
+	.endr
+
+	.irp op, scvtf, ucvtf
+	scvt_int2fp \op
+	.endr
+
+	/* FMOV.  */
+
+	fmov d0, d1
+	fmov s0, s1
+	fmov h0, h1
+
+	fmov x0, h1
+	fmov w0, h1
+
+	fmov h1, x0
+	fmov h1, w0
+
+	fmov w0, s1
+	fmov x0, d1
+
+	fmov s1, w0
+	fmov d1, x0
diff --git a/opcodes/aarch64-asm-2.c b/opcodes/aarch64-asm-2.c
index 51a8860..64bc136 100644
diff --git a/opcodes/aarch64-dis-2.c b/opcodes/aarch64-dis-2.c
index 57630d5..9dfa7e4 100644
diff --git a/opcodes/aarch64-opc-2.c b/opcodes/aarch64-opc-2.c
index ca3f35b..968e99c 100644
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 6b77b36..50bbc2d 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -259,6 +259,13 @@
   QLF3(S_S,X,imm_1_64),		\
 }
 
+/* e.g. SCVTF <Hd>, <Xn>, #<fbits>.  */
+#define QL_FIX2FP_H			\
+{					\
+  QLF3 (S_H, W, imm_1_32),		\
+  QLF3 (S_H, X, imm_1_64),		\
+}
+
 /* e.g. FCVTZS <Wd>, <Dn>, #<fbits>.  */
 #define QL_FP2FIX		\
 {				\
@@ -268,6 +275,13 @@
   QLF3(X,S_S,imm_1_64),		\
 }
 
+/* e.g. FCVTZS <Wd>, <Hn>, #<fbits>.  */
+#define QL_FP2FIX_H			\
+{					\
+  QLF3 (W, S_H, imm_1_32),		\
+  QLF3 (X, S_H, imm_1_64),		\
+}
+
 /* e.g. SCVTF <Dd>, <Wn>.  */
 #define QL_INT2FP		\
 {				\
@@ -277,6 +291,13 @@
   QLF2(S_S,X),			\
 }
 
+/* e.g. SCVTF <Hd>, <Wn>.  */
+#define QL_INT2FP_H			\
+{					\
+  QLF2 (S_H, W),			\
+  QLF2 (S_H, X),			\
+}
+
 /* e.g. FCVTNS <Xd>, <Dn>.  */
 #define QL_FP2INT		\
 {				\
@@ -286,6 +307,13 @@
   QLF2(X,S_S),			\
 }
 
+/* e.g. FCVTNS <Wd>, <Hn>.  */
+#define QL_FP2INT_H			\
+{					\
+  QLF2 (W, S_H),			\
+  QLF2 (X, S_H),			\
+}
+
 /* e.g. FMOV <Xd>, <Vn>.D[1].  */
 #define QL_XVD1			\
 {				\
@@ -504,6 +532,12 @@
   QLF2(S_D, S_D),		\
 }
 
+/* FMOV <Hd>, <Hn>.  */
+#define QL_FP2_H		\
+{				\
+  QLF2 (S_H, S_H),		\
+}
+
 /* e.g. SQADD <V><d>, <V><n>, <V><m>.  */
 #define QL_S_3SAME		\
 {				\
@@ -540,6 +574,12 @@
   QLF3(S_D, S_D, S_D),		\
 }
 
+/* FMUL <Hd>, <Hn>, <Hm>.  */
+#define QL_FP3_H		\
+{				\
+  QLF3 (S_H, S_H, S_H),		\
+}
+
 /* FMADD <Dd>, <Dn>, <Dm>, <Da>.  */
 #define QL_FP4			\
 {				\
@@ -547,6 +587,12 @@
   QLF4(S_D, S_D, S_D, S_D),	\
 }
 
+/* FMADD <Hd>, <Hn>, <Hm>, <Ha>.  */
+#define QL_FP4_H		\
+{				\
+  QLF4 (S_H, S_H, S_H, S_H),	\
+}
+
 /* e.g. FCMP <Dn>, #0.0.  */
 #define QL_DST_SD			\
 {				\
@@ -554,6 +600,12 @@
   QLF2(S_D, NIL),		\
 }
 
+/* e.g. FCMP <Hn>, #0.0.  */
+#define QL_DST_H		\
+{				\
+  QLF2 (S_H, NIL),		\
+}
+
 /* FCSEL <Sd>, <Sn>, <Sm>, <cond>.  */
 #define QL_FP_COND		\
 {				\
@@ -561,6 +613,12 @@
   QLF4(S_D, S_D, S_D, NIL),	\
 }
 
+/* FCSEL <Hd>, <Hn>, <Hm>, <cond>.  */
+#define QL_FP_COND_H		\
+{				\
+  QLF4 (S_H, S_H, S_H, NIL),	\
+}
+
 /* e.g. CCMN <Xn>, <Xm>, #<nzcv>, <cond>.  */
 #define QL_CCMP			\
 {				\
@@ -582,6 +640,12 @@
   QLF4(S_D, S_D, NIL, NIL),	\
 }
 
+/* e.g. FCCMP <Hn>, <Hm>, #<nzcv>, <cond>.  */
+#define QL_FCCMP_H		\
+{				\
+  QLF4 (S_H, S_H, NIL, NIL),	\
+}
+
 /* e.g. DUP <Vd>.<T>, <Vn>.<Ts>[<index>].  */
 #define QL_DUP_VX		\
 {				\
@@ -1907,66 +1971,166 @@ struct aarch64_opcode aarch64_opcode_table[] =
   {"ror", 0x13800000, 0x7fa00000, extract, OP_ROR_IMM, CORE, OP3 (Rd, Rm, IMMS), QL_SHIFT, F_ALIAS | F_CONV},
   /* Floating-point<->fixed-point conversions.  */
   {"scvtf", 0x1e020000, 0x7f3f0000, float2fix, 0, FP, OP3 (Fd, Rn, FBITS), QL_FIX2FP, F_FPTYPE | F_SF},
+  {"scvtf", 0x1ec20000, 0x7f3f0000, float2fix, 0, FP_F16,
+   OP3 (Fd, Rn, FBITS), QL_FIX2FP_H, F_FPTYPE | F_SF},
   {"ucvtf", 0x1e030000, 0x7f3f0000, float2fix, 0, FP, OP3 (Fd, Rn, FBITS), QL_FIX2FP, F_FPTYPE | F_SF},
+  {"ucvtf", 0x1ec30000, 0x7f3f0000, float2fix, 0, FP_F16,
+   OP3 (Fd, Rn, FBITS), QL_FIX2FP_H, F_FPTYPE | F_SF},
   {"fcvtzs", 0x1e180000, 0x7f3f0000, float2fix, 0, FP, OP3 (Rd, Fn, FBITS), QL_FP2FIX, F_FPTYPE | F_SF},
+  {"fcvtzs", 0x1ed80000, 0x7f3f0000, float2fix, 0, FP_F16,
+   OP3 (Rd, Fn, FBITS), QL_FP2FIX_H, F_FPTYPE | F_SF},
   {"fcvtzu", 0x1e190000, 0x7f3f0000, float2fix, 0, FP, OP3 (Rd, Fn, FBITS), QL_FP2FIX, F_FPTYPE | F_SF},
+  {"fcvtzu", 0x1ed90000, 0x7f3f0000, float2fix, 0, FP_F16,
+   OP3 (Rd, Fn, FBITS), QL_FP2FIX_H, F_FPTYPE | F_SF},
   /* Floating-point<->integer conversions.  */
   {"fcvtns", 0x1e200000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtns", 0x1ee00000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtnu", 0x1e210000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtnu", 0x1ee10000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"scvtf", 0x1e220000, 0x7f3ffc00, float2int, 0, FP, OP2 (Fd, Rn), QL_INT2FP, F_FPTYPE | F_SF},
+  {"scvtf", 0x1ee20000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Fd, Rn), QL_INT2FP_H, F_FPTYPE | F_SF},
   {"ucvtf", 0x1e230000, 0x7f3ffc00, float2int, 0, FP, OP2 (Fd, Rn), QL_INT2FP, F_FPTYPE | F_SF},
+  {"ucvtf", 0x1ee30000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Fd, Rn), QL_INT2FP_H, F_FPTYPE | F_SF},
   {"fcvtas", 0x1e240000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtas", 0x1ee40000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtau", 0x1e250000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtau", 0x1ee50000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fmov", 0x1e260000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fmov", 0x1ee60000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fmov", 0x1e270000, 0x7f3ffc00, float2int, 0, FP, OP2 (Fd, Rn), QL_INT2FP, F_FPTYPE | F_SF},
+  {"fmov", 0x1ee70000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Fd, Rn), QL_INT2FP_H, F_FPTYPE | F_SF},
   {"fcvtps", 0x1e280000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtps", 0x1ee80000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtpu", 0x1e290000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtpu", 0x1ee90000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtms", 0x1e300000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtms", 0x1ef00000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtmu", 0x1e310000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtmu", 0x1ef10000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtzs", 0x1e380000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtzs", 0x1ef80000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fcvtzu", 0x1e390000, 0x7f3ffc00, float2int, 0, FP, OP2 (Rd, Fn), QL_FP2INT, F_FPTYPE | F_SF},
+  {"fcvtzu", 0x1ef90000, 0x7f3ffc00, float2int, 0, FP_F16,
+   OP2 (Rd, Fn), QL_FP2INT_H, F_FPTYPE | F_SF},
   {"fmov", 0x9eae0000, 0xfffffc00, float2int, 0, FP, OP2 (Rd, VnD1), QL_XVD1, 0},
   {"fmov", 0x9eaf0000, 0xfffffc00, float2int, 0, FP, OP2 (VdD1, Rn), QL_VD1X, 0},
   /* Floating-point conditional compare.  */
   {"fccmp", 0x1e200400, 0xff200c10, floatccmp, 0, FP, OP4 (Fn, Fm, NZCV, COND), QL_FCCMP, F_FPTYPE},
+  {"fccmp", 0x1ee00400, 0xff200c10, floatccmp, 0, FP_F16,
+   OP4 (Fn, Fm, NZCV, COND), QL_FCCMP_H, F_FPTYPE},
   {"fccmpe", 0x1e200410, 0xff200c10, floatccmp, 0, FP, OP4 (Fn, Fm, NZCV, COND), QL_FCCMP, F_FPTYPE},
+  {"fccmpe", 0x1ee00410, 0xff200c10, floatccmp, 0, FP_F16,
+   OP4 (Fn, Fm, NZCV, COND), QL_FCCMP_H, F_FPTYPE},
   /* Floating-point compare.  */
   {"fcmp", 0x1e202000, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, Fm), QL_FP2, F_FPTYPE},
+  {"fcmp", 0x1ee02000, 0xff20fc1f, floatcmp, 0, FP_F16,
+   OP2 (Fn, Fm), QL_FP2_H, F_FPTYPE},
   {"fcmpe", 0x1e202010, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, Fm), QL_FP2, F_FPTYPE},
+  {"fcmpe", 0x1ee02010, 0xff20fc1f, floatcmp, 0, FP_F16,
+   OP2 (Fn, Fm), QL_FP2_H, F_FPTYPE},
   {"fcmp", 0x1e202008, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, FPIMM0), QL_DST_SD, F_FPTYPE},
+  {"fcmp", 0x1ee02008, 0xff20fc1f, floatcmp, 0, FP_F16,
+   OP2 (Fn, FPIMM0), QL_FP2_H, F_FPTYPE},
   {"fcmpe", 0x1e202018, 0xff20fc1f, floatcmp, 0, FP, OP2 (Fn, FPIMM0), QL_DST_SD, F_FPTYPE},
+  {"fcmpe", 0x1ee02018, 0xff20fc1f, floatcmp, 0, FP_F16,
+   OP2 (Fn, FPIMM0), QL_FP2_H, F_FPTYPE},
   /* Floating-point data-processing (1 source).  */
   {"fmov", 0x1e204000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"fmov", 0x1ee04000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"fabs", 0x1e20c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"fabs", 0x1ee0c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"fneg", 0x1e214000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"fneg", 0x1ee14000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"fsqrt", 0x1e21c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"fsqrt", 0x1ee1c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"fcvt", 0x1e224000, 0xff3e7c00, floatdp1, OP_FCVT, FP, OP2 (Fd, Fn), QL_FCVT, F_FPTYPE | F_MISC},
   {"frintn", 0x1e244000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frintn", 0x1ee44000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"frintp", 0x1e24c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frintp", 0x1ee4c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"frintm", 0x1e254000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frintm", 0x1ee54000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"frintz", 0x1e25c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frintz", 0x1ee5c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"frinta", 0x1e264000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frinta", 0x1ee64000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"frintx", 0x1e274000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frintx", 0x1ee74000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   {"frinti", 0x1e27c000, 0xff3ffc00, floatdp1, 0, FP, OP2 (Fd, Fn), QL_FP2, F_FPTYPE},
+  {"frinti", 0x1ee7c000, 0xff3ffc00, floatdp1, 0, FP_F16,
+   OP2 (Fd, Fn), QL_FP2_H, F_FPTYPE},
   /* Floating-point data-processing (2 source).  */
   {"fmul", 0x1e200800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fmul", 0x1ee00800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fdiv", 0x1e201800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fdiv", 0x1ee01800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fadd", 0x1e202800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fadd", 0x1ee02800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fsub", 0x1e203800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fsub", 0x1ee03800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fmax", 0x1e204800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fmax", 0x1ee04800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fmin", 0x1e205800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fmin", 0x1ee05800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fmaxnm", 0x1e206800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fmaxnm", 0x1ee06800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fminnm", 0x1e207800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fminnm", 0x1ee07800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   {"fnmul", 0x1e208800, 0xff20fc00, floatdp2, 0, FP, OP3 (Fd, Fn, Fm), QL_FP3, F_FPTYPE},
+  {"fnmul", 0x1ee08800, 0xff20fc00, floatdp2, 0, FP_F16,
+   OP3 (Fd, Fn, Fm), QL_FP3_H, F_FPTYPE},
   /* Floating-point data-processing (3 source).  */
   {"fmadd", 0x1f000000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+  {"fmadd", 0x1fc00000, 0xff208000, floatdp3, 0, FP_F16,
+   OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
   {"fmsub", 0x1f008000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+  {"fmsub", 0x1fc08000, 0xff208000, floatdp3, 0, FP_F16,
+   OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
   {"fnmadd", 0x1f200000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+  {"fnmadd", 0x1fe00000, 0xff208000, floatdp3, 0, FP_F16,
+   OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
   {"fnmsub", 0x1f208000, 0xff208000, floatdp3, 0, FP, OP4 (Fd, Fn, Fm, Fa), QL_FP4, F_FPTYPE},
+  {"fnmsub", 0x1fe08000, 0xff208000, floatdp3, 0, FP_F16,
+   OP4 (Fd, Fn, Fm, Fa), QL_FP4_H, F_FPTYPE},
   /* Floating-point immediate.  */
   {"fmov", 0x1e201000, 0xff201fe0, floatimm, 0, FP, OP2 (Fd, FPIMM), QL_DST_SD, F_FPTYPE},
+  {"fmov", 0x1ee01000, 0xff201fe0, floatimm, 0, FP_F16,
+   OP2 (Fd, FPIMM), QL_DST_H, F_FPTYPE},
   /* Floating-point conditional select.  */
   {"fcsel", 0x1e200c00, 0xff200c00, floatsel, 0, FP, OP4 (Fd, Fn, Fm, COND), QL_FP_COND, F_FPTYPE},
+  {"fcsel", 0x1ee00c00, 0xff200c00, floatsel, 0, FP_F16,
+   OP4 (Fd, Fn, Fm, COND), QL_FP_COND_H, F_FPTYPE},
   /* Load/store register (immediate indexed).  */
   {"strb", 0x38000400, 0xffe00400, ldst_imm9, 0, CORE, OP2 (Rt, ADDR_SIMM9), QL_LDST_W8, 0},
   {"ldrb", 0x38400400, 0xffe00400, ldst_imm9, 0, CORE, OP2 (Rt, ADDR_SIMM9), QL_LDST_W8, 0},
-- 
2.1.4

Follow-Ups:
- Re: [AArch64][PATCH 3/3] Add floating-point FP16 instructions
  - From: Nick Clifton

References:
- [AArch64][PATCH 1/3] Support ARMv8.2 FP16 floating point instructions.
  - From: Matthew Wahab

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]