This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
PATCH: Update for SSSE3
- From: "H. J. Lu" <hjl at lucon dot org>
- To: binutils at sources dot redhat dot com
- Date: Thu, 28 Sep 2006 07:03:52 -0700
- Subject: PATCH: Update for SSSE3
The official name of MNI is SSSE3:
http://developer.intel.com/design/Pentium4/manuals/index_new.htm
I am going to check in this patch.
H.J.
----
gas/
2006-09-28 H.J. Lu <hongjiu.lu@intel.com>
* config/tc-i386.h (CpuMNI): Renamed to ...
(CpuSSSE3): This.
(CpuUnknownFlags): Updated.
(processor_type): Replace PROCESSOR_YONAH with PROCESSOR_CORE
and PROCESSOR_MEROM with PROCESSOR_CORE2.
* config/tc-i386.c: Updated.
* doc/c-i386.texi: Likewise.
* config/tc-i386.c (cpu_arch): Add ".ssse3", "core" and "core2".
include/opcode/
2006-09-28 H.J. Lu <hongjiu.lu@intel.com>
* i386.h: Replace CpuMNI with CpuSSSE3.
--- binutils/gas/config/tc-i386.c.ssse3 2006-08-18 17:41:48.000000000 -0700
+++ binutils/gas/config/tc-i386.c 2006-08-18 17:47:32.000000000 -0700
@@ -469,12 +469,18 @@ static const arch_entry cpu_arch[] =
{"nocona", PROCESSOR_NOCONA,
Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
- {"yonah", PROCESSOR_YONAH,
+ {"yonah", PROCESSOR_CORE,
Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
- {"merom", PROCESSOR_MEROM,
+ {"core", PROCESSOR_CORE,
Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
- |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuMNI},
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+ {"merom", PROCESSOR_CORE2,
+ Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3},
+ {"core2", PROCESSOR_CORE2,
+ Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3},
{"k6", PROCESSOR_K6,
Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX},
{"k6_2", PROCESSOR_K6,
@@ -503,6 +509,8 @@ static const arch_entry cpu_arch[] =
CpuMMX|CpuMMX2|CpuSSE|CpuSSE2},
{".sse3", PROCESSOR_UNKNOWN,
CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+ {".ssse3", PROCESSOR_UNKNOWN,
+ CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3},
{".3dnow", PROCESSOR_UNKNOWN,
CpuMMX|Cpu3dnow},
{".3dnowa", PROCESSOR_UNKNOWN,
@@ -750,9 +758,9 @@ i386_align_code (fragP, count)
1. For PROCESSOR_I486, PROCESSOR_PENTIUM and PROCESSOR_GENERIC32,
f32_patt will be used.
2. For PROCESSOR_K8 and PROCESSOR_AMDFAM10 in 64bit, NOPs with 0x66 prefix will be used.
- 3. For PROCESSOR_MEROM, alt_long_patt will be used.
+ 3. For PROCESSOR_CORE2, alt_long_patt will be used.
4. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA,
- PROCESSOR_YONAH, PROCESSOR_MEROM, PROCESSOR_K6, PROCESSOR_ATHLON
+ PROCESSOR_CORE, PROCESSOR_CORE2, PROCESSOR_K6, PROCESSOR_ATHLON
and PROCESSOR_GENERIC64, alt_short_patt will be used.
When -mtune= isn't used, alt_short_patt will be used if
@@ -809,13 +817,13 @@ i386_align_code (fragP, count)
else
patt = f32_patt;
break;
- case PROCESSOR_MEROM:
+ case PROCESSOR_CORE2:
patt = alt_long_patt;
break;
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_NOCONA:
- case PROCESSOR_YONAH:
+ case PROCESSOR_CORE:
case PROCESSOR_K6:
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
@@ -845,7 +853,7 @@ i386_align_code (fragP, count)
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_NOCONA:
- case PROCESSOR_YONAH:
+ case PROCESSOR_CORE:
case PROCESSOR_K6:
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
@@ -858,7 +866,7 @@ i386_align_code (fragP, count)
else
patt = f32_patt;
break;
- case PROCESSOR_MEROM:
+ case PROCESSOR_CORE2:
if ((cpu_arch_isa_flags & Cpu686) != 0)
patt = alt_long_patt;
else
@@ -3831,10 +3839,11 @@ output_insn ()
unsigned char *q;
unsigned int prefix;
- /* All opcodes on i386 have either 1 or 2 bytes. Merom New
- Instructions have 3 bytes. We may use one more higher byte
- to specify a prefix the instruction requires. */
- if ((i.tm.cpu_flags & CpuMNI) != 0)
+ /* All opcodes on i386 have either 1 or 2 bytes. Supplemental
+ Streaming SIMD extensions 3 Instructions have 3 bytes. We may
+ use one more higher byte to specify a prefix the instruction
+ requires. */
+ if ((i.tm.cpu_flags & CpuSSSE3) != 0)
{
if (i.tm.base_opcode & 0xff000000)
{
@@ -3875,7 +3884,7 @@ output_insn ()
}
else
{
- if ((i.tm.cpu_flags & CpuMNI) != 0)
+ if ((i.tm.cpu_flags & CpuSSSE3) != 0)
{
p = frag_more (3);
*p++ = (i.tm.base_opcode >> 16) & 0xff;
@@ -5923,7 +5932,7 @@ md_show_usage (stream)
fprintf (stream, _("\
-march=CPU/-mtune=CPU generate code/optimize for CPU, where CPU is one of:\n\
i386, i486, pentium, pentiumpro, pentium4, nocona,\n\
- yonah, merom, k6, athlon, k8, generic32, generic64\n"));
+ core, core2, k6, athlon, k8, generic32, generic64\n"));
}
--- binutils/gas/config/tc-i386.h.ssse3 2006-08-18 17:41:48.000000000 -0700
+++ binutils/gas/config/tc-i386.h 2006-08-18 17:46:29.000000000 -0700
@@ -187,7 +187,7 @@ typedef struct
#define CpuPadLock 0x10000 /* VIA PadLock required */
#define CpuSVME 0x20000 /* AMD Secure Virtual Machine Ext-s required */
#define CpuVMX 0x40000 /* VMX Instructions required */
-#define CpuMNI 0x80000 /* Merom New Instructions required */
+#define CpuSSSE3 0x80000 /* Supplemental Streaming SIMD extensions 3 required */
#define CpuSSE4a 0x100000 /* SSE4a New Instuctions required */
#define CpuABM 0x200000 /* ABM New Instructions required */
@@ -198,7 +198,7 @@ typedef struct
/* The default value for unknown CPUs - enable all features to avoid problems. */
#define CpuUnknownFlags (Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \
|CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI|CpuVMX \
- |Cpu3dnow|Cpu3dnowA|CpuK6|CpuPadLock|CpuSVME|CpuMNI|CpuABM|CpuSSE4a)
+ |Cpu3dnow|Cpu3dnowA|CpuK6|CpuPadLock|CpuSVME|CpuSSSE3|CpuABM|CpuSSE4a)
/* the bits in opcode_modifier are used to generate the final opcode from
the base_opcode. These bits also are used to detect alternate forms of
@@ -390,8 +390,8 @@ enum processor_type
PROCESSOR_PENTIUMPRO,
PROCESSOR_PENTIUM4,
PROCESSOR_NOCONA,
- PROCESSOR_YONAH,
- PROCESSOR_MEROM,
+ PROCESSOR_CORE,
+ PROCESSOR_CORE2,
PROCESSOR_K6,
PROCESSOR_ATHLON,
PROCESSOR_K8,
--- binutils/gas/doc/c-i386.texi.ssse3 2006-07-28 13:51:47.000000000 -0700
+++ binutils/gas/doc/c-i386.texi 2006-08-18 17:49:30.000000000 -0700
@@ -95,8 +95,8 @@ instructions. The following architectur
@code{pentium4},
@code{prescott},
@code{nocona},
-@code{yonah},
-@code{merom},
+@code{core},
+@code{core2},
@code{k6},
@code{k6_2},
@code{athlon},
@@ -752,7 +752,7 @@ supported on the CPU specified. The cho
@item @samp{i8086} @tab @samp{i186} @tab @samp{i286} @tab @samp{i386}
@item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
@item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
-@item @samp{prescott} @tab @samp{nocona} @tab @samp{yonah} @tab @samp{merom}
+@item @samp{prescott} @tab @samp{nocona} @tab @samp{core} @tab @samp{core2}
@item @samp{k6} @tab @samp{athlon} @tab @samp{sledgehammer} @tab @samp{k8}
@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.padlock} @tab @samp{.pacifica}
--- binutils/include/opcode/i386.h.ssse3 2006-08-18 14:52:12.000000000 -0700
+++ binutils/include/opcode/i386.h 2006-08-18 17:43:23.000000000 -0700
@@ -1375,40 +1375,40 @@ static const template i386_optab[] =
{"vmxoff", 0, 0x0f01, 0xc4, CpuVMX, NoSuf|ImmExt, { 0, 0, 0} },
{"vmxon", 1, 0xf30fc7, 6, CpuVMX, NoSuf|IgnoreSize|Modrm|NoRex64, { LLongMem, 0, 0} },
-/* Merom New Instructions. */
+/* Supplemental Streaming SIMD extensions 3 Instructions. */
-{"phaddw", 2, 0x0f3801,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"phaddw", 2, 0x660f3801,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"phaddd", 2, 0x0f3802,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"phaddd", 2, 0x660f3802,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"phaddsw", 2, 0x0f3803,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"phaddsw", 2, 0x660f3803,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"phsubw", 2, 0x0f3805,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"phsubw", 2, 0x660f3805,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"phsubd", 2, 0x0f3806,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"phsubd", 2, 0x660f3806,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"phsubsw", 2, 0x0f3807,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"phsubsw", 2, 0x660f3807,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"pmaddubsw", 2, 0x0f3804,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"pmaddubsw", 2, 0x660f3804,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"pmulhrsw", 2, 0x0f380b,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"pmulhrsw", 2, 0x660f380b,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"pshufb", 2, 0x0f3800,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"pshufb", 2, 0x660f3800,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"psignb", 2, 0x0f3808,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"psignb", 2, 0x660f3808,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"psignw", 2, 0x0f3809,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"psignw", 2, 0x660f3809,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"psignd", 2, 0x0f380a,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"psignd", 2, 0x660f380a,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"palignr", 3, 0x0f3a0f,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { Imm8, RegMMX|LongMem, RegMMX } },
-{"palignr", 3, 0x660f3a0f,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } },
-{"pabsb", 2, 0x0f381c,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"pabsb", 2, 0x660f381c,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"pabsw", 2, 0x0f381d,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"pabsw", 2, 0x660f381d,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
-{"pabsd", 2, 0x0f381e,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
-{"pabsd", 2, 0x660f381e,X, CpuMNI, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"phaddw", 2, 0x0f3801,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"phaddw", 2, 0x660f3801,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"phaddd", 2, 0x0f3802,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"phaddd", 2, 0x660f3802,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"phaddsw", 2, 0x0f3803,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"phaddsw", 2, 0x660f3803,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"phsubw", 2, 0x0f3805,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"phsubw", 2, 0x660f3805,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"phsubd", 2, 0x0f3806,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"phsubd", 2, 0x660f3806,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"phsubsw", 2, 0x0f3807,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"phsubsw", 2, 0x660f3807,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"pmaddubsw", 2, 0x0f3804,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"pmaddubsw", 2, 0x660f3804,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"pmulhrsw", 2, 0x0f380b,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"pmulhrsw", 2, 0x660f380b,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"pshufb", 2, 0x0f3800,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"pshufb", 2, 0x660f3800,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"psignb", 2, 0x0f3808,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"psignb", 2, 0x660f3808,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"psignw", 2, 0x0f3809,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"psignw", 2, 0x660f3809,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"psignd", 2, 0x0f380a,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"psignd", 2, 0x660f380a,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"palignr", 3, 0x0f3a0f,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { Imm8, RegMMX|LongMem, RegMMX } },
+{"palignr", 3, 0x660f3a0f,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } },
+{"pabsb", 2, 0x0f381c,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"pabsb", 2, 0x660f381c,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"pabsw", 2, 0x0f381d,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"pabsw", 2, 0x660f381d,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
+{"pabsd", 2, 0x0f381e,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
+{"pabsd", 2, 0x660f381e,X, CpuSSSE3, NoSuf|IgnoreSize|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
/* AMD 3DNow! instructions. */