This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
PATCH: Add -march=/-mtune= to x86 assembler
On Thu, Jun 15, 2006 at 07:58:26PM +0100, Paul Brook wrote:
>
> > in assembler. The reasons we haven't run into any serious problems are
> >
> > 1. By default, cpu_arch_flags is set to accept everything.
> > 2. .arch directive isn't used much.
> >
> > So my -mtune=CPU switch will optimize for CPU by generating instructions
> > for CPU if the instruction set of CPU is available.
>
> But you said these instructions only existed on ppro or later. You can't have
> mtune= affect the choice of instruction unless you also implement -march=.
>
> Consider gcc -march=386 -mtune=686. IIUC you're proposing that gcc doesn't
> emit .arch, and doesn't pass through -march=, so gas defaults to allowing all
> instructions. gas then sees -mtune= and generates instructions that don't
> work on 386.
>
This is a patch to add -march=/-mtune= to the x86 assembler. Currently,
it doesn't generate different code. I will add processor-specific
optimizations later.
H.J.
----
2006-06-14 H.J. Lu <hongjiu.lu@intel.com>
* config/tc-i386.h (processor_type): New.
(arch_entry): Add type.
* config/tc-i386.c (cpu_arch_tune): New.
(cpu_arch_tune_flags): Likewise.
(cpu_arch_isa): Likewise.
(cpu_arch_isa_flags): Likewise.
(cpu_arch): Updated.
(set_cpu_arch): Also update cpu_arch_isa/cpu_arch_isa_flags.
(i386_target_format): Likewise.
(OPTION_MARCH): New.
(OPTION_MTUNE): Likewise.
(md_longopts): Add -march= and -mtune=.
(md_parse_option): Support -march= and -mtune=.
(md_show_usage): Add -march=CPU/-mtune=CPU.
* doc/as.texinfo: Add -march=CPU/-mtune=CPU.
* doc/c-i386.texi: Document -march=CPU/-mtune=CPU.
--- gas/config/tc-i386.c.tune 2006-06-12 12:50:18.000000000 -0700
+++ gas/config/tc-i386.c 2006-06-15 15:40:18.000000000 -0700
@@ -323,6 +323,18 @@ static const char *cpu_sub_arch_name = N
/* CPU feature flags. */
static unsigned int cpu_arch_flags = CpuUnknownFlags | CpuNo64;
+/* Cpu we are generating instructions for. */
+enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of cpu we are generating instructions for. */
+static unsigned int cpu_arch_tune_flags = CpuUnknownFlags;
+
+/* CPU instruction set architecture to use. */
+enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of instruction set architecture used. */
+static unsigned int cpu_arch_isa_flags = CpuUnknownFlags;
+
/* If set, conditional jumps are not automatically promoted to handle
larger than a byte offset. */
static unsigned int no_cond_jump_promotion = 0;
@@ -415,35 +427,85 @@ const relax_typeS md_relax_table[] =
{0, 0, 4, 0}
};
-static const arch_entry cpu_arch[] = {
- {"i8086", Cpu086 },
- {"i186", Cpu086|Cpu186 },
- {"i286", Cpu086|Cpu186|Cpu286 },
- {"i386", Cpu086|Cpu186|Cpu286|Cpu386 },
- {"i486", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486 },
- {"i586", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
- {"i686", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
- {"pentium", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
- {"pentiumpro",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
- {"pentiumii", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX },
- {"pentiumiii",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2|CpuSSE },
- {"pentium4", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
- {"prescott", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI },
- {"k6", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX },
- {"k6_2", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow },
- {"athlon", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
- {"sledgehammer",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
- {"opteron", Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
- {".mmx", CpuMMX },
- {".sse", CpuMMX|CpuMMX2|CpuSSE },
- {".sse2", CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
- {".sse3", CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3 },
- {".3dnow", CpuMMX|Cpu3dnow },
- {".3dnowa", CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
- {".padlock", CpuPadLock },
- {".pacifica", CpuSVME },
- {".svme", CpuSVME },
- {NULL, 0 }
+static const arch_entry cpu_arch[] =
+{
+ {"generic32", PROCESSOR_GENERIC32,
+ Cpu086|Cpu186|Cpu286|Cpu386},
+ {"generic64", PROCESSOR_GENERIC64,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2},
+ {"i8086", PROCESSOR_UNKNOWN,
+ Cpu086},
+ {"i186", PROCESSOR_UNKNOWN,
+ Cpu086|Cpu186},
+ {"i286", PROCESSOR_UNKNOWN,
+ Cpu086|Cpu186|Cpu286},
+ {"i386", PROCESSOR_GENERIC32,
+ Cpu086|Cpu186|Cpu286|Cpu386},
+ {"i486", PROCESSOR_I486,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486},
+ {"i586", PROCESSOR_PENTIUM,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+ {"i686", PROCESSOR_PENTIUMPRO,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+ {"pentium", PROCESSOR_PENTIUM,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+ {"pentiumpro",PROCESSOR_PENTIUMPRO,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+ {"pentiumii", PROCESSOR_PENTIUMPRO,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX},
+ {"pentiumiii",PROCESSOR_PENTIUMPRO,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2
+ |CpuSSE},
+ {"pentium4", PROCESSOR_PENTIUM4,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2},
+ {"prescott", PROCESSOR_NOCONA,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+ {"nocona", PROCESSOR_NOCONA,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+ {"yonah", PROCESSOR_YONAH,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+ {"merom", PROCESSOR_MEROM,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+ |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuMNI},
+ {"k6", PROCESSOR_K6,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX},
+ {"k6_2", PROCESSOR_K6,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow},
+ {"athlon", PROCESSOR_ATHLON,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+ |CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+ {"sledgehammer", PROCESSOR_K8,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+ |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+ {"opteron", PROCESSOR_K8,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+ |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+ {"k8", PROCESSOR_K8,
+ Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+ |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+ {".mmx", PROCESSOR_UNKNOWN,
+ CpuMMX},
+ {".sse", PROCESSOR_UNKNOWN,
+ CpuMMX|CpuMMX2|CpuSSE},
+ {".sse2", PROCESSOR_UNKNOWN,
+ CpuMMX|CpuMMX2|CpuSSE|CpuSSE2},
+ {".sse3", PROCESSOR_UNKNOWN,
+ CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+ {".3dnow", PROCESSOR_UNKNOWN,
+ CpuMMX|Cpu3dnow},
+ {".3dnowa", PROCESSOR_UNKNOWN,
+ CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+ {".padlock", PROCESSOR_UNKNOWN,
+ CpuPadLock},
+ {".pacifica", PROCESSOR_UNKNOWN,
+ CpuSVME},
+ {".svme", PROCESSOR_UNKNOWN,
+ CpuSVME}
};
const pseudo_typeS md_pseudo_table[] =
@@ -866,9 +928,9 @@ set_cpu_arch (dummy)
{
char *string = input_line_pointer;
int e = get_symbol_end ();
- int i;
+ unsigned int i;
- for (i = 0; cpu_arch[i].name; i++)
+ for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
{
if (strcmp (string, cpu_arch[i].name) == 0)
{
@@ -878,6 +940,8 @@ set_cpu_arch (dummy)
cpu_sub_arch_name = NULL;
cpu_arch_flags = (cpu_arch[i].flags
| (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
+ cpu_arch_isa = cpu_arch[i].type;
+ cpu_arch_isa_flags = cpu_arch[i].flags;
break;
}
if ((cpu_arch_flags | cpu_arch[i].flags) != cpu_arch_flags)
@@ -890,7 +954,7 @@ set_cpu_arch (dummy)
return;
}
}
- if (!cpu_arch[i].name)
+ if (i >= ARRAY_SIZE (cpu_arch))
as_bad (_("no such architecture: `%s'"), string);
*input_line_pointer = e;
@@ -5428,6 +5492,8 @@ const char *md_shortopts = "qn";
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
+#define OPTION_MARCH (OPTION_MD_BASE + 3)
+#define OPTION_MTUNE (OPTION_MD_BASE + 4)
struct option md_longopts[] = {
{"32", no_argument, NULL, OPTION_32},
@@ -5435,15 +5501,17 @@ struct option md_longopts[] = {
{"64", no_argument, NULL, OPTION_64},
#endif
{"divide", no_argument, NULL, OPTION_DIVIDE},
+ {"march", required_argument, NULL, OPTION_MARCH},
+ {"mtune", required_argument, NULL, OPTION_MTUNE},
{NULL, no_argument, NULL, 0}
};
size_t md_longopts_size = sizeof (md_longopts);
int
-md_parse_option (c, arg)
- int c;
- char *arg ATTRIBUTE_UNUSED;
+md_parse_option (int c, char *arg)
{
+ unsigned int i;
+
switch (c)
{
case 'n':
@@ -5513,6 +5581,38 @@ md_parse_option (c, arg)
#endif
break;
+ case OPTION_MARCH:
+ if (*arg == '.')
+ as_fatal (_("Invalid -march= option: `%s'"), arg);
+ for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+ {
+ if (strcmp (arg, cpu_arch [i].name) == 0)
+ {
+ cpu_arch_isa = cpu_arch [i].type;
+ cpu_arch_isa_flags = cpu_arch[i].flags;
+ break;
+ }
+ }
+ if (i >= ARRAY_SIZE (cpu_arch))
+ as_fatal (_("Invalid -march= option: `%s'"), arg);
+ break;
+
+ case OPTION_MTUNE:
+ if (*arg == '.')
+ as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+ for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+ {
+ if (strcmp (arg, cpu_arch [i].name) == 0)
+ {
+ cpu_arch_tune = cpu_arch [i].type;
+ cpu_arch_tune_flags = cpu_arch[i].flags;
+ break;
+ }
+ }
+ if (i >= ARRAY_SIZE (cpu_arch))
+ as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+ break;
+
default:
return 0;
}
@@ -5543,6 +5643,11 @@ md_show_usage (stream)
fprintf (stream, _("\
--divide ignored\n"));
#endif
+ fprintf (stream, _("\
+ -march=CPU/-mtune=CPU generate code/optimize for CPU, where CPU is one of:\n\
+ i386, i486, pentium, pentiumpro, pentium4, nocona,\n\
+ yonah, merom, k6, athlon, k8, generic32, generic64\n"));
+
}
#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
@@ -5554,9 +5659,37 @@ const char *
i386_target_format ()
{
if (!strcmp (default_arch, "x86_64"))
- set_code_flag (CODE_64BIT);
+ {
+ set_code_flag (CODE_64BIT);
+ if (cpu_arch_isa == PROCESSOR_UNKNOWN)
+ {
+ cpu_arch_isa = PROCESSOR_GENERIC64;
+ cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+ |Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+ |CpuSSE|CpuSSE2;
+ }
+ if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+ {
+ cpu_arch_tune = PROCESSOR_GENERIC64;
+ cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+ |Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+ |CpuSSE|CpuSSE2;
+ }
+ }
else if (!strcmp (default_arch, "i386"))
- set_code_flag (CODE_32BIT);
+ {
+ set_code_flag (CODE_32BIT);
+ if (cpu_arch_isa == PROCESSOR_UNKNOWN)
+ {
+ cpu_arch_isa = PROCESSOR_GENERIC32;
+ cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+ }
+ if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+ {
+ cpu_arch_tune = PROCESSOR_GENERIC32;
+ cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+ }
+ }
else
as_fatal (_("Unknown architecture"));
switch (OUTPUT_FLAVOR)
--- gas/config/tc-i386.h.tune 2006-05-19 06:31:21.000000000 -0700
+++ gas/config/tc-i386.h 2006-06-15 14:14:01.000000000 -0700
@@ -377,11 +377,29 @@ typedef struct
}
sib_byte;
-/* x86 arch names and features */
+enum processor_type
+{
+ PROCESSOR_UNKNOWN,
+ PROCESSOR_I486,
+ PROCESSOR_PENTIUM,
+ PROCESSOR_PENTIUMPRO,
+ PROCESSOR_PENTIUM4,
+ PROCESSOR_NOCONA,
+ PROCESSOR_YONAH,
+ PROCESSOR_MEROM,
+ PROCESSOR_K6,
+ PROCESSOR_ATHLON,
+ PROCESSOR_K8,
+ PROCESSOR_GENERIC32,
+ PROCESSOR_GENERIC64
+};
+
+/* x86 arch names, types and features */
typedef struct
{
- const char *name; /* arch name */
- unsigned int flags; /* cpu feature flags */
+ const char *name; /* arch name */
+ enum processor_type type; /* arch type */
+ unsigned int flags; /* cpu feature flags */
}
arch_entry;
--- gas/doc/as.texinfo.tune 2006-06-01 09:15:58.000000000 -0700
+++ gas/doc/as.texinfo 2006-06-15 15:26:26.000000000 -0700
@@ -296,6 +296,7 @@ gcc(1), ld(1), and the Info entries for
@emph{Target i386 options:}
[@b{--32}|@b{--64}] [@b{-n}]
+ [@b{-march}=@var{CPU}] [@b{-mtune}=@var{CPU}]
@end ifset
@ifset I960
--- gas/doc/c-i386.texi.tune 2005-11-11 10:48:53.000000000 -0800
+++ gas/doc/c-i386.texi 2006-06-15 15:28:43.000000000 -0700
@@ -76,6 +76,49 @@ character, which means that it cannot be
not disable @samp{/} at the beginning of a line starting a comment, or
affect using @samp{#} for starting a comment.
+@cindex @samp{-march=} option, i386
+@cindex @samp{-march=} option, x86-64
+@item -march=@var{CPU}
+This option specifies an instruction set architecture for generating
+instructions. The following architectures are recognized:
+@code{i8086},
+@code{i186},
+@code{i286},
+@code{i386},
+@code{i486},
+@code{i586},
+@code{i686},
+@code{pentium},
+@code{pentiumpro},
+@code{pentiumii},
+@code{pentiumiii},
+@code{pentium4},
+@code{prescott},
+@code{nocona},
+@code{yonah},
+@code{merom},
+@code{k6},
+@code{k6_2},
+@code{athlon},
+@code{sledgehammer},
+@code{opteron},
+@code{k8},
+@code{generic32} and
+@code{generic64}.
+
+This option only affects instructions generated by the assembler. The
+@code{.arch} directive will take precedence.
+
+@cindex @samp{-mtune=} option, i386
+@cindex @samp{-mtune=} option, x86-64
+@item -mtune=@var{CPU}
+This option specifies a processor to optimize for. When used in
+conjunction with the @option{-march} option, only instructions
+of the processor specified by the @option{-march} option will be
+generated.
+
+Valid @var{CPU} values are identical to @option{-march=@var{CPU}}.
+
@end table
@node i386-Syntax
@@ -709,8 +752,11 @@ supported on the CPU specified. The cho
@item @samp{i8086} @tab @samp{i186} @tab @samp{i286} @tab @samp{i386}
@item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
@item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
-@item @samp{k6} @tab @samp{athlon} @samp{sledgehammer}
-@item @samp{.mmx} @samp{.sse} @samp{.sse2} @samp{.sse3} @samp{.3dnow}
+@item @samp{prescott} @tab @samp{nocona} @tab @samp{yonah} @tab @samp{merom}
+@item @samp{k6} @tab @samp{athlon} @tab @samp{sledgehammer} @tab @samp{k8}
+@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
+@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.padlock} @tab @samp{.pacifica}
+@item @samp{.svme}
@end multitable
Apart from the warning, there are only two other effects on