rpms/gcc/devel gcc41-x86-mtune-generic1.patch, NONE, 1.1 gcc41-x86-mtune-generic2.patch, NONE, 1.1 gcc41-x86-mtune-generic3.patch, NONE, 1.1 .cvsignore, 1.128, 1.129 gcc41-atomic-builtins.patch, 1.1, 1.2 gcc41-s390-ldbl.patch, 1.2, 1.3 gcc41.spec, 1.19, 1.20 sources, 1.130, 1.131
fedora-cvs-commits at redhat.com
fedora-cvs-commits at redhat.com
Tue Jan 31 14:29:36 UTC 2006
Author: jakub
Update of /cvs/dist/rpms/gcc/devel
In directory cvs.devel.redhat.com:/tmp/cvs-serv17529
Modified Files:
.cvsignore gcc41-atomic-builtins.patch gcc41-s390-ldbl.patch
gcc41.spec sources
Added Files:
gcc41-x86-mtune-generic1.patch gcc41-x86-mtune-generic2.patch
gcc41-x86-mtune-generic3.patch
Log Message:
4.1.0-0.18
gcc41-x86-mtune-generic1.patch:
0 files changed
--- NEW FILE gcc41-x86-mtune-generic1.patch ---
2006-01-19 Jan Hubicka <jh at suse.cz>
* i386.c (*_cost): Add COSTS_N_INSNS.
(ix86_rtx_costs): Do not use COSTS_N_INSNS.
--- gcc/config/i386/i386.c (revision 108997)
+++ gcc/config/i386/i386.c (revision 109242)
@@ -65,15 +65,23 @@
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tunning for size */
- 2, /* cost of an add instruction */
- 3, /* cost of a lea instruction */
- 2, /* variable shift costs */
- 3, /* constant shift costs */
- {3, 3, 3, 3, 5}, /* cost of starting a multiply */
+ COSTS_N_INSNS (2), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (2), /* variable shift costs */
+ COSTS_N_INSNS (3), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
- {3, 3, 3, 3, 5}, /* cost of a divide/mod */
- 3, /* cost of movsx */
- 3, /* cost of movzx */
+ {COSTS_N_INSNS (3), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
@@ -99,26 +107,34 @@
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
- 2, /* cost of FADD and FSUB insns. */
- 2, /* cost of FMUL instruction. */
- 2, /* cost of FDIV instruction. */
- 2, /* cost of FABS instruction. */
- 2, /* cost of FCHS instruction. */
- 2, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (2), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (2), /* cost of FSQRT instruction. */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 3, /* variable shift costs */
- 2, /* constant shift costs */
- {6, 6, 6, 6, 6}, /* cost of starting a multiply */
- 1, /* cost of multiply per each bit set */
- {23, 23, 23, 23, 23}, /* cost of a divide/mod */
- 3, /* cost of movsx */
- 2, /* cost of movzx */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (1), /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
@@ -144,25 +160,33 @@
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
- 23, /* cost of FADD and FSUB insns. */
- 27, /* cost of FMUL instruction. */
- 88, /* cost of FDIV instruction. */
- 22, /* cost of FABS instruction. */
- 24, /* cost of FCHS instruction. */
- 122, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (27), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (88), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (22), /* cost of FABS instruction. */
+ COSTS_N_INSNS (24), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 3, /* variable shift costs */
- 2, /* constant shift costs */
- {12, 12, 12, 12, 12}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
1, /* cost of multiply per each bit set */
- {40, 40, 40, 40, 40}, /* cost of a divide/mod */
- 3, /* cost of movsx */
- 2, /* cost of movzx */
+ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
@@ -188,25 +212,33 @@
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
- 8, /* cost of FADD and FSUB insns. */
- 16, /* cost of FMUL instruction. */
- 73, /* cost of FDIV instruction. */
- 3, /* cost of FABS instruction. */
- 3, /* cost of FCHS instruction. */
- 83, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (16), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (73), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 4, /* variable shift costs */
- 1, /* constant shift costs */
- {11, 11, 11, 11, 11}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
0, /* cost of multiply per each bit set */
- {25, 25, 25, 25, 25}, /* cost of a divide/mod */
- 3, /* cost of movsx */
- 2, /* cost of movzx */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
6, /* cost for loading QImode using movzbl */
@@ -232,25 +264,33 @@
0, /* size of prefetch block */
0, /* number of parallel prefetches */
2, /* Branch cost */
- 3, /* cost of FADD and FSUB insns. */
- 3, /* cost of FMUL instruction. */
- 39, /* cost of FDIV instruction. */
- 1, /* cost of FABS instruction. */
- 1, /* cost of FCHS instruction. */
- 70, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentiumpro_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 1, /* variable shift costs */
- 1, /* constant shift costs */
- {4, 4, 4, 4, 4}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {17, 17, 17, 17, 17}, /* cost of a divide/mod */
- 1, /* cost of movsx */
- 1, /* cost of movzx */
+ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
@@ -276,25 +316,33 @@
32, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
- 3, /* cost of FADD and FSUB insns. */
- 5, /* cost of FMUL instruction. */
- 56, /* cost of FDIV instruction. */
- 2, /* cost of FABS instruction. */
- 2, /* cost of FCHS instruction. */
- 56, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
};
static const
struct processor_costs k6_cost = {
- 1, /* cost of an add instruction */
- 2, /* cost of a lea instruction */
- 1, /* variable shift costs */
- 1, /* constant shift costs */
- {3, 3, 3, 3, 3}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
- {18, 18, 18, 18, 18}, /* cost of a divide/mod */
- 2, /* cost of movsx */
- 2, /* cost of movzx */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (2), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
3, /* cost for loading QImode using movzbl */
@@ -320,25 +368,33 @@
32, /* size of prefetch block */
1, /* number of parallel prefetches */
1, /* Branch cost */
- 2, /* cost of FADD and FSUB insns. */
- 2, /* cost of FMUL instruction. */
- 56, /* cost of FDIV instruction. */
- 2, /* cost of FABS instruction. */
- 2, /* cost of FCHS instruction. */
- 56, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
};
static const
struct processor_costs athlon_cost = {
- 1, /* cost of an add instruction */
- 2, /* cost of a lea instruction */
- 1, /* variable shift costs */
- 1, /* constant shift costs */
- {5, 5, 5, 5, 5}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
- {18, 26, 42, 74, 74}, /* cost of a divide/mod */
- 1, /* cost of movsx */
- 1, /* cost of movzx */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
@@ -364,25 +420,33 @@
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
- 4, /* cost of FADD and FSUB insns. */
- 4, /* cost of FMUL instruction. */
- 24, /* cost of FDIV instruction. */
- 2, /* cost of FABS instruction. */
- 2, /* cost of FCHS instruction. */
- 35, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (24), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
static const
struct processor_costs k8_cost = {
- 1, /* cost of an add instruction */
- 2, /* cost of a lea instruction */
- 1, /* variable shift costs */
- 1, /* constant shift costs */
- {3, 4, 3, 4, 5}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
- {18, 26, 42, 74, 74}, /* cost of a divide/mod */
- 1, /* cost of movsx */
- 1, /* cost of movzx */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
@@ -408,25 +472,33 @@
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
- 4, /* cost of FADD and FSUB insns. */
- 4, /* cost of FMUL instruction. */
- 19, /* cost of FDIV instruction. */
- 2, /* cost of FABS instruction. */
- 2, /* cost of FCHS instruction. */
- 35, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium4_cost = {
- 1, /* cost of an add instruction */
- 3, /* cost of a lea instruction */
- 4, /* variable shift costs */
- 4, /* constant shift costs */
- {15, 15, 15, 15, 15}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
0, /* cost of multiply per each bit set */
- {56, 56, 56, 56, 56}, /* cost of a divide/mod */
- 1, /* cost of movsx */
- 1, /* cost of movzx */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
@@ -452,25 +524,33 @@
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
- 5, /* cost of FADD and FSUB insns. */
- 7, /* cost of FMUL instruction. */
- 43, /* cost of FDIV instruction. */
- 2, /* cost of FABS instruction. */
- 2, /* cost of FCHS instruction. */
- 43, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (43), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
};
static const
struct processor_costs nocona_cost = {
- 1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
- 1, /* variable shift costs */
- 1, /* constant shift costs */
- {10, 10, 10, 10, 10}, /* cost of starting a multiply */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
0, /* cost of multiply per each bit set */
- {66, 66, 66, 66, 66}, /* cost of a divide/mod */
- 1, /* cost of movsx */
- 1, /* cost of movzx */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
@@ -496,12 +576,12 @@
128, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
- 6, /* cost of FADD and FSUB insns. */
- 8, /* cost of FMUL instruction. */
- 40, /* cost of FDIV instruction. */
- 3, /* cost of FABS instruction. */
- 3, /* cost of FCHS instruction. */
- 44, /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
@@ -16320,13 +16400,13 @@
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
- *total = COSTS_N_INSNS (ix86_cost->add);
+ *total = ix86_cost->add;
else
- *total = COSTS_N_INSNS (ix86_cost->movzx);
+ *total = ix86_cost->movzx;
return false;
case SIGN_EXTEND:
- *total = COSTS_N_INSNS (ix86_cost->movsx);
+ *total = ix86_cost->movsx;
return false;
case ASHIFT:
@@ -16336,13 +16416,14 @@
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
- *total = COSTS_N_INSNS (ix86_cost->add);
+ *total = ix86_cost->add;
return false;
}
if ((value == 2 || value == 3)
+ && !TARGET_DECOMPOSE_LEA
&& ix86_cost->lea <= ix86_cost->shift_const)
{
- *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total = ix86_cost->lea;
return false;
}
}
@@ -16357,31 +16438,31 @@
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
if (INTVAL (XEXP (x, 1)) > 32)
- *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
+ *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
else
- *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
+ *total = ix86_cost->shift_const * 2;
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
- *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
+ *total = ix86_cost->shift_var * 2;
else
- *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
+ *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
}
}
else
{
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- *total = COSTS_N_INSNS (ix86_cost->shift_const);
+ *total = ix86_cost->shift_const;
else
- *total = COSTS_N_INSNS (ix86_cost->shift_var);
+ *total = ix86_cost->shift_var;
}
return false;
case MULT:
if (FLOAT_MODE_P (mode))
{
- *total = COSTS_N_INSNS (ix86_cost->fmul);
+ *total = ix86_cost->fmul;
return false;
}
else
@@ -16422,9 +16503,9 @@
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
- *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
- + nbits * ix86_cost->mult_bit)
- + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
+ *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
+ + nbits * ix86_cost->mult_bit
+ + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
return true;
}
@@ -16434,14 +16515,14 @@
case MOD:
case UMOD:
if (FLOAT_MODE_P (mode))
- *total = COSTS_N_INSNS (ix86_cost->fdiv);
+ *total = ix86_cost->fdiv;
else
- *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
+ *total = ix86_cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
if (FLOAT_MODE_P (mode))
- *total = COSTS_N_INSNS (ix86_cost->fadd);
+ *total = ix86_cost->fadd;
else if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
{
@@ -16453,7 +16534,7 @@
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
outer_code);
@@ -16467,7 +16548,7 @@
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
return true;
@@ -16475,7 +16556,7 @@
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
- *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
@@ -16487,7 +16568,7 @@
case MINUS:
if (FLOAT_MODE_P (mode))
{
- *total = COSTS_N_INSNS (ix86_cost->fadd);
+ *total = ix86_cost->fadd;
return false;
}
/* FALLTHRU */
@@ -16497,7 +16578,7 @@
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
- *total = (COSTS_N_INSNS (ix86_cost->add) * 2
+ *total = (ix86_cost->add * 2
+ (rtx_cost (XEXP (x, 0), outer_code)
<< (GET_MODE (XEXP (x, 0)) != DImode))
+ (rtx_cost (XEXP (x, 1), outer_code)
@@ -16509,16 +16590,16 @@
case NEG:
if (FLOAT_MODE_P (mode))
{
- *total = COSTS_N_INSNS (ix86_cost->fchs);
+ *total = ix86_cost->fchs;
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
- *total = COSTS_N_INSNS (ix86_cost->add * 2);
+ *total = ix86_cost->add * 2;
else
- *total = COSTS_N_INSNS (ix86_cost->add);
+ *total = ix86_cost->add;
return false;
case COMPARE:
@@ -16529,7 +16610,7 @@
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
- *total = (COSTS_N_INSNS (ix86_cost->add)
+ *total = (ix86_cost->add
+ rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ rtx_cost (const1_rtx, outer_code));
return true;
@@ -16545,12 +16626,12 @@
case ABS:
if (FLOAT_MODE_P (mode))
- *total = COSTS_N_INSNS (ix86_cost->fabs);
+ *total = ix86_cost->fabs;
return false;
case SQRT:
if (FLOAT_MODE_P (mode))
- *total = COSTS_N_INSNS (ix86_cost->fsqrt);
+ *total = ix86_cost->fsqrt;
return false;
case UNSPEC:
gcc41-x86-mtune-generic2.patch:
0 files changed
--- NEW FILE gcc41-x86-mtune-generic2.patch ---
2006-01-19 Jan Hubicka <jh at suse.cz>
* i386.h (TARGET_DECOMPOSE_LEA): Kill.
* i386.c (x86_decompose_lea): Kill.
(ix86_rtx_costs): Do not test TARGET_DECOMPOSE_LEA.
--- gcc/config/i386/i386.h (revision 109569)
+++ gcc/config/i386/i386.h (revision 109581)
@@ -209,7 +209,6 @@
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & TUNEMASK)
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
-#define TARGET_DECOMPOSE_LEA (x86_decompose_lea & TUNEMASK)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
--- gcc/config/i386/i386.c (revision 109569)
+++ gcc/config/i386/i386.c (revision 109581)
@@ -640,7 +640,6 @@
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
-const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
@@ -16446,7 +16445,6 @@
return false;
}
if ((value == 2 || value == 3)
- && !TARGET_DECOMPOSE_LEA
&& ix86_cost->lea <= ix86_cost->shift_const)
{
*total = ix86_cost->lea;
gcc41-x86-mtune-generic3.patch:
config.gcc | 36 ++++-
config/i386/athlon.md | 195 +++++++++++++++---------------
config/i386/i386.c | 254 ++++++++++++++++++++++++++++++++++------
config/i386/i386.h | 19 ++
config/i386/i386.md | 26 ++--
config/i386/ppro.md | 172 +++++++++++++--------------
config/i386/predicates.md | 7 -
doc/invoke.texi | 22 +++
testsuite/gcc.target/i386/lea.c | 2
9 files changed, 493 insertions(+), 240 deletions(-)
--- NEW FILE gcc41-x86-mtune-generic3.patch ---
gcc/
2006-01-19 Jan Hubicka <jh at suse.cz>
H.J. Lu <hongjiu.lu at intel.com>
Evandro Menezes <evandro.menezes at amd.com>
* invoke.texi (generic): Document.
(i686): Update.
* config.gcc: Make x86_64-* and i686-* default to generic tuning.
* i386.h (TARGET_GENERIC32, TARGET_GENERIC64, TARGET_GENERIC,
TARGET_USE_INCDEC, TARGET_PAD_RETURNS): New macros.
(x86_use_incdec, x86_pad_returns): New variables.
(TARGET_CPU_DEFAULT_generic): New constant.
(TARGET_CPU_DEFAULT_NAMES): Add generic.
(enum processor_type): Add generic32 and generic64.
* i386.md (cpu attribute): Add generic32/generic64
(movhi splitter): Behave sanely when both partial_reg_dependency and
partial_reg_stall are set.
(K8 splitters): Enable for generic as well.
* predicates.md (incdec_operand): Use TARGET_USE_INCDEC.
(aligned_operand): Avoid memory mismatch stalls.
* athlon.md: Enable for generic64, new patterns for 128bit moves.
* ppro.md: Enable for generic32
* i386.c (generic64_cost, generic32_cost): New.
(m_GENERIC32, m_GENERIC64, m_GENERIC): New macros.
(x86_use_leave): Enable for generic64. (x86_use_sahf,
x86_ext_80387_constants): Enable for generic32. (x86_push_memory,
x86_movx, x86_unroll_strlen, x86_deep_branch, x86_use_simode_fiop,
x86_use_cltd, x86_promote_QImode, x86_sub_esp_4, x86_sub_esp_8,
x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves,
x86_partial_reg_dependency, x86_memory_mismatch_stall,
x86_accumulate_outgoing_args, x86_prologue_using_move,
x86_epilogue_using_move, x86_arch_always_fancy_math_387,
x86_sse_partial_reg_dependency, x86_four_jump_limit, x86_schedule):
Enable for generic.
(x86_use_incdec, x86_pad_returns): New.
(override_options): Add generic32 and generic64, translate "generic"
to generic32/generic64 and "i686" to "generic32", refuse
"generic32"/"generic64" as arch target.
(ix86_issue_rate, ix86_adjust_cost): Handle generic as athlon.
(ix86_reorg): Honor PAD_RETURNS.
gcc/testsuite/
2006-01-19 Jan Hubicka <jh at suse.cz>
* gcc.target/i386/lea.c: Test pentiumpro, not i686.
--- gcc/config.gcc.jj 2006-01-19 06:45:43.000000000 -0800
+++ gcc/config.gcc 2006-01-19 10:07:57.000000000 -0800
@@ -2370,6 +2370,9 @@ if test x$with_cpu = x ; then
# A Cirrus ARM variant.
with_cpu="ep9312"
;;
+ i386-*-*)
+ with_cpu=i386
+ ;;
i486-*-*)
with_cpu=i486
;;
@@ -2421,13 +2424,26 @@ if test x$with_cpu = x ; then
pentium_m-*)
with_cpu=pentium-m
;;
- *)
+ pentiumpro-*)
with_cpu=pentiumpro
;;
+ *)
+ with_cpu=generic
+ ;;
esac
;;
x86_64-*-*)
- with_cpu=k8
+ case ${target_noncanonical} in
+ k8-*|opteron-*|athlon_64-*)
+ with_cpu=k8
+ ;;
+ nocona-*)
+ with_cpu=nocona
+ ;;
+ *)
+ with_cpu=generic
+ ;;
+ esac
;;
alphaev6[78]*-*-*)
with_cpu=ev67
@@ -2633,13 +2649,21 @@ case "${target}" in
for which in arch cpu tune; do
eval "val=\$with_$which"
case ${val} in
- "" | i386 | i486 \
+ i386 | i486 \
| i586 | pentium | pentium-mmx | winchip-c6 | winchip2 \
| c3 | c3-2 | i686 | pentiumpro | pentium2 | pentium3 \
| pentium4 | k6 | k6-2 | k6-3 | athlon | athlon-tbird \
- | athlon-4 | athlon-xp | athlon-mp | k8 | opteron \
- | athlon64 | athlon-fx | prescott | pentium-m \
- | pentium4m | pentium3m| nocona)
+ | athlon-4 | athlon-xp | athlon-mp \
+ | prescott | pentium-m | pentium4m | pentium3m)
+ case "${target}" in
+ x86_64-*-*)
+ echo "CPU given in --with-$which=$val doesn't support 64bit mode." 1>&2
+ exit 1
+ ;;
+ esac
+ # OK
+ ;;
+ "" | k8 | opteron | athlon64 | athlon-fx | nocona | generic)
# OK
;;
*)
--- gcc/config/i386/athlon.md.jj 2006-01-28 12:50:29.000000000 +0100
+++ gcc/config/i386/athlon.md 2006-01-28 13:17:22.000000000 +0100
@@ -123,7 +123,7 @@
(define_cpu_unit "athlon-fmul" "athlon_fp")
(define_cpu_unit "athlon-fstore" "athlon_fp")
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
-(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
;; Vector operations usually consume many of pipes.
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
@@ -131,26 +131,26 @@
;; Jump instructions are executed in the branch unit completely transparent to us
(define_insn_reservation "athlon_branch" 0
- (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "ibr"))
"athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
- (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "call,callv"))
"athlon-vector,athlon-ieu")
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
- (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "push"))
"athlon-direct,athlon-agu,athlon-store")
(define_insn_reservation "athlon_pop" 4
- (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "pop"))
"athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
- (and (eq_attr "cpu" "k8")
+ (and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "pop"))
"athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
@@ -158,13 +158,13 @@
(eq_attr "type" "leave"))
"athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
- (and (eq_attr "cpu" "k8")
+ (and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "leave"))
"athlon-double,(athlon-ieu+athlon-load)")
;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
- (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "lea"))
"athlon-direct,athlon-agu,nothing")
@@ -176,13 +176,13 @@
"athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
- (and (eq_attr "cpu" "k8")
+ (and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none,unknown"))))
"athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
- (and (eq_attr "cpu" "k8")
+ (and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
"athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
@@ -192,13 +192,13 @@
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
- (and (eq_attr "cpu" "k8")
+ (and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load,both"))))
[...1715 lines suppressed...]
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssecmp"))))
"decoder0,(p2+p1)*2")
(define_insn_reservation "ppro_sse_cvt_V4SF" 3
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "none,unknown")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssecvt"))))
"decoder0,p1*2")
(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "!none,unknown")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssecmp"))))
"decoder0,p1,p4+p3")
(define_insn_reservation "ppro_sse_mul_V4SF" 5
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssemul"))))
"decoder0,p0*2")
(define_insn_reservation "ppro_sse_mul_V4SF_load" 5
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssemul"))))
@@ -679,49 +679,49 @@
;; FIXME: p0 really closed this long???
(define_insn_reservation "ppro_sse_div_V4SF" 48
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssediv"))))
"decoder0,p0*34")
(define_insn_reservation "ppro_sse_div_V4SF_load" 48
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssediv"))))
"decoder0,(p2+p0)*2,p0*32")
(define_insn_reservation "ppro_sse_log_V4SF" 2
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "sselog,sselog1"))))
"decodern,p1")
(define_insn_reservation "ppro_sse_log_V4SF_load" 2
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "sselog,sselog1"))))
"decoder0,(p2+p1)")
(define_insn_reservation "ppro_sse_mov_V4SF" 1
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "none")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssemov"))))
"decoder0,(p0|p1)*2")
(define_insn_reservation "ppro_sse_mov_V4SF_load" 2
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "load")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssemov"))))
"decoder0,p2*2")
(define_insn_reservation "ppro_sse_mov_V4SF_store" 3
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "store")
(and (eq_attr "mode" "V4SF")
(eq_attr "type" "ssemov"))))
@@ -735,7 +735,7 @@
;; reg-reg instructions produce 1 uop so they can be decoded on any of
;; the three decoders.
(define_insn_reservation "ppro_insn" 1
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "none,unknown")
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
"decodern,(p0|p1)")
@@ -743,13 +743,13 @@
;; read-modify and register-memory instructions have 2 or three uops,
;; so they have to be decoded on decoder0.
(define_insn_reservation "ppro_insn_load" 3
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "load")
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
"decoder0,p2+(p0|p1)")
(define_insn_reservation "ppro_insn_store" 1
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "store")
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
"decoder0,(p0|p1),p4+p3")
@@ -757,7 +757,7 @@
;; read-modify-store instructions produce 4 uops so they have to be
;; decoded on decoder0 as well.
(define_insn_reservation "ppro_insn_both" 4
- (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "cpu" "pentiumpro,generic32")
(and (eq_attr "memory" "both")
(eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
"decoder0,p2+(p0|p1),p4+p3")
--- gcc/config/i386/predicates.md.jj 2006-01-28 12:50:29.000000000 +0100
+++ gcc/config/i386/predicates.md 2006-01-28 13:17:22.000000000 +0100
@@ -614,7 +614,7 @@
{
/* On Pentium4, the inc and dec operations causes extra dependency on flag
registers, since carry flag is not set. */
- if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
+ if (!TARGET_USE_INCDEC && !optimize_size)
return 0;
return op == const1_rtx || op == constm1_rtx;
})
@@ -693,6 +693,11 @@
if (GET_CODE (op) != MEM)
return 1;
+ /* All patterns using aligned_operand on memory operands end up
+ promoting the memory operand to 64bit, thus causing a memory mismatch. */
+ if (TARGET_MEMORY_MISMATCH_STALL && !optimize_size)
+ return 0;
+
/* Don't even try to do any aligned optimizations with volatiles. */
if (MEM_VOLATILE_P (op))
return 0;
--- gcc/doc/invoke.texi.jj 2005-12-17 16:38:57.000000000 -0800
+++ gcc/doc/invoke.texi 2006-01-19 10:07:57.000000000 -0800
@@ -8993,6 +8993,23 @@ Tune to @var{cpu-type} everything applic
for the ABI and the set of available instructions. The choices for
@var{cpu-type} are:
@table @emph
+@item generic
+Produce code optimized for the most common IA32/AMD64/EM64T processors.
+If you know the CPU on which your code will run, then you should use
+the corresponding @option{-mtune} option instead of
+@option{-mtune=generic}. But, if you do not know exactly what CPU users
+of your application will have, then you should use this option.
+
+As new processors are deployed in the marketplace, the behavior of this
+option will change. Therefore, if you upgrade to a newer version of
+GCC, the code generated by this option will change to reflect the processors
+that were most common when that version of GCC was released.
+
+There is no @option{-march=generic} option because @option{-march}
+indicates the instruction set the compiler can use, and there is no
+generic instruction set applicable to all processors. In contrast,
+@option{-mtune} indicates the processor (or, in this case, collection of
+processors) for which the code is optimized.
@item i386
Original Intel's i386 CPU@.
@item i486
@@ -9001,8 +9018,11 @@ Intel's i486 CPU at . (No scheduling is im
Intel Pentium CPU with no MMX support.
@item pentium-mmx
Intel PentiumMMX CPU based on Pentium core with MMX instruction set support.
-@item i686, pentiumpro
+@item pentiumpro
Intel PentiumPro CPU@.
+@item i686
+Same as @code{generic}, but when used as @code{march} option, PentiumPro
+instruction set will be used, so the code will run on all i686 family chips.
@item pentium2
Intel Pentium2 CPU based on PentiumPro core with MMX instruction set support.
@item pentium3, pentium3m
--- gcc/testsuite/gcc.target/i386/lea.c.jj 2005-11-04 14:10:31.000000000 -0800
+++ gcc/testsuite/gcc.target/i386/lea.c 2006-01-19 13:10:18.000000000 -0800
@@ -1,6 +1,6 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O2 -march=i686" } */
+/* { dg-options "-O2 -march=pentiumpro" } */
/* { dg-final { scan-assembler "leal" } } */
typedef struct {
char **visbuf;
Index: .cvsignore
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/.cvsignore,v
retrieving revision 1.128
retrieving revision 1.129
diff -u -r1.128 -r1.129
--- .cvsignore 28 Jan 2006 10:00:01 -0000 1.128
+++ .cvsignore 31 Jan 2006 14:29:33 -0000 1.129
@@ -1 +1 @@
-gcc-4.1.0-20060128.tar.bz2
+gcc-4.1.0-20060131.tar.bz2
gcc41-atomic-builtins.patch:
s390/s390.c | 1 +
sparc/sparc.c | 1 +
2 files changed, 2 insertions(+)
Index: gcc41-atomic-builtins.patch
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/gcc41-atomic-builtins.patch,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- gcc41-atomic-builtins.patch 28 Jan 2006 10:00:01 -0000 1.1
+++ gcc41-atomic-builtins.patch 31 Jan 2006 14:29:33 -0000 1.2
@@ -1,23 +1,9 @@
-2006-01-28 Jakub Jelinek <jakub at redhat.com>
+2006-01-31 Jakub Jelinek <jakub at redhat.com>
* config/s390/s390.c (init_alignment_context): Set
ALIAS_SET_MEMORY_BARRIER on the MEM.
* config/sparc/sparc.c (sparc_expand_compare_and_swap_12): Likewise.
-2006-01-25 Richard Henderson <rth at redhat.com>
-
- * alias.h (ALIAS_SET_MEMORY_BARRIER): New.
- * alias.c (true_dependence): Respect it.
- (canon_true_dependence, write_dependence_p): Likewise.
- * builtins.c (get_builtin_sync_mem): Set it.
-
-2006-01-03 Adrian Straetling <straetling at de.ibm.com>
-
- * gcc/builtins.c (get_builtin_sync_mem): New function.
- (expand_builtin_sync_operation, expand_builtin_compare_and_swap,
- expand_builtin_lock_test_and_set, expand_builtin_lock_release):
- Call get_builtin_sync_mem to generate mem rtx.
-
--- gcc/config/s390/s390.c.jj 2006-01-28 09:54:03.000000000 +0100
+++ gcc/config/s390/s390.c 2006-01-28 10:29:03.000000000 +0100
@@ -4030,6 +4030,7 @@ init_alignment_context (struct alignment
@@ -38,171 +24,3 @@
MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
val = force_reg (SImode, memsi);
---- gcc/builtins.c.jj 2006-01-28 09:54:07.000000000 +0100
-+++ gcc/builtins.c 2006-01-28 10:25:33.000000000 +0100
-@@ -5425,6 +5425,28 @@ get_builtin_sync_mode (int fcode_diff)
- return mode_for_size (BITS_PER_UNIT << fcode_diff, MODE_INT, 0);
- }
-
-+/* Expand the memory expression LOC and return the appropriate memory operand
-+ for the builtin_sync operations. */
-+
-+static rtx
-+get_builtin_sync_mem (tree loc, enum machine_mode mode)
-+{
-+ rtx addr, mem;
-+
-+ addr = expand_expr (loc, NULL, Pmode, EXPAND_SUM);
-+
-+ /* Note that we explicitly do not want any alias information for this
-+ memory, so that we kill all other live memories. Otherwise we don't
-+ satisfy the full barrier semantics of the intrinsic. */
-+ mem = validize_mem (gen_rtx_MEM (mode, addr));
-+
-+ set_mem_align (mem, get_pointer_alignment (loc, BIGGEST_ALIGNMENT));
-+ set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
-+ MEM_VOLATILE_P (mem) = 1;
-+
-+ return mem;
-+}
-+
- /* Expand the __sync_xxx_and_fetch and __sync_fetch_and_xxx intrinsics.
- ARGLIST is the operands list to the function. CODE is the rtx code
- that corresponds to the arithmetic or logical operation from the name;
-@@ -5438,20 +5460,14 @@ expand_builtin_sync_operation (enum mach
- enum rtx_code code, bool after,
- rtx target, bool ignore)
- {
-- rtx addr, val, mem;
-+ rtx val, mem;
-
- /* Expand the operands. */
-- addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_SUM);
-+ mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
-
- arglist = TREE_CHAIN (arglist);
- val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
-
-- /* Note that we explicitly do not want any alias information for this
-- memory, so that we kill all other live memories. Otherwise we don't
-- satisfy the full barrier semantics of the intrinsic. */
-- mem = validize_mem (gen_rtx_MEM (mode, addr));
-- MEM_VOLATILE_P (mem) = 1;
--
- if (ignore)
- return expand_sync_operation (mem, val, code);
- else
-@@ -5467,10 +5483,10 @@ static rtx
- expand_builtin_compare_and_swap (enum machine_mode mode, tree arglist,
- bool is_bool, rtx target)
- {
-- rtx addr, old_val, new_val, mem;
-+ rtx old_val, new_val, mem;
-
- /* Expand the operands. */
-- addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_SUM);
-+ mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
-
- arglist = TREE_CHAIN (arglist);
- old_val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
-@@ -5478,12 +5494,6 @@ expand_builtin_compare_and_swap (enum ma
- arglist = TREE_CHAIN (arglist);
- new_val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
-
-- /* Note that we explicitly do not want any alias information for this
-- memory, so that we kill all other live memories. Otherwise we don't
-- satisfy the full barrier semantics of the intrinsic. */
-- mem = validize_mem (gen_rtx_MEM (mode, addr));
-- MEM_VOLATILE_P (mem) = 1;
--
- if (is_bool)
- return expand_bool_compare_and_swap (mem, old_val, new_val, target);
- else
-@@ -5500,20 +5510,14 @@ static rtx
- expand_builtin_lock_test_and_set (enum machine_mode mode, tree arglist,
- rtx target)
- {
-- rtx addr, val, mem;
-+ rtx val, mem;
-
- /* Expand the operands. */
-- addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_NORMAL);
-+ mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
-
- arglist = TREE_CHAIN (arglist);
- val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
-
-- /* Note that we explicitly do not want any alias information for this
-- memory, so that we kill all other live memories. Otherwise we don't
-- satisfy the barrier semantics of the intrinsic. */
-- mem = validize_mem (gen_rtx_MEM (mode, addr));
-- MEM_VOLATILE_P (mem) = 1;
--
- return expand_sync_lock_test_and_set (mem, val, target);
- }
-
-@@ -5547,17 +5551,11 @@ static void
- expand_builtin_lock_release (enum machine_mode mode, tree arglist)
- {
- enum insn_code icode;
-- rtx addr, mem, insn;
-+ rtx mem, insn;
- rtx val = const0_rtx;
-
- /* Expand the operands. */
-- addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_NORMAL);
--
-- /* Note that we explicitly do not want any alias information for this
-- memory, so that we kill all other live memories. Otherwise we don't
-- satisfy the barrier semantics of the intrinsic. */
-- mem = validize_mem (gen_rtx_MEM (mode, addr));
-- MEM_VOLATILE_P (mem) = 1;
-+ mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
-
- /* If there is an explicit operation in the md file, use it. */
- icode = sync_lock_release[mode];
---- gcc/alias.c.jj 2006-01-28 09:54:07.000000000 +0100
-+++ gcc/alias.c 2006-01-28 10:25:33.000000000 +0100
-@@ -2209,6 +2209,9 @@ true_dependence (rtx mem, enum machine_m
- return 1;
- if (GET_MODE (mem) == BLKmode && GET_CODE (XEXP (mem, 0)) == SCRATCH)
- return 1;
-+ if (MEM_ALIAS_SET (x) == ALIAS_SET_MEMORY_BARRIER
-+ || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
-+ return 1;
-
- if (DIFFERENT_ALIAS_SETS_P (x, mem))
- return 0;
-@@ -2282,6 +2285,9 @@ canon_true_dependence (rtx mem, enum mac
- return 1;
- if (GET_MODE (mem) == BLKmode && GET_CODE (XEXP (mem, 0)) == SCRATCH)
- return 1;
-+ if (MEM_ALIAS_SET (x) == ALIAS_SET_MEMORY_BARRIER
-+ || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
-+ return 1;
-
- if (DIFFERENT_ALIAS_SETS_P (x, mem))
- return 0;
-@@ -2341,6 +2347,9 @@ write_dependence_p (rtx mem, rtx x, int
- return 1;
- if (GET_MODE (mem) == BLKmode && GET_CODE (XEXP (mem, 0)) == SCRATCH)
- return 1;
-+ if (MEM_ALIAS_SET (x) == ALIAS_SET_MEMORY_BARRIER
-+ || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
-+ return 1;
-
- if (DIFFERENT_ALIAS_SETS_P (x, mem))
- return 0;
---- gcc/alias.h.jj 2006-01-28 09:54:07.000000000 +0100
-+++ gcc/alias.h 2006-01-28 10:25:33.000000000 +0100
-@@ -27,4 +27,10 @@ extern HOST_WIDE_INT get_frame_alias_set
- extern void record_base_value (unsigned int, rtx, int);
- extern bool component_uses_parent_alias_set (tree);
-
-+/* This alias set can be used to force a memory to conflict with all
-+ other memories, creating a barrier across which no memory reference
-+ can move. Note that there are other legacy ways to create such
-+ memory barriers, including an address of SCRATCH. */
-+#define ALIAS_SET_MEMORY_BARRIER ((HOST_WIDE_INT) -1)
-+
- #endif /* GCC_ALIAS_H */
gcc41-s390-ldbl.patch:
config/s390/2084.md | 44 +++
config/s390/fixdfdi.h | 151 ++++++++++++
config/s390/libgcc-glibc.ver | 33 ++
config/s390/s390.c | 62 ++++-
config/s390/s390.h | 48 ++-
config/s390/s390.md | 529 +++++++++++++++++++++++++++++++++----------
config/s390/s390.opt | 10
config/s390/t-crtstuff | 1
doc/invoke.texi | 10
9 files changed, 742 insertions(+), 146 deletions(-)
Index: gcc41-s390-ldbl.patch
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/gcc41-s390-ldbl.patch,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- gcc41-s390-ldbl.patch 28 Jan 2006 10:10:48 -0000 1.2
+++ gcc41-s390-ldbl.patch 31 Jan 2006 14:29:33 -0000 1.3
@@ -1,9 +1,10 @@
-2006-01-27 Andreas Krebbel <krebbel1 at de.ibm.com>
- Ulrich Weigand <uweigand at de.ibm.com>
+2006-01-31 Andreas Krebbel <krebbel1 at de.ibm.com>
+ Ulrich Weigand <uweigand at de.ibm.com>
* config/s390/2084.md ("x_fsimptf", "x_fmultf", "x_fdivtf",
"x_floadtf", "x_ftrunctf", "x_ftruncdf"): New insn reservations.
- * config/s390/s390.c (struct processor_costs): Add mxbr, sqxbr, dxbr
+ * config/s390/fixdfdi.h (__fixunstfdi, __fixtfdi): New functions.
+ * config/s390/s390.c (struct processor_costs): Add mxbr, sqxbr, dxbr
and dxr fields.
(z900_cost, z990_cost, z9_109_cost): Values for the new fields added.
(s390_rtx_costs): Use the new fields to calculate rtx costs.
@@ -15,145 +16,94 @@
builtin define.
(LONG_DOUBLE_TYPE_SIZE): Set to 128 or 64.
(LIBGCC2_LONG_DOUBLE_TYPE_SIZE, WIDEST_HARDWARE_FP_SIZE): Define.
- (HARD_REGNO_NREGS, HARD_REGNO_MODE_OK, CLASS_MAX_NREGS,
+ (HARD_REGNO_NREGS, HARD_REGNO_MODE_OK, CLASS_MAX_NREGS,
CANNOT_CHANGE_MODE_CLASS): Consider TFmode.
- * config/s390/s390.md ("type" attribute): Add fsimptf, floadtf, fmultf,
+ * config/s390/s390.md ("type" attribute): Add fsimptf, floadtf, fmultf,
fdivtf, fsqrttf, ftrunctf, ftruncdf as possible values.
(FPR mode macro): Add TFmode.
(DSF mode macro): New.
(<de>, <dee> mode attributes): Removed.
(<xde>, <xdee>, <RRe>, <RXe>, <Rf> mode attributes): New.
- ("*cmp<mode>_ccs_0", "*cmp<mode>_ccs_0_ibm", "*cmp<mode>_ccs",
- "*cmp<mode>_ccs_ibm", "fix_trunc<FPR:mode><GPR:mode>2_ieee",
- "floatdi<mode>2", "floatsi<mode>2_ieee", "*add<mode>3",
- "*add<mode>3_cc", "*add<mode>3_cconly", "*add<mode>3_ibm",
- "*sub<mode>3", "*sub<mode>3_cc", "*sub<mode>3_cconly",
+ ("*cmp<mode>_ccs_0", "*cmp<mode>_ccs_0_ibm", "*cmp<mode>_ccs",
+ "*cmp<mode>_ccs_ibm", "fix_trunc<FPR:mode><GPR:mode>2_ieee",
+ "floatdi<mode>2", "floatsi<mode>2_ieee", "*add<mode>3",
+ "*add<mode>3_cc", "*add<mode>3_cconly", "*add<mode>3_ibm",
+ "*sub<mode>3", "*sub<mode>3_cc", "*sub<mode>3_cconly",
"*sub<mode>3_ibm", "*mul<mode>3_ibm", "*fmadd<mode>", "*fmsub<mode>",
- "*div<mode>3", "*div<mode>3_ibm", "*neg<mode>2_cc",
- "*neg<mode>2_cconly", "*neg<mode>2", "*neg<mode>2_ibm",
+ "*div<mode>3", "*div<mode>3_ibm", "*neg<mode>2_cc",
+ "*neg<mode>2_cconly", "*neg<mode>2", "*neg<mode>2_ibm",
"*abs<mode>2_cc", "*abs<mode>2_cconly", "*abs<mode>2",
- "*abs<mode>2_ibm", "*negabs<mode>2_cc", "*negabs<mode>2_cconly",
+ "*abs<mode>2_ibm", "*negabs<mode>2_cc", "*negabs<mode>2_cconly",
"*negabs<mode>2", "sqrt<mode>2"):
Changed <de> to <xde>. R constraint replaced by <Rf>.
- ("*mul<mode>3"): Changed <dee> to <xdee>. R constraint replaced by
+ ("*mul<mode>3"): Changed <dee> to <xdee>. R constraint replaced by
<Rf>.
("fix_trunc<FPR:mode>di2"): 'FPR:' removed.
- ("*fmadd<mode>", "*fmsub<mode>"): FPR mode replaced by DSF.
- ("movtf"): New insn definition followed by 2 new splitters.
- ("reload_outtf", "reload_intf", "trunctfdf2", "trunctfsf2",
+ ("*fmadd<mode>", "*fmsub<mode>"): FPR mode replaced by DSF.
+ ("*movtf_64", "*movtf_31"): New insn definitions followed by 5
+ splitters.
+ ("movtf", "reload_outtf", "reload_intf", "trunctfdf2", "trunctfsf2",
"extenddftf2", "extendsftf2"): New expanders.
- ("*trunctfdf2_ieee", "*trunctfdf2_ibm", "*trunctfsf2_ieee",
+ ("*trunctfdf2_ieee", "*trunctfdf2_ibm", "*trunctfsf2_ieee",
"*trunctfsf2_ibm", "*extenddftf2_ieee", "*extenddftf2_ibm",
"*extendsftf2_ieee", "*extendsftf2_ibm"): New insn patterns.
* config/s390/s390.opt (mlong-double-128, mlong-double-64):
New options.
* config/s390/t-crtstuff (TARGET_LIBGCC2_CFLAGS): Macro defined.
- * config/s390/libgcc-glibc.ver (__divtc3, __multc3, __powitf2,
+ * config/s390/libgcc-glibc.ver (__divtc3, __multc3, __powitf2,
__fixtfti, __fixunstfti, __floattitf, __fixtfdi, __fixunstfdi,
__floatditf): Add a GCC_4.1.0 symbol version tag.
+ * doc/invoke.texi (-mlong-double-128, -mlong-double-64): Document
+ the new options.
---- gcc/config/s390/2084.md.jj 2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/2084.md 2006-01-28 10:39:11.000000000 +0100
-@@ -161,6 +161,11 @@
- ;; Floating point insns
- ;;
-
-+(define_insn_reservation "x_fsimptf" 7
-+ (and (eq_attr "cpu" "z990,z9_109")
-+ (eq_attr "type" "fsimptf"))
-+ "x_e1_t*2,x-wr-fp")
-+
- (define_insn_reservation "x_fsimpdf" 6
- (and (eq_attr "cpu" "z990,z9_109")
- (eq_attr "type" "fsimpdf,fmuldf"))
-@@ -171,6 +176,18 @@
- (eq_attr "type" "fsimpsf,fmulsf"))
- "x_e1_t,x-wr-fp")
-
-+
-+(define_insn_reservation "x_fmultf" 33
-+ (and (eq_attr "cpu" "z990,z9_109")
-+ (eq_attr "type" "fmultf"))
-+ "x_e1_t*27,x-wr-fp")
-+
-+
-+(define_insn_reservation "x_fdivtf" 82
-+ (and (eq_attr "cpu" "z990,z9_109")
-+ (eq_attr "type" "fdivtf,fsqrttf"))
-+ "x_e1_t*76,x-wr-fp")
-+
- (define_insn_reservation "x_fdivdf" 36
- (and (eq_attr "cpu" "z990,z9_109")
- (eq_attr "type" "fdivdf,fsqrtdf"))
-@@ -181,6 +198,12 @@
- (eq_attr "type" "fdivsf,fsqrtsf"))
- "x_e1_t*30,x-wr-fp")
-
-+
-+(define_insn_reservation "x_floadtf" 6
-+ (and (eq_attr "cpu" "z990,z9_109")
-+ (eq_attr "type" "floadtf"))
-+ "x_e1_t,x-wr-fp")
-+
- (define_insn_reservation "x_floaddf" 6
- (and (eq_attr "cpu" "z990,z9_109")
- (eq_attr "type" "floaddf"))
-@@ -191,6 +214,7 @@
- (eq_attr "type" "floadsf"))
- "x_e1_t,x-wr-fp")
+--- gcc/config/s390/libgcc-glibc.ver.jj 2006-01-31 14:38:38.000000000 +0100
++++ gcc/config/s390/libgcc-glibc.ver 2006-01-31 15:07:03.000000000 +0100
+@@ -39,3 +39,36 @@ GLIBC_2.2 {
+ }
+ %endif
++# With GCC 4.1.0 long double 128 bit support was introduced. The
++# following symbols coming from libgcc are enabled when -mlong-double-128
++# is specified. These lines make the symbols to get a @@GCC_4.1.0 attached.
+
- (define_insn_reservation "x_fstoredf" 1
- (and (eq_attr "cpu" "z990,z9_109")
- (eq_attr "type" "fstoredf"))
-@@ -201,6 +225,18 @@
- (eq_attr "type" "fstoresf"))
- "x_e1_t,x-wr-fp")
-
++%exclude {
++ __divtc3
++ __multc3
++ __powitf2
++ __fixtfti
++ __fixunstfti
++ __floattitf
+
-+(define_insn_reservation "x_ftrunctf" 16
-+ (and (eq_attr "cpu" "z990,z9_109")
-+ (eq_attr "type" "ftrunctf"))
-+ "x_e1_t*10,x-wr-fp")
++ __fixtfdi
++ __fixunstfdi
++ __floatditf
++}
+
-+(define_insn_reservation "x_ftruncdf" 11
-+ (and (eq_attr "cpu" "z990,z9_109")
-+ (eq_attr "type" "ftruncdf"))
-+ "x_e1_t*5,x-wr-fp")
++GCC_4.1.0 {
++ __divtc3
++ __multc3
++ __powitf2
+
++%ifdef __s390x__
++ __fixtfti
++ __fixunstfti
++ __floattitf
+
- (define_insn_reservation "x_ftoi" 1
- (and (eq_attr "cpu" "z990,z9_109")
- (eq_attr "type" "ftoi"))
-@@ -234,7 +270,7 @@
- "s390_agen_dep_p")
-
- (define_bypass 9 "x_int,x_agen,x_lr"
-- "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
-+ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
- x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
- "s390_agen_dep_p")
- ;;
-@@ -247,7 +283,7 @@
- "s390_agen_dep_p")
-
- (define_bypass 5 "x_load"
-- "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
-+ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
- x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
- "s390_agen_dep_p")
-
-@@ -261,7 +297,7 @@
- "s390_agen_dep_p")
-
- (define_bypass 5 "x_larl, x_la"
-- "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
-+ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
- x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
- "s390_agen_dep_p")
-
---- gcc/config/s390/s390.c.jj 2006-01-28 10:29:03.000000000 +0100
-+++ gcc/config/s390/s390.c 2006-01-28 10:39:11.000000000 +0100
++%else
++ __fixtfdi
++ __fixunstfdi
++ __floatditf
++%endif
++}
+--- gcc/config/s390/s390.c.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.c 2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ /* Subroutines used for code generation on IBM S/390 and zSeries
+- Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
++ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+ Ulrich Weigand (uweigand at de.ibm.com).
@@ -71,13 +71,17 @@ struct processor_costs
const int msgr; /* cost of an MSGR instruction. */
const int msr; /* cost of an MSR instruction. */
@@ -217,7 +167,7 @@
COSTS_N_INSNS (40), /* DDBR */
COSTS_N_INSNS (37), /* DDR */
COSTS_N_INSNS (26), /* DDBR */
-@@ -2154,6 +2170,9 @@ s390_rtx_costs (rtx x, int code, int out
+@@ -2159,6 +2175,9 @@ s390_rtx_costs (rtx x, int code, int out
case DFmode:
*total = s390_cost->mult_df;
break;
@@ -227,7 +177,7 @@
default:
return false;
}
-@@ -2204,13 +2223,22 @@ s390_rtx_costs (rtx x, int code, int out
+@@ -2209,13 +2228,22 @@ s390_rtx_costs (rtx x, int code, int out
else /* TARGET_IBM_FLOAT */
*total = s390_cost->ddr;
}
@@ -251,7 +201,7 @@
return false;
case SIGN_EXTEND:
-@@ -2565,6 +2593,18 @@ s390_secondary_input_reload_class (enum
+@@ -2570,6 +2598,18 @@ s390_secondary_input_reload_class (enum
if (s390_plus_operand (in, mode))
return ADDR_REGS;
@@ -270,7 +220,16 @@
if (reg_classes_intersect_p (CC_REGS, class))
return GENERAL_REGS;
-@@ -2592,6 +2632,18 @@ s390_secondary_output_reload_class (enum
+@@ -2586,7 +2626,7 @@ enum reg_class
+ s390_secondary_output_reload_class (enum reg_class class,
+ enum machine_mode mode, rtx out)
+ {
+- if ((TARGET_64BIT ? mode == TImode
++ if ((TARGET_64BIT ? (mode == TImode || mode == TFmode)
+ : (mode == DImode || mode == DFmode))
+ && reg_classes_intersect_p (GENERAL_REGS, class)
+ && GET_CODE (out) == MEM
+@@ -2597,6 +2637,18 @@ s390_secondary_output_reload_class (enum
+ GET_MODE_SIZE (mode) - 1))
return ADDR_REGS;
@@ -289,7 +248,7 @@
if (reg_classes_intersect_p (CC_REGS, class))
return GENERAL_REGS;
-@@ -5164,10 +5216,10 @@ replace_ltrel_base (rtx *x)
+@@ -5168,10 +5220,10 @@ replace_ltrel_base (rtx *x)
/* We keep a list of constants which we have to add to internal
constant tables in the middle of large functions. */
@@ -302,107 +261,154 @@
DFmode, DImode,
SFmode, SImode,
HImode,
---- gcc/config/s390/s390.h.jj 2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/s390.h 2006-01-28 10:39:11.000000000 +0100
-@@ -93,6 +93,8 @@ extern enum processor_flags s390_arch_fl
- builtin_define ("__s390__"); \
- if (TARGET_64BIT) \
- builtin_define ("__s390x__"); \
-+ if (TARGET_LONG_DOUBLE_128) \
-+ builtin_define ("__LONG_DOUBLE_128__"); \
- } \
- while (0)
+--- gcc/config/s390/s390.opt.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.opt 2006-01-31 15:07:03.000000000 +0100
+@@ -1,6 +1,6 @@
+ ; Options for the S/390 / zSeries port of the compiler.
-@@ -216,7 +218,18 @@ if (INTEGRAL_MODE_P (MODE) &&
- #define LONG_LONG_TYPE_SIZE 64
- #define FLOAT_TYPE_SIZE 32
- #define DOUBLE_TYPE_SIZE 64
--#define LONG_DOUBLE_TYPE_SIZE 64 /* ??? Should support extended format. */
-+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+-; Copyright (C) 2005 Free Software Foundation, Inc.
++; Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+ ;
+ ; This file is part of GCC.
+ ;
+@@ -51,6 +51,14 @@ mhard-float
+ Target Report RejectNegative Mask(HARD_FLOAT)
+ Use hardware fp
+
++mlong-double-128
++Target Report RejectNegative Mask(LONG_DOUBLE_128)
++Use 128-bit long double
+
-+/* Define this to set long double type size to use in libgcc2.c, which can
-+ not depend on target_flags. */
-+#ifdef __LONG_DOUBLE_128__
-+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
-+#else
-+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
-+#endif
++mlong-double-64
++Target Report RejectNegative InverseMask(LONG_DOUBLE_128)
++Use 64-bit long double
+
-+/* Work around target_flags dependency in ada/targtyps.c. */
-+#define WIDEST_HARDWARE_FP_SIZE 64
+ mpacked-stack
+ Target Report Mask(PACKED_STACK)
+ Use packed stack layout
+--- gcc/config/s390/2084.md.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/2084.md 2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ ;; Scheduling description for z990 (cpu 2084).
+-;; Copyright (C) 2003,2004, 2005 Free Software Foundation, Inc.
++;; Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ ;; Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+ ;; Ulrich Weigand (uweigand at de.ibm.com).
- /* We use "unsigned char" as default. */
- #define DEFAULT_SIGNED_CHAR 0
-@@ -334,28 +347,34 @@ if (INTEGRAL_MODE_P (MODE) &&
- Floating point modes <= word size fit into any FPR or GPR.
- Floating point modes > word size (i.e. DFmode on 32-bit) fit
- into any FPR, or an even-odd GPR pair.
-+ TFmode fits only into an even-odd FPR pair.
+@@ -161,6 +161,11 @@
+ ;; Floating point insns
+ ;;
- Complex floating point modes fit either into two FPRs, or into
- successive GPRs (again starting with an even number).
-+ TCmode fits only into two successive even-odd FPR pairs.
++(define_insn_reservation "x_fsimptf" 7
++ (and (eq_attr "cpu" "z990,z9_109")
++ (eq_attr "type" "fsimptf"))
++ "x_e1_t*2,x-wr-fp")
++
+ (define_insn_reservation "x_fsimpdf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fsimpdf,fmuldf"))
+@@ -171,6 +176,18 @@
+ (eq_attr "type" "fsimpsf,fmulsf"))
+ "x_e1_t,x-wr-fp")
- Condition code modes fit only into the CC register. */
++
++(define_insn_reservation "x_fmultf" 33
++ (and (eq_attr "cpu" "z990,z9_109")
++ (eq_attr "type" "fmultf"))
++ "x_e1_t*27,x-wr-fp")
++
++
++(define_insn_reservation "x_fdivtf" 82
++ (and (eq_attr "cpu" "z990,z9_109")
++ (eq_attr "type" "fdivtf,fsqrttf"))
++ "x_e1_t*76,x-wr-fp")
++
+ (define_insn_reservation "x_fdivdf" 36
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fdivdf,fsqrtdf"))
+@@ -181,6 +198,12 @@
+ (eq_attr "type" "fdivsf,fsqrtsf"))
+ "x_e1_t*30,x-wr-fp")
- #define HARD_REGNO_NREGS(REGNO, MODE) \
- (FP_REGNO_P(REGNO)? \
-- (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) : \
-+ (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? \
-+ 2 * ((GET_MODE_SIZE(MODE) / 2 + 8 - 1) / 8) : \
-+ ((GET_MODE_SIZE(MODE) + 8 - 1) / 8)) : \
- GENERAL_REGNO_P(REGNO)? \
- ((GET_MODE_SIZE(MODE)+UNITS_PER_WORD-1) / UNITS_PER_WORD) : \
- ACCESS_REGNO_P(REGNO)? \
-- ((GET_MODE_SIZE(MODE)+4-1) / 4) : \
-+ ((GET_MODE_SIZE(MODE) + 4 - 1) / 4) : \
- 1)
++
++(define_insn_reservation "x_floadtf" 6
++ (and (eq_attr "cpu" "z990,z9_109")
++ (eq_attr "type" "floadtf"))
++ "x_e1_t,x-wr-fp")
++
+ (define_insn_reservation "x_floaddf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "floaddf"))
+@@ -191,6 +214,7 @@
+ (eq_attr "type" "floadsf"))
+ "x_e1_t,x-wr-fp")
- #define HARD_REGNO_MODE_OK(REGNO, MODE) \
- (FP_REGNO_P(REGNO)? \
-- ((MODE) == SImode || (MODE) == DImode || \
-- GET_MODE_CLASS(MODE) == MODE_FLOAT || \
-- GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT) : \
-+ (((MODE) == SImode || (MODE) == DImode \
-+ || GET_MODE_CLASS(MODE) == MODE_FLOAT \
-+ || GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT) \
-+ && (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1))) : \
- GENERAL_REGNO_P(REGNO)? \
-- (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) : \
-+ ((HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) \
-+ && (MODE) != TFmode && (MODE) != TCmode) : \
- CC_REGNO_P(REGNO)? \
- GET_MODE_CLASS (MODE) == MODE_CC : \
- FRAME_REGNO_P(REGNO)? \
-@@ -376,7 +395,9 @@ if (INTEGRAL_MODE_P (MODE) &&
- in a register of class CLASS. */
- #define CLASS_MAX_NREGS(CLASS, MODE) \
- ((CLASS) == FP_REGS ? \
-- (GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) : \
-+ (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? \
-+ 2 * (GET_MODE_SIZE (MODE) / 2 + 8 - 1) / 8 : \
-+ (GET_MODE_SIZE (MODE) + 8 - 1) / 8) : \
- (CLASS) == ACCESS_REGS ? \
- (GET_MODE_SIZE (MODE) + 4 - 1) / 4 : \
- (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
-@@ -386,10 +407,11 @@ if (INTEGRAL_MODE_P (MODE) &&
- cannot use SUBREGs to switch between modes in FP registers.
- Likewise for access registers, since they have only half the
- word size on 64-bit. */
--#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
-- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
-- ? reg_classes_intersect_p (FP_REGS, CLASS) \
-- || reg_classes_intersect_p (ACCESS_REGS, CLASS) : 0)
-+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
-+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
-+ ? ((reg_classes_intersect_p (FP_REGS, CLASS) \
-+ && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \
-+ || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
++
+ (define_insn_reservation "x_fstoredf" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fstoredf"))
+@@ -201,6 +225,18 @@
+ (eq_attr "type" "fstoresf"))
+ "x_e1_t,x-wr-fp")
- /* Register classes. */
++
++(define_insn_reservation "x_ftrunctf" 16
++ (and (eq_attr "cpu" "z990,z9_109")
++ (eq_attr "type" "ftrunctf"))
++ "x_e1_t*10,x-wr-fp")
++
++(define_insn_reservation "x_ftruncdf" 11
++ (and (eq_attr "cpu" "z990,z9_109")
++ (eq_attr "type" "ftruncdf"))
++ "x_e1_t*5,x-wr-fp")
++
++
+ (define_insn_reservation "x_ftoi" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "ftoi"))
+@@ -234,7 +270,7 @@
+ "s390_agen_dep_p")
+
+ (define_bypass 9 "x_int,x_agen,x_lr"
+- "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
++ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ "s390_agen_dep_p")
+ ;;
+@@ -247,7 +283,7 @@
+ "s390_agen_dep_p")
+
+ (define_bypass 5 "x_load"
+- "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
++ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ "s390_agen_dep_p")
+
+@@ -261,7 +297,7 @@
+ "s390_agen_dep_p")
+
+ (define_bypass 5 "x_larl, x_la"
+- "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
++ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ "s390_agen_dep_p")
---- gcc/config/s390/s390.md.jj 2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/s390.md 2006-01-28 10:39:11.000000000 +0100
+--- gcc/config/s390/t-crtstuff.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/t-crtstuff 2006-01-31 15:07:03.000000000 +0100
+@@ -2,3 +2,4 @@
+ # because then __FRAME_END__ might not be the last thing in .eh_frame
+ # section.
+ CRTSTUFF_T_CFLAGS = -fno-asynchronous-unwind-tables
++TARGET_LIBGCC2_CFLAGS += -mlong-double-128
+--- gcc/config/s390/s390.md.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.md 2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ ;;- Machine description for GNU compiler -- S/390 / zSeries version.
+-;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
++;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ ;; Free Software Foundation, Inc.
+ ;; Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+ ;; Ulrich Weigand (uweigand at de.ibm.com).
@@ -199,11 +199,11 @@
(define_attr "type" "none,integer,load,lr,la,larl,lm,stm,
cs,vs,store,sem,idiv,
@@ -512,17 +518,40 @@
(set_attr "type" "fsimp<mode>")])
-@@ -1483,6 +1493,79 @@
+@@ -1483,6 +1493,149 @@
(set_attr "type" "lr,load,load,*")])
;
+; movtf instruction pattern(s).
+;
+
-+(define_insn "movtf"
++(define_expand "movtf"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (match_operand:TF 1 "general_operand" ""))]
++ ""
++ "")
++
++(define_insn "*movtf_64"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "=f,f,f,o,d,QS,d,o,Q")
++ (match_operand:TF 1 "general_operand" "G,f,o,f,QS,d,dm,d,Q"))]
++ "TARGET_64BIT"
++ "@
++ lzxr\t%0
++ lxr\t%0,%1
++ #
++ #
++ lmg\t%0,%N0,%S1
++ stmg\t%1,%N1,%S0
++ #
++ #
++ #"
++ [(set_attr "op_type" "RRE,RRE,*,*,RSY,RSY,*,*,*")
++ (set_attr "type" "fsimptf,fsimptf,*,*,lm,stm,*,*,*")])
++
++(define_insn "*movtf_31"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=f,f,f,o,Q")
+ (match_operand:TF 1 "general_operand" "G,f,o,f,Q"))]
-+ ""
++ "!TARGET_64BIT"
+ "@
+ lzxr\t%0
+ lxr\t%0,%1
@@ -530,12 +559,58 @@
+ #
+ #"
+ [(set_attr "op_type" "RRE,RRE,*,*,*")
-+ (set_attr "type" "fsimptf,fsimptf,*,*,*")])
++ (set_attr "type" "fsimptf,fsimptf,*,*,*")])
++
++; TFmode in GPRs splitters
++
++(define_split
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (match_operand:TF 1 "general_operand" ""))]
++ "TARGET_64BIT && reload_completed
++ && s390_split_ok_p (operands[0], operands[1], TFmode, 0)"
++ [(set (match_dup 2) (match_dup 4))
++ (set (match_dup 3) (match_dup 5))]
++{
++ operands[2] = operand_subword (operands[0], 0, 0, TFmode);
++ operands[3] = operand_subword (operands[0], 1, 0, TFmode);
++ operands[4] = operand_subword (operands[1], 0, 0, TFmode);
++ operands[5] = operand_subword (operands[1], 1, 0, TFmode);
++})
++
++(define_split
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (match_operand:TF 1 "general_operand" ""))]
++ "TARGET_64BIT && reload_completed
++ && s390_split_ok_p (operands[0], operands[1], TFmode, 1)"
++ [(set (match_dup 2) (match_dup 4))
++ (set (match_dup 3) (match_dup 5))]
++{
++ operands[2] = operand_subword (operands[0], 1, 0, TFmode);
++ operands[3] = operand_subword (operands[0], 0, 0, TFmode);
++ operands[4] = operand_subword (operands[1], 1, 0, TFmode);
++ operands[5] = operand_subword (operands[1], 0, 0, TFmode);
++})
++
++(define_split
++ [(set (match_operand:TF 0 "register_operand" "")
++ (match_operand:TF 1 "memory_operand" ""))]
++ "TARGET_64BIT && reload_completed
++ && !FP_REG_P (operands[0])
++ && !s_operand (operands[1], VOIDmode)"
++ [(set (match_dup 0) (match_dup 1))]
++{
++ rtx addr = operand_subword (operands[0], 1, 0, DFmode);
++ s390_load_address (addr, XEXP (operands[1], 0));
++ operands[1] = replace_equiv_address (operands[1], addr);
++})
++
++; TFmode in FPRs splitters
+
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (match_operand:TF 1 "memory_operand" ""))]
-+ "reload_completed && offsettable_memref_p (operands[1])"
++ "reload_completed && offsettable_memref_p (operands[1])
++ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
@@ -548,7 +623,8 @@
+(define_split
+ [(set (match_operand:TF 0 "memory_operand" "")
+ (match_operand:TF 1 "register_operand" ""))]
-+ "reload_completed && offsettable_memref_p (operands[0])"
++ "reload_completed && offsettable_memref_p (operands[0])
++ && FP_REG_P (operands[1])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
@@ -592,7 +668,7 @@
; movdf instruction pattern(s).
;
-@@ -3083,13 +3166,13 @@
+@@ -3083,13 +3236,13 @@
DONE;
})
@@ -610,7 +686,7 @@
GEN_INT(5)));
DONE;
})
-@@ -3100,11 +3183,23 @@
+@@ -3100,11 +3253,23 @@
(unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
(clobber (reg:CC CC_REGNUM))]
"TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -635,7 +711,7 @@
; fix_truncdfsi2 instruction pattern(s).
;
-@@ -3177,21 +3272,36 @@
+@@ -3177,21 +3342,36 @@
})
;
@@ -675,7 +751,7 @@
(define_expand "floatsidf2"
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:SI 1 "register_operand" "")))]
-@@ -3209,14 +3319,6 @@
+@@ -3209,14 +3389,6 @@
}
})
@@ -690,7 +766,7 @@
(define_insn "floatsidf2_ibm"
[(set (match_operand:DF 0 "register_operand" "=f")
(float:DF (match_operand:SI 1 "register_operand" "d")))
-@@ -3252,14 +3354,6 @@
+@@ -3252,14 +3424,6 @@
}
})
@@ -705,7 +781,7 @@
;
; truncdfsf2 instruction pattern(s).
;
-@@ -3275,7 +3369,8 @@
+@@ -3275,7 +3439,8 @@
(float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
"ledbr\t%0,%1"
@@ -715,7 +791,7 @@
(define_insn "truncdfsf2_ibm"
[(set (match_operand:SF 0 "register_operand" "=f,f")
-@@ -3288,6 +3383,66 @@
+@@ -3288,6 +3453,66 @@
(set_attr "type" "floadsf")])
;
@@ -782,7 +858,7 @@
; extendsfdf2 instruction pattern(s).
;
-@@ -3311,7 +3466,7 @@
+@@ -3311,7 +3536,7 @@
ldebr\t%0,%1
ldeb\t%0,%1"
[(set_attr "op_type" "RRE,RXE")
@@ -791,7 +867,7 @@
(define_insn "extendsfdf2_ibm"
[(set (match_operand:DF 0 "register_operand" "=f,f")
-@@ -3324,6 +3479,66 @@
+@@ -3324,6 +3549,66 @@
[(set_attr "length" "4,6")
(set_attr "type" "floadsf")])
@@ -858,7 +934,7 @@
;;
;; ARITHMETIC OPERATIONS
-@@ -3751,7 +3966,7 @@
+@@ -3751,7 +4036,7 @@
[(parallel
[(set (match_operand:FPR 0 "register_operand" "=f,f")
(plus:FPR (match_operand:FPR 1 "nonimmediate_operand" "%0,0")
@@ -867,7 +943,7 @@
(clobber (reg:CC CC_REGNUM))])]
"TARGET_HARD_FLOAT"
"")
-@@ -3759,52 +3974,52 @@
+@@ -3759,52 +4044,52 @@
(define_insn "*add<mode>3"
[(set (match_operand:FPR 0 "register_operand" "=f,f")
(plus:FPR (match_operand:FPR 1 "nonimmediate_operand" "%0,0")
@@ -933,7 +1009,7 @@
(set_attr "type" "fsimp<mode>")])
-@@ -4163,52 +4378,52 @@
+@@ -4163,52 +4448,52 @@
(define_insn "*sub<mode>3"
[(set (match_operand:FPR 0 "register_operand" "=f,f")
(minus:FPR (match_operand:FPR 1 "register_operand" "0,0")
@@ -999,7 +1075,7 @@
(set_attr "type" "fsimp<mode>")])
-@@ -4456,53 +4671,53 @@
+@@ -4456,53 +4741,53 @@
(define_expand "mul<mode>3"
[(set (match_operand:FPR 0 "register_operand" "=f,f")
(mult:FPR (match_operand:FPR 1 "nonimmediate_operand" "%0,0")
@@ -1073,7 +1149,7 @@
[(set_attr "op_type" "RRE,RXE")
(set_attr "type" "fmul<mode>")])
-@@ -4949,30 +5164,30 @@
+@@ -4949,30 +5234,30 @@
(define_expand "div<mode>3"
[(set (match_operand:FPR 0 "register_operand" "=f,f")
(div:FPR (match_operand:FPR 1 "register_operand" "0,0")
@@ -1112,7 +1188,7 @@
(set_attr "type" "fdiv<mode>")])
-@@ -5955,7 +6170,7 @@
+@@ -5955,7 +6240,7 @@
(set (match_operand:FPR 0 "register_operand" "=f")
(neg:FPR (match_dup 1)))]
"s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1121,7 +1197,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -5965,7 +6180,7 @@
+@@ -5965,7 +6250,7 @@
(match_operand:FPR 2 "const0_operand" "")))
(clobber (match_scratch:FPR 0 "=f"))]
"s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1130,7 +1206,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -5974,7 +6189,7 @@
+@@ -5974,7 +6259,7 @@
(neg:FPR (match_operand:FPR 1 "register_operand" "f")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1139,7 +1215,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -5983,8 +6198,8 @@
+@@ -5983,8 +6268,8 @@
(neg:FPR (match_operand:FPR 1 "register_operand" "f")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_HARD_FLOAT && TARGET_IBM_FLOAT"
@@ -1150,7 +1226,7 @@
(set_attr "type" "fsimp<mode>")])
-@@ -6062,7 +6277,7 @@
+@@ -6062,7 +6347,7 @@
(set (match_operand:FPR 0 "register_operand" "=f")
(abs:FPR (match_dup 1)))]
"s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1159,7 +1235,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -6072,7 +6287,7 @@
+@@ -6072,7 +6357,7 @@
(match_operand:FPR 2 "const0_operand" "")))
(clobber (match_scratch:FPR 0 "=f"))]
"s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1168,7 +1244,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -6081,7 +6296,7 @@
+@@ -6081,7 +6366,7 @@
(abs:FPR (match_operand:FPR 1 "register_operand" "f")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1177,7 +1253,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -6090,8 +6305,8 @@
+@@ -6090,8 +6375,8 @@
(abs:FPR (match_operand:FPR 1 "register_operand" "f")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_HARD_FLOAT && TARGET_IBM_FLOAT"
@@ -1188,7 +1264,7 @@
(set_attr "type" "fsimp<mode>")])
;;
-@@ -6161,7 +6376,7 @@
+@@ -6161,7 +6446,7 @@
(set (match_operand:FPR 0 "register_operand" "=f")
(neg:FPR (abs:FPR (match_dup 1))))]
"s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1197,7 +1273,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -6171,7 +6386,7 @@
+@@ -6171,7 +6456,7 @@
(match_operand:FPR 2 "const0_operand" "")))
(clobber (match_scratch:FPR 0 "=f"))]
"s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1206,7 +1282,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -6180,7 +6395,7 @@
+@@ -6180,7 +6465,7 @@
(neg:FPR (abs:FPR (match_operand:FPR 1 "register_operand" "f"))))
(clobber (reg:CC CC_REGNUM))]
"TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1215,7 +1291,7 @@
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
-@@ -6194,11 +6409,11 @@
+@@ -6194,11 +6479,11 @@
(define_insn "sqrt<mode>2"
[(set (match_operand:FPR 0 "register_operand" "=f,f")
@@ -1230,66 +1306,301 @@
[(set_attr "op_type" "RRE,RXE")
(set_attr "type" "fsqrt<mode>")])
---- gcc/config/s390/s390.opt.jj 2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/s390.opt 2006-01-28 10:39:11.000000000 +0100
-@@ -51,6 +51,14 @@ mhard-float
- Target Report RejectNegative Mask(HARD_FLOAT)
- Use hardware fp
-
-+mlong-double-128
-+Target Report RejectNegative Mask(LONG_DOUBLE_128)
-+Use 128-bit long double
+--- gcc/config/s390/fixdfdi.h.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/fixdfdi.h 2006-01-31 15:07:03.000000000 +0100
+@@ -20,6 +20,156 @@ along with GCC; see the file COPYING. I
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
++#ifdef L_fixunstfdi
++
++#define EXPD(fp) (((fp.l.i[0]) >> 16) & 0x7FFF)
++#define EXPONENT_BIAS 16383
++#define MANTISSA_BITS 112
++#define PRECISION (MANTISSA_BITS + 1)
++#define SIGNBIT 0x80000000
++#define SIGND(fp) ((fp.l.i[0]) & SIGNBIT)
++#define MANTD_HIGH_LL(fp) ((fp.ll[0] & HIGH_LL_FRAC_MASK) | HIGH_LL_UNIT_BIT)
++#define MANTD_LOW_LL(fp) (fp.ll[1])
++#define FRACD_ZERO_P(fp) (!fp.ll[1] && !(fp.ll[0] & HIGH_LL_FRAC_MASK))
++#define HIGH_LL_FRAC_BITS 48
++#define HIGH_LL_UNIT_BIT ((UDItype_x)1 << HIGH_LL_FRAC_BITS)
++#define HIGH_LL_FRAC_MASK (HIGH_LL_UNIT_BIT - 1)
++
++typedef int DItype_x __attribute__ ((mode (DI)));
++typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
++typedef int SItype_x __attribute__ ((mode (SI)));
++typedef unsigned int USItype_x __attribute__ ((mode (SI)));
++
++union double_long {
++ long double d;
++ struct {
++ SItype_x i[4]; /* 32 bit parts: 0 upper ... 3 lowest */
++ } l;
++ UDItype_x ll[2]; /* 64 bit parts: 0 upper, 1 lower */
++};
++
++UDItype_x __fixunstfdi (long double a1);
++
++/* convert long double to unsigned int */
++UDItype_x
++__fixunstfdi (long double a1)
++{
++ register union double_long dl1;
++ register int exp;
++ register UDItype_x l;
+
-+mlong-double-64
-+Target Report RejectNegative InverseMask(LONG_DOUBLE_128)
-+Use 64-bit long double
++ dl1.d = a1;
+
- mpacked-stack
- Target Report Mask(PACKED_STACK)
- Use packed stack layout
---- gcc/config/s390/t-crtstuff.jj 2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/t-crtstuff 2006-01-28 10:39:11.000000000 +0100
-@@ -2,3 +2,4 @@
- # because then __FRAME_END__ might not be the last thing in .eh_frame
- # section.
- CRTSTUFF_T_CFLAGS = -fno-asynchronous-unwind-tables
-+TARGET_LIBGCC2_CFLAGS += -mlong-double-128
---- gcc/config/s390/libgcc-glibc.ver.jj 2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/libgcc-glibc.ver 2006-01-28 10:39:11.000000000 +0100
-@@ -39,3 +39,36 @@ GLIBC_2.2 {
- }
- %endif
-
-+# With GCC 4.1.0 long double 128 bit support was introduced. The
-+# following symbols coming from libgcc are enabled when -mlong-double-128
-+# is specified. These lines make the symbols to get a @@GCC_4.1.0 attached.
++ /* +/- 0, denormalized, negative */
++ if (!EXPD (dl1) || SIGND(dl1))
++ return 0;
+
-+%exclude {
-+ __divtc3
-+ __multc3
-+ __powitf2
-+ __fixtfti
-+ __fixunstfti
-+ __floattitf
++ /* The exponent - considered the binary point at the right end of
++ the mantissa. */
++ exp = EXPD (dl1) - EXPONENT_BIAS - MANTISSA_BITS;
+
-+ __fixtfdi
-+ __fixunstfdi
-+ __floatditf
++ /* number < 1: If the mantissa would need to be right-shifted more bits than
++ its size (plus the implied one bit on the left) the result would be
++ zero. */
++ if (exp <= -PRECISION)
++ return 0;
++
++ /* NaN: All exponent bits set and a non-zero fraction. */
++ if ((EXPD(dl1) == 0x7fff) && !FRACD_ZERO_P (dl1))
++ return 0x0ULL;
++
++ /* If the upper ll part of the mantissa isn't
++ zeroed out after shifting, the number would be too large. */
++ if (exp >= -HIGH_LL_FRAC_BITS)
++ return 0xFFFFFFFFFFFFFFFFULL;
++
++ exp += HIGH_LL_FRAC_BITS + 1;
++
++ l = MANTD_LOW_LL (dl1) >> (HIGH_LL_FRAC_BITS + 1)
++ | MANTD_HIGH_LL (dl1) << (64 - (HIGH_LL_FRAC_BITS + 1));
++
++ return l >> -exp;
+}
++#define __fixunstfdi ___fixunstfdi
++#endif
++#undef L_fixunstfdi
+
-+GCC_4.1.0 {
-+ __divtc3
-+ __multc3
-+ __powitf2
++#ifdef L_fixtfdi
++#define EXPD(fp) (((fp.l.i[0]) >> 16) & 0x7FFF)
++#define EXPONENT_BIAS 16383
++#define MANTISSA_BITS 112
++#define PRECISION (MANTISSA_BITS + 1)
++#define SIGNBIT 0x80000000
++#define SIGND(fp) ((fp.l.i[0]) & SIGNBIT)
++#define MANTD_HIGH_LL(fp) ((fp.ll[0] & HIGH_LL_FRAC_MASK) | HIGH_LL_UNIT_BIT)
++#define MANTD_LOW_LL(fp) (fp.ll[1])
++#define FRACD_ZERO_P(fp) (!fp.ll[1] && !(fp.ll[0] & HIGH_LL_FRAC_MASK))
++#define HIGH_LL_FRAC_BITS 48
++#define HIGH_LL_UNIT_BIT ((UDItype_x)1 << HIGH_LL_FRAC_BITS)
++#define HIGH_LL_FRAC_MASK (HIGH_LL_UNIT_BIT - 1)
++
++typedef int DItype_x __attribute__ ((mode (DI)));
++typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
++typedef int SItype_x __attribute__ ((mode (SI)));
++typedef unsigned int USItype_x __attribute__ ((mode (SI)));
++
++union double_long {
++ long double d;
++ struct {
++ SItype_x i[4]; /* 32 bit parts: 0 upper ... 3 lowest */
++ } l;
++ DItype_x ll[2]; /* 64 bit parts: 0 upper, 1 lower */
++};
++
++DItype_x __fixtfdi (long double a1);
++
++/* convert long double to signed int */
++DItype_x
++__fixtfdi (long double a1)
++{
++ register union double_long dl1;
++ register int exp;
++ register UDItype_x l;
++
++ dl1.d = a1;
++
++ /* +/- 0, denormalized */
++ if (!EXPD (dl1))
++ return 0;
++
++ /* The exponent - considered the binary point at the right end of
++ the mantissa. */
++ exp = EXPD (dl1) - EXPONENT_BIAS - MANTISSA_BITS;
++
++ /* number < 1: If the mantissa would need to be right-shifted more bits than
++ its size the result would be zero. */
++ if (exp <= -PRECISION)
++ return 0;
++
++ /* NaN: All exponent bits set and a non-zero fraction. */
++ if ((EXPD(dl1) == 0x7fff) && !FRACD_ZERO_P (dl1))
++ return 0x8000000000000000ULL;
++
++ /* If the upper ll part of the mantissa isn't
++ zeroed out after shifting, the number would be too large. */
++ if (exp >= -HIGH_LL_FRAC_BITS)
++ {
++ l = (long long)1 << 63; /* long int min */
++ return SIGND (dl1) ? l : l - 1;
++ }
+
-+%ifdef __s390x__
-+ __fixtfti
-+ __fixunstfti
-+ __floattitf
++ /* The extra bit is needed for the sign bit. */
++ exp += HIGH_LL_FRAC_BITS + 1;
+
-+%else
-+ __fixtfdi
-+ __fixunstfdi
-+ __floatditf
-+%endif
++ l = MANTD_LOW_LL (dl1) >> (HIGH_LL_FRAC_BITS + 1)
++ | MANTD_HIGH_LL (dl1) << (64 - (HIGH_LL_FRAC_BITS + 1));
++
++ return SIGND (dl1) ? -(l >> -exp) : l >> -exp;
+}
++#define __fixtfdi ___fixtfdi
++#endif
++#undef L_fixtfdi
++
+ #ifdef L_fixunsdfdi
+ #define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF)
+ #define EXCESSD 1022
+@@ -305,4 +455,3 @@ __fixsfdi (float a1)
+ #define __fixsfdi ___fixsfdi
+ #endif
+ #undef L_fixsfdi
+-
+--- gcc/config/s390/s390.h.jj 2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.h 2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ /* Definitions of target machine for GNU compiler, for IBM S/390
+- Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
++ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+ Ulrich Weigand (uweigand at de.ibm.com).
+@@ -93,6 +93,8 @@ extern enum processor_flags s390_arch_fl
+ builtin_define ("__s390__"); \
+ if (TARGET_64BIT) \
+ builtin_define ("__s390x__"); \
++ if (TARGET_LONG_DOUBLE_128) \
++ builtin_define ("__LONG_DOUBLE_128__"); \
+ } \
+ while (0)
+
+@@ -216,7 +218,18 @@ if (INTEGRAL_MODE_P (MODE) &&
+ #define LONG_LONG_TYPE_SIZE 64
+ #define FLOAT_TYPE_SIZE 32
+ #define DOUBLE_TYPE_SIZE 64
+-#define LONG_DOUBLE_TYPE_SIZE 64 /* ??? Should support extended format. */
++#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
++
++/* Define this to set long double type size to use in libgcc2.c, which can
++ not depend on target_flags. */
++#ifdef __LONG_DOUBLE_128__
++#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
++#else
++#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
++#endif
++
++/* Work around target_flags dependency in ada/targtyps.c. */
++#define WIDEST_HARDWARE_FP_SIZE 64
+
+ /* We use "unsigned char" as default. */
+ #define DEFAULT_SIGNED_CHAR 0
+@@ -334,28 +347,34 @@ if (INTEGRAL_MODE_P (MODE) &&
+ Floating point modes <= word size fit into any FPR or GPR.
+ Floating point modes > word size (i.e. DFmode on 32-bit) fit
+ into any FPR, or an even-odd GPR pair.
++ TFmode fits only into an even-odd FPR pair.
+
+ Complex floating point modes fit either into two FPRs, or into
+ successive GPRs (again starting with an even number).
++ TCmode fits only into two successive even-odd FPR pairs.
+
+ Condition code modes fit only into the CC register. */
+
+ #define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P(REGNO)? \
+- (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) : \
++ (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? \
++ 2 * ((GET_MODE_SIZE(MODE) / 2 + 8 - 1) / 8) : \
++ ((GET_MODE_SIZE(MODE) + 8 - 1) / 8)) : \
+ GENERAL_REGNO_P(REGNO)? \
+ ((GET_MODE_SIZE(MODE)+UNITS_PER_WORD-1) / UNITS_PER_WORD) : \
+ ACCESS_REGNO_P(REGNO)? \
+- ((GET_MODE_SIZE(MODE)+4-1) / 4) : \
++ ((GET_MODE_SIZE(MODE) + 4 - 1) / 4) : \
+ 1)
+
+ #define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ (FP_REGNO_P(REGNO)? \
+- ((MODE) == SImode || (MODE) == DImode || \
+- GET_MODE_CLASS(MODE) == MODE_FLOAT || \
+- GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT) : \
++ (((MODE) == SImode || (MODE) == DImode \
++ || GET_MODE_CLASS(MODE) == MODE_FLOAT \
++ || GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT) \
++ && (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1))) : \
+ GENERAL_REGNO_P(REGNO)? \
+- (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) : \
++ ((HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) \
++ && (((MODE) != TFmode && (MODE) != TCmode) || TARGET_64BIT)) : \
+ CC_REGNO_P(REGNO)? \
+ GET_MODE_CLASS (MODE) == MODE_CC : \
+ FRAME_REGNO_P(REGNO)? \
+@@ -376,7 +395,9 @@ if (INTEGRAL_MODE_P (MODE) &&
+ in a register of class CLASS. */
+ #define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((CLASS) == FP_REGS ? \
+- (GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) : \
++ (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? \
++ 2 * (GET_MODE_SIZE (MODE) / 2 + 8 - 1) / 8 : \
++ (GET_MODE_SIZE (MODE) + 8 - 1) / 8) : \
+ (CLASS) == ACCESS_REGS ? \
+ (GET_MODE_SIZE (MODE) + 4 - 1) / 4 : \
+ (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+@@ -386,10 +407,11 @@ if (INTEGRAL_MODE_P (MODE) &&
+ cannot use SUBREGs to switch between modes in FP registers.
+ Likewise for access registers, since they have only half the
+ word size on 64-bit. */
+-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+- ? reg_classes_intersect_p (FP_REGS, CLASS) \
+- || reg_classes_intersect_p (ACCESS_REGS, CLASS) : 0)
++#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
++ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
++ ? ((reg_classes_intersect_p (FP_REGS, CLASS) \
++ && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \
++ || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
+
+ /* Register classes. */
+
+--- gcc/doc/invoke.texi.jj 2006-01-31 14:38:56.000000000 +0100
++++ gcc/doc/invoke.texi 2006-01-31 15:07:03.000000000 +0100
+@@ -666,8 +666,8 @@ See RS/6000 and PowerPC Options.
+
+ @emph{S/390 and zSeries Options}
+ @gccoptlist{-mtune=@var{cpu-type} -march=@var{cpu-type} @gol
+--mhard-float -msoft-float -mbackchain -mno-backchain @gol
+--mpacked-stack -mno-packed-stack @gol
++-mhard-float -msoft-float -mlong-double-64 -mlong-double-128 @gol
++-mbackchain -mno-backchain -mpacked-stack -mno-packed-stack @gol
+ -msmall-exec -mno-small-exec -mmvcle -mno-mvcle @gol
+ -m64 -m31 -mdebug -mno-debug -mesa -mzarch @gol
+ -mtpf-trace -mno-tpf-trace -mfused-madd -mno-fused-madd @gol
+@@ -11585,6 +11585,14 @@ functions in @file{libgcc.a} will be use
+ operations. When @option{-mhard-float} is specified, the compiler
+ generates IEEE floating-point instructions. This is the default.
+
++@item -mlong-double-64
++@itemx -mlong-double-128
++@opindex mlong-double-64
++@opindex mlong-double-128
++These switches control the size of @code{long double} type. A size
++of 64 bits makes the @code{long double} type equivalent to the @code{double}
++type. This is the default.
++
+ @item -mbackchain
+ @itemx -mno-backchain
+ @opindex mbackchain
Index: gcc41.spec
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/gcc41.spec,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- gcc41.spec 28 Jan 2006 10:00:01 -0000 1.19
+++ gcc41.spec 31 Jan 2006 14:29:33 -0000 1.20
@@ -1,6 +1,6 @@
-%define DATE 20060128
+%define DATE 20060131
%define gcc_version 4.1.0
-%define gcc_release 0.17
+%define gcc_release 0.18
%define _unpackaged_files_terminate_build 0
%define multilib_64_archs sparc64 ppc64 s390x x86_64
%ifarch %{ix86} x86_64 ia64
@@ -96,6 +96,9 @@
Patch15: gcc41-ppc32-retaddr.patch
Patch16: gcc41-ppc32-ldbl.patch
Patch17: gcc41-s390-ldbl.patch
+Patch18: gcc41-x86-mtune-generic1.patch
+Patch19: gcc41-x86-mtune-generic2.patch
+Patch20: gcc41-x86-mtune-generic3.patch
%define _gnu %{nil}
%ifarch sparc
@@ -443,6 +446,9 @@
%patch15 -p0 -b .ppc32-retaddr~
%patch16 -p0 -b .ppc32-ldbl~
%patch17 -p0 -b .s390-ldbl~
+%patch18 -p0 -b .x86-mtune-generic1~
+%patch19 -p0 -b .x86-mtune-generic2~
+%patch20 -p0 -b .x86-mtune-generic3~
sed -i -e 's/4\.1\.0/4.1.0/' gcc/BASE-VER gcc/version.c
sed -i -e 's/" (Red Hat[^)]*)"/" (Red Hat %{version}-%{gcc_release})"/' gcc/version.c
@@ -547,6 +553,9 @@
%ifarch ppc
--host=%{gcc_target_platform} --build=%{gcc_target_platform} --target=%{gcc_target_platform} --with-cpu=default32
%endif
+%ifarch %{ix86} x86_64
+ --with-cpu=generic \
+%endif
%ifnarch sparc ppc
--host=%{gcc_target_platform}
%endif
@@ -1528,6 +1537,20 @@
%endif
%changelog
+* Tue Jan 31 2006 Jakub Jelinek <jakub at redhat.com> 4.1.0-0.18
+- update from gcc-4_1-branch (-r110317:110433)
+ - PRs c++/25855, c++/25999, fortran/17911, fortran/18578, fortran/18579,
+ fortran/20857, fortran/20885, fortran/20895, fortran/25030,
+ fortran/25835, fortran/25951, java/21428, libgfortran/25835,
+ target/14798, target/25706, target/25718, target/25947,
+ target/26018, testsuite/25318
+- add -mtune=generic support for i?86 and x86_64 (Jan Hubicka, H.J. Lu,
+ Evandro Menezes)
+- use -mtune=generic by default if neither -march= nor -mtune= is specified
+ on command line on i?86 or x86_64
+- updated s390{,x} long double patch, fixing ICEs on s390x glibc build
+ (Andreas Krebbel, Ulrich Weigand)
+
* Sat Jan 28 2006 Jakub Jelinek <jakub at redhat.com> 4.1.0-0.17
- update from gcc-4_1-branch (-r110062:110317)
- PRs ada/20548, ada/21317, bootstrap/25859, c++/25552, c++/25856,
Index: sources
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/sources,v
retrieving revision 1.130
retrieving revision 1.131
diff -u -r1.130 -r1.131
--- sources 28 Jan 2006 10:00:01 -0000 1.130
+++ sources 31 Jan 2006 14:29:33 -0000 1.131
@@ -1 +1 @@
-dc997a74f17dccebeb7145a1849573b4 gcc-4.1.0-20060128.tar.bz2
+7a1b6cd7661918ede937ff3b157f2788 gcc-4.1.0-20060131.tar.bz2
More information about the fedora-cvs-commits
mailing list