rpms/gcc/devel gcc41-x86-mtune-generic1.patch, NONE, 1.1 gcc41-x86-mtune-generic2.patch, NONE, 1.1 gcc41-x86-mtune-generic3.patch, NONE, 1.1 .cvsignore, 1.128, 1.129 gcc41-atomic-builtins.patch, 1.1, 1.2 gcc41-s390-ldbl.patch, 1.2, 1.3 gcc41.spec, 1.19, 1.20 sources, 1.130, 1.131

Tue Jan 31 14:29:36 UTC 2006

Author: jakub

Update of /cvs/dist/rpms/gcc/devel
In directory cvs.devel.redhat.com:/tmp/cvs-serv17529

Modified Files:
	.cvsignore gcc41-atomic-builtins.patch gcc41-s390-ldbl.patch 
	gcc41.spec sources 
Added Files:
	gcc41-x86-mtune-generic1.patch gcc41-x86-mtune-generic2.patch 
	gcc41-x86-mtune-generic3.patch 
Log Message:
4.1.0-0.18


gcc41-x86-mtune-generic1.patch:
 0 files changed

--- NEW FILE gcc41-x86-mtune-generic1.patch ---
2006-01-19  Jan Hubicka  <jh at suse.cz>

	* i386.c (*_cost): Add COSTS_N_INSNS.
	(ix86_rtx_costs): Do not use COSTS_N_INSNS.

--- gcc/config/i386/i386.c	(revision 108997)
+++ gcc/config/i386/i386.c	(revision 109242)
@@ -65,15 +65,23 @@
 /* Processor costs (relative to an add) */
 static const
 struct processor_costs size_cost = {	/* costs for tunning for size */
-  2,					/* cost of an add instruction */
-  3,					/* cost of a lea instruction */
-  2,					/* variable shift costs */
-  3,					/* constant shift costs */
-  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
+  COSTS_N_INSNS (2),			/* cost of an add instruction */
+  COSTS_N_INSNS (3),			/* cost of a lea instruction */
+  COSTS_N_INSNS (2),			/* variable shift costs */
+  COSTS_N_INSNS (3),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (3),			/*                               HI */
+   COSTS_N_INSNS (3),			/*                               SI */
+   COSTS_N_INSNS (3),			/*                               DI */
+   COSTS_N_INSNS (5)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
-  3,					/* cost of movsx */
-  3,					/* cost of movzx */
+  {COSTS_N_INSNS (3),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (3),			/*                          HI */
+   COSTS_N_INSNS (3),			/*                          SI */
+   COSTS_N_INSNS (3),			/*                          DI */
+   COSTS_N_INSNS (5)},			/*                          other */
+  COSTS_N_INSNS (3),			/* cost of movsx */
+  COSTS_N_INSNS (3),			/* cost of movzx */
   0,					/* "large" insn */
   2,					/* MOVE_RATIO */
   2,					/* cost for loading QImode using movzbl */
@@ -99,26 +107,34 @@
   0,					/* size of prefetch block */
   0,					/* number of parallel prefetches */
   1,					/* Branch cost */
-  2,					/* cost of FADD and FSUB insns.  */
-  2,					/* cost of FMUL instruction.  */
-  2,					/* cost of FDIV instruction.  */
-  2,					/* cost of FABS instruction.  */
-  2,					/* cost of FCHS instruction.  */
-  2,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FSQRT instruction.  */
 };
 
 /* Processor costs (relative to an add) */
 static const
 struct processor_costs i386_cost = {	/* 386 specific costs */
-  1,					/* cost of an add instruction */
-  1,					/* cost of a lea instruction */
-  3,					/* variable shift costs */
-  2,					/* constant shift costs */
-  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
-  1,					/* cost of multiply per each bit set */
-  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
-  3,					/* cost of movsx */
-  2,					/* cost of movzx */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (3),			/* variable shift costs */
+  COSTS_N_INSNS (2),			/* constant shift costs */
+  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (6),			/*                               HI */
+   COSTS_N_INSNS (6),			/*                               SI */
+   COSTS_N_INSNS (6),			/*                               DI */
+   COSTS_N_INSNS (6)},			/*                               other */
+  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (23),			/*                          HI */
+   COSTS_N_INSNS (23),			/*                          SI */
+   COSTS_N_INSNS (23),			/*                          DI */
+   COSTS_N_INSNS (23)},			/*                          other */
+  COSTS_N_INSNS (3),			/* cost of movsx */
+  COSTS_N_INSNS (2),			/* cost of movzx */
   15,					/* "large" insn */
   3,					/* MOVE_RATIO */
   4,					/* cost for loading QImode using movzbl */
@@ -144,25 +160,33 @@
   0,					/* size of prefetch block */
   0,					/* number of parallel prefetches */
   1,					/* Branch cost */
-  23,					/* cost of FADD and FSUB insns.  */
-  27,					/* cost of FMUL instruction.  */
-  88,					/* cost of FDIV instruction.  */
-  22,					/* cost of FABS instruction.  */
-  24,					/* cost of FCHS instruction.  */
-  122,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs i486_cost = {	/* 486 specific costs */
-  1,					/* cost of an add instruction */
-  1,					/* cost of a lea instruction */
-  3,					/* variable shift costs */
-  2,					/* constant shift costs */
-  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (3),			/* variable shift costs */
+  COSTS_N_INSNS (2),			/* constant shift costs */
+  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (12),			/*                               HI */
+   COSTS_N_INSNS (12),			/*                               SI */
+   COSTS_N_INSNS (12),			/*                               DI */
+   COSTS_N_INSNS (12)},			/*                               other */
   1,					/* cost of multiply per each bit set */
-  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
-  3,					/* cost of movsx */
-  2,					/* cost of movzx */
+  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (40),			/*                          HI */
+   COSTS_N_INSNS (40),			/*                          SI */
+   COSTS_N_INSNS (40),			/*                          DI */
+   COSTS_N_INSNS (40)},			/*                          other */
+  COSTS_N_INSNS (3),			/* cost of movsx */
+  COSTS_N_INSNS (2),			/* cost of movzx */
   15,					/* "large" insn */
   3,					/* MOVE_RATIO */
   4,					/* cost for loading QImode using movzbl */
@@ -188,25 +212,33 @@
   0,					/* size of prefetch block */
   0,					/* number of parallel prefetches */
   1,					/* Branch cost */
-  8,					/* cost of FADD and FSUB insns.  */
-  16,					/* cost of FMUL instruction.  */
-  73,					/* cost of FDIV instruction.  */
-  3,					/* cost of FABS instruction.  */
-  3,					/* cost of FCHS instruction.  */
-  83,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs pentium_cost = {
-  1,					/* cost of an add instruction */
-  1,					/* cost of a lea instruction */
-  4,					/* variable shift costs */
-  1,					/* constant shift costs */
-  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (4),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (11),			/*                               HI */
+   COSTS_N_INSNS (11),			/*                               SI */
+   COSTS_N_INSNS (11),			/*                               DI */
+   COSTS_N_INSNS (11)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
-  3,					/* cost of movsx */
-  2,					/* cost of movzx */
+  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (25),			/*                          HI */
+   COSTS_N_INSNS (25),			/*                          SI */
+   COSTS_N_INSNS (25),			/*                          DI */
+   COSTS_N_INSNS (25)},			/*                          other */
+  COSTS_N_INSNS (3),			/* cost of movsx */
+  COSTS_N_INSNS (2),			/* cost of movzx */
   8,					/* "large" insn */
   6,					/* MOVE_RATIO */
   6,					/* cost for loading QImode using movzbl */
@@ -232,25 +264,33 @@
   0,					/* size of prefetch block */
   0,					/* number of parallel prefetches */
   2,					/* Branch cost */
-  3,					/* cost of FADD and FSUB insns.  */
-  3,					/* cost of FMUL instruction.  */
-  39,					/* cost of FDIV instruction.  */
-  1,					/* cost of FABS instruction.  */
-  1,					/* cost of FCHS instruction.  */
-  70,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs pentiumpro_cost = {
-  1,					/* cost of an add instruction */
-  1,					/* cost of a lea instruction */
-  1,					/* variable shift costs */
-  1,					/* constant shift costs */
-  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*                               HI */
+   COSTS_N_INSNS (4),			/*                               SI */
+   COSTS_N_INSNS (4),			/*                               DI */
+   COSTS_N_INSNS (4)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
-  1,					/* cost of movsx */
-  1,					/* cost of movzx */
+  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (17),			/*                          HI */
+   COSTS_N_INSNS (17),			/*                          SI */
+   COSTS_N_INSNS (17),			/*                          DI */
+   COSTS_N_INSNS (17)},			/*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
   8,					/* "large" insn */
   6,					/* MOVE_RATIO */
   2,					/* cost for loading QImode using movzbl */
@@ -276,25 +316,33 @@
   32,					/* size of prefetch block */
   6,					/* number of parallel prefetches */
   2,					/* Branch cost */
-  3,					/* cost of FADD and FSUB insns.  */
-  5,					/* cost of FMUL instruction.  */
-  56,					/* cost of FDIV instruction.  */
-  2,					/* cost of FABS instruction.  */
-  2,					/* cost of FCHS instruction.  */
-  56,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs k6_cost = {
-  1,					/* cost of an add instruction */
-  2,					/* cost of a lea instruction */
-  1,					/* variable shift costs */
-  1,					/* constant shift costs */
-  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (2),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (3),			/*                               HI */
+   COSTS_N_INSNS (3),			/*                               SI */
+   COSTS_N_INSNS (3),			/*                               DI */
+   COSTS_N_INSNS (3)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
-  2,					/* cost of movsx */
-  2,					/* cost of movzx */
+  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (18),			/*                          HI */
+   COSTS_N_INSNS (18),			/*                          SI */
+   COSTS_N_INSNS (18),			/*                          DI */
+   COSTS_N_INSNS (18)},			/*                          other */
+  COSTS_N_INSNS (2),			/* cost of movsx */
+  COSTS_N_INSNS (2),			/* cost of movzx */
   8,					/* "large" insn */
   4,					/* MOVE_RATIO */
   3,					/* cost for loading QImode using movzbl */
@@ -320,25 +368,33 @@
   32,					/* size of prefetch block */
   1,					/* number of parallel prefetches */
   1,					/* Branch cost */
-  2,					/* cost of FADD and FSUB insns.  */
-  2,					/* cost of FMUL instruction.  */
-  56,					/* cost of FDIV instruction.  */
-  2,					/* cost of FABS instruction.  */
-  2,					/* cost of FCHS instruction.  */
-  56,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs athlon_cost = {
-  1,					/* cost of an add instruction */
-  2,					/* cost of a lea instruction */
-  1,					/* variable shift costs */
-  1,					/* constant shift costs */
-  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (2),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (5),			/*                               HI */
+   COSTS_N_INSNS (5),			/*                               SI */
+   COSTS_N_INSNS (5),			/*                               DI */
+   COSTS_N_INSNS (5)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
-  1,					/* cost of movsx */
-  1,					/* cost of movzx */
+  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),			/*                          HI */
+   COSTS_N_INSNS (42),			/*                          SI */
+   COSTS_N_INSNS (74),			/*                          DI */
+   COSTS_N_INSNS (74)},			/*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
   8,					/* "large" insn */
   9,					/* MOVE_RATIO */
   4,					/* cost for loading QImode using movzbl */
@@ -364,25 +420,33 @@
   64,					/* size of prefetch block */
   6,					/* number of parallel prefetches */
   5,					/* Branch cost */
-  4,					/* cost of FADD and FSUB insns.  */
-  4,					/* cost of FMUL instruction.  */
-  24,					/* cost of FDIV instruction.  */
-  2,					/* cost of FABS instruction.  */
-  2,					/* cost of FCHS instruction.  */
-  35,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs k8_cost = {
-  1,					/* cost of an add instruction */
-  2,					/* cost of a lea instruction */
-  1,					/* variable shift costs */
-  1,					/* constant shift costs */
-  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (2),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*                               HI */
+   COSTS_N_INSNS (3),			/*                               SI */
+   COSTS_N_INSNS (4),			/*                               DI */
+   COSTS_N_INSNS (5)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
-  1,					/* cost of movsx */
-  1,					/* cost of movzx */
+  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (26),			/*                          HI */
+   COSTS_N_INSNS (42),			/*                          SI */
+   COSTS_N_INSNS (74),			/*                          DI */
+   COSTS_N_INSNS (74)},			/*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
   8,					/* "large" insn */
   9,					/* MOVE_RATIO */
   4,					/* cost for loading QImode using movzbl */
@@ -408,25 +472,33 @@
   64,					/* size of prefetch block */
   6,					/* number of parallel prefetches */
   5,					/* Branch cost */
-  4,					/* cost of FADD and FSUB insns.  */
-  4,					/* cost of FMUL instruction.  */
-  19,					/* cost of FDIV instruction.  */
-  2,					/* cost of FABS instruction.  */
-  2,					/* cost of FCHS instruction.  */
-  35,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs pentium4_cost = {
-  1,					/* cost of an add instruction */
-  3,					/* cost of a lea instruction */
-  4,					/* variable shift costs */
-  4,					/* constant shift costs */
-  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (3),			/* cost of a lea instruction */
+  COSTS_N_INSNS (4),			/* variable shift costs */
+  COSTS_N_INSNS (4),			/* constant shift costs */
+  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (15),			/*                               HI */
+   COSTS_N_INSNS (15),			/*                               SI */
+   COSTS_N_INSNS (15),			/*                               DI */
+   COSTS_N_INSNS (15)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
-  1,					/* cost of movsx */
-  1,					/* cost of movzx */
+  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (56),			/*                          HI */
+   COSTS_N_INSNS (56),			/*                          SI */
+   COSTS_N_INSNS (56),			/*                          DI */
+   COSTS_N_INSNS (56)},			/*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
   16,					/* "large" insn */
   6,					/* MOVE_RATIO */
   2,					/* cost for loading QImode using movzbl */
@@ -452,25 +524,33 @@
   64,					/* size of prefetch block */
   6,					/* number of parallel prefetches */
   2,					/* Branch cost */
-  5,					/* cost of FADD and FSUB insns.  */
-  7,					/* cost of FMUL instruction.  */
-  43,					/* cost of FDIV instruction.  */
-  2,					/* cost of FABS instruction.  */
-  2,					/* cost of FCHS instruction.  */
-  43,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
 };
 
 static const
 struct processor_costs nocona_cost = {
-  1,					/* cost of an add instruction */
-  1,					/* cost of a lea instruction */
-  1,					/* variable shift costs */
-  1,					/* constant shift costs */
-  {10, 10, 10, 10, 10},			/* cost of starting a multiply */
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (10),			/*                               HI */
+   COSTS_N_INSNS (10),			/*                               SI */
+   COSTS_N_INSNS (10),			/*                               DI */
+   COSTS_N_INSNS (10)},			/*                               other */
   0,					/* cost of multiply per each bit set */
-  {66, 66, 66, 66, 66},			/* cost of a divide/mod */
-  1,					/* cost of movsx */
-  1,					/* cost of movzx */
+  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (66),			/*                          HI */
+   COSTS_N_INSNS (66),			/*                          SI */
+   COSTS_N_INSNS (66),			/*                          DI */
+   COSTS_N_INSNS (66)},			/*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
   16,					/* "large" insn */
   17,					/* MOVE_RATIO */
   4,					/* cost for loading QImode using movzbl */
@@ -496,12 +576,12 @@
   128,					/* size of prefetch block */
   8,					/* number of parallel prefetches */
   1,					/* Branch cost */
-  6,					/* cost of FADD and FSUB insns.  */
-  8,					/* cost of FMUL instruction.  */
-  40,					/* cost of FDIV instruction.  */
-  3,					/* cost of FABS instruction.  */
-  3,					/* cost of FCHS instruction.  */
-  44,					/* cost of FSQRT instruction.  */
+  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
 };
 
 const struct processor_costs *ix86_cost = &pentium_cost;
@@ -16320,13 +16400,13 @@
 	  && GET_MODE (XEXP (x, 0)) == SImode)
 	*total = 1;
       else if (TARGET_ZERO_EXTEND_WITH_AND)
-	*total = COSTS_N_INSNS (ix86_cost->add);
+	*total = ix86_cost->add;
       else
-	*total = COSTS_N_INSNS (ix86_cost->movzx);
+	*total = ix86_cost->movzx;
       return false;
 
     case SIGN_EXTEND:
-      *total = COSTS_N_INSNS (ix86_cost->movsx);
+      *total = ix86_cost->movsx;
       return false;
 
     case ASHIFT:
@@ -16336,13 +16416,14 @@
 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
 	  if (value == 1)
 	    {
-	      *total = COSTS_N_INSNS (ix86_cost->add);
+	      *total = ix86_cost->add;
 	      return false;
 	    }
 	  if ((value == 2 || value == 3)
+	      && !TARGET_DECOMPOSE_LEA
 	      && ix86_cost->lea <= ix86_cost->shift_const)
 	    {
-	      *total = COSTS_N_INSNS (ix86_cost->lea);
+	      *total = ix86_cost->lea;
 	      return false;
 	    }
 	}
@@ -16357,31 +16438,31 @@
 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
 	    {
 	      if (INTVAL (XEXP (x, 1)) > 32)
-		*total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
+		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
 	      else
-		*total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
+		*total = ix86_cost->shift_const * 2;
 	    }
 	  else
 	    {
 	      if (GET_CODE (XEXP (x, 1)) == AND)
-		*total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
+		*total = ix86_cost->shift_var * 2;
 	      else
-		*total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
+		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
 	    }
 	}
       else
 	{
 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
-	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
+	    *total = ix86_cost->shift_const;
 	  else
-	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
+	    *total = ix86_cost->shift_var;
 	}
       return false;
 
     case MULT:
       if (FLOAT_MODE_P (mode))
 	{
-	  *total = COSTS_N_INSNS (ix86_cost->fmul);
+	  *total = ix86_cost->fmul;
 	  return false;
 	}
       else
@@ -16422,9 +16503,9 @@
 	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
 	    }
 
-  	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
-			          + nbits * ix86_cost->mult_bit)
-	           + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
+  	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
+		    + nbits * ix86_cost->mult_bit
+	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
 
           return true;
 	}
@@ -16434,14 +16515,14 @@
     case MOD:
     case UMOD:
       if (FLOAT_MODE_P (mode))
-	*total = COSTS_N_INSNS (ix86_cost->fdiv);
+	*total = ix86_cost->fdiv;
       else
-	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
+	*total = ix86_cost->divide[MODE_INDEX (mode)];
       return false;
 
     case PLUS:
       if (FLOAT_MODE_P (mode))
-	*total = COSTS_N_INSNS (ix86_cost->fadd);
+	*total = ix86_cost->fadd;
       else if (GET_MODE_CLASS (mode) == MODE_INT
 	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
 	{
@@ -16453,7 +16534,7 @@
 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
 	      if (val == 2 || val == 4 || val == 8)
 		{
-		  *total = COSTS_N_INSNS (ix86_cost->lea);
+		  *total = ix86_cost->lea;
 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
 				      outer_code);
@@ -16467,7 +16548,7 @@
 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
 	      if (val == 2 || val == 4 || val == 8)
 		{
-		  *total = COSTS_N_INSNS (ix86_cost->lea);
+		  *total = ix86_cost->lea;
 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
 		  *total += rtx_cost (XEXP (x, 1), outer_code);
 		  return true;
@@ -16475,7 +16556,7 @@
 	    }
 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
 	    {
-	      *total = COSTS_N_INSNS (ix86_cost->lea);
+	      *total = ix86_cost->lea;
 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
 	      *total += rtx_cost (XEXP (x, 1), outer_code);
@@ -16487,7 +16568,7 @@
     case MINUS:
       if (FLOAT_MODE_P (mode))
 	{
-	  *total = COSTS_N_INSNS (ix86_cost->fadd);
+	  *total = ix86_cost->fadd;
 	  return false;
 	}
       /* FALLTHRU */
@@ -16497,7 +16578,7 @@
     case XOR:
       if (!TARGET_64BIT && mode == DImode)
 	{
-	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
+	  *total = (ix86_cost->add * 2
 		    + (rtx_cost (XEXP (x, 0), outer_code)
 		       << (GET_MODE (XEXP (x, 0)) != DImode))
 		    + (rtx_cost (XEXP (x, 1), outer_code)
@@ -16509,16 +16590,16 @@
     case NEG:
       if (FLOAT_MODE_P (mode))
 	{
-	  *total = COSTS_N_INSNS (ix86_cost->fchs);
+	  *total = ix86_cost->fchs;
 	  return false;
 	}
       /* FALLTHRU */
 
     case NOT:
       if (!TARGET_64BIT && mode == DImode)
-	*total = COSTS_N_INSNS (ix86_cost->add * 2);
+	*total = ix86_cost->add * 2;
       else
-	*total = COSTS_N_INSNS (ix86_cost->add);
+	*total = ix86_cost->add;
       return false;
 
     case COMPARE:
@@ -16529,7 +16610,7 @@
 	{
 	  /* This kind of construct is implemented using test[bwl].
 	     Treat it as if we had an AND.  */
-	  *total = (COSTS_N_INSNS (ix86_cost->add)
+	  *total = (ix86_cost->add
 		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
 		    + rtx_cost (const1_rtx, outer_code));
 	  return true;
@@ -16545,12 +16626,12 @@
 
     case ABS:
       if (FLOAT_MODE_P (mode))
-	*total = COSTS_N_INSNS (ix86_cost->fabs);
+	*total = ix86_cost->fabs;
       return false;
 
     case SQRT:
       if (FLOAT_MODE_P (mode))
-	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
+	*total = ix86_cost->fsqrt;
       return false;
 
     case UNSPEC:

gcc41-x86-mtune-generic2.patch:
 0 files changed

--- NEW FILE gcc41-x86-mtune-generic2.patch ---
2006-01-19  Jan Hubicka  <jh at suse.cz>

	* i386.h (TARGET_DECOMPOSE_LEA): Kill.
	* i386.c (x86_decompose_lea): Kill.
	(ix86_rtx_costs): Do not test TARGET_DECOMPOSE_LEA.

--- gcc/config/i386/i386.h	(revision 109569)
+++ gcc/config/i386/i386.h	(revision 109581)
@@ -209,7 +209,6 @@
 #define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & TUNEMASK)
 #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
 #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
-#define TARGET_DECOMPOSE_LEA (x86_decompose_lea & TUNEMASK)
 #define TARGET_PREFETCH_SSE (x86_prefetch_sse)
 #define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
 #define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
--- gcc/config/i386/i386.c	(revision 109569)
+++ gcc/config/i386/i386.c	(revision 109581)
@@ -640,7 +640,6 @@
 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
-const int x86_decompose_lea = m_PENT4 | m_NOCONA;
 const int x86_shift1 = ~m_486;
 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
@@ -16446,7 +16445,6 @@
 	      return false;
 	    }
 	  if ((value == 2 || value == 3)
-	      && !TARGET_DECOMPOSE_LEA
 	      && ix86_cost->lea <= ix86_cost->shift_const)
 	    {
 	      *total = ix86_cost->lea;

gcc41-x86-mtune-generic3.patch:
 config.gcc                      |   36 ++++-
 config/i386/athlon.md           |  195 +++++++++++++++---------------
 config/i386/i386.c              |  254 ++++++++++++++++++++++++++++++++++------
 config/i386/i386.h              |   19 ++
 config/i386/i386.md             |   26 ++--
 config/i386/ppro.md             |  172 +++++++++++++--------------
 config/i386/predicates.md       |    7 -
 doc/invoke.texi                 |   22 +++
 testsuite/gcc.target/i386/lea.c |    2 
 9 files changed, 493 insertions(+), 240 deletions(-)

--- NEW FILE gcc41-x86-mtune-generic3.patch ---
gcc/

2006-01-19  Jan Hubicka  <jh at suse.cz>
            H.J. Lu  <hongjiu.lu at intel.com>
	    Evandro Menezes <evandro.menezes at amd.com>

	* invoke.texi (generic): Document.
	(i686): Update.
	* config.gcc: Make x86_64-* and i686-* default to generic tuning.
	* i386.h (TARGET_GENERIC32, TARGET_GENERIC64, TARGET_GENERIC,
	TARGET_USE_INCDEC, TARGET_PAD_RETURNS): New macros.
	(x86_use_incdec, x86_pad_returns): New variables.
	(TARGET_CPU_DEFAULT_generic): New constant.
	(TARGET_CPU_DEFAULT_NAMES): Add generic.
	(enum processor_type): Add generic32 and generic64.
	* i386.md (cpu attribute): Add generic32/generic64.
	(movhi splitter): Behave sanely when both partial_reg_dependency and
	partial_reg_stall are set.
	(K8 splitters): Enable for generic as well.
	* predicates.md (incdec_operand): Use TARGET_USE_INCDEC.
	(aligned_operand): Avoid memory mismatch stalls.
	* athlon.md: Enable for generic64, new patterns for 128bit moves.
	* ppro.md: Enable for generic32.
	* i386.c (generic64_cost, generic32_cost): New.
	(m_GENERIC32, m_GENERIC64, m_GENERIC): New macros.
	(x86_use_leave): Enable for generic64.  (x86_use_sahf,
	x86_ext_80387_constants): Enable for generic32.  (x86_push_memory,
	x86_movx, x86_unroll_strlen, x86_deep_branch, x86_use_simode_fiop,
	x86_use_cltd, x86_promote_QImode, x86_sub_esp_4, x86_sub_esp_8,
	x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves,
	x86_partial_reg_dependency, x86_memory_mismatch_stall,
	x86_accumulate_outgoing_args, x86_prologue_using_move,
	x86_epilogue_using_move, x86_arch_always_fancy_math_387,
	x86_sse_partial_reg_dependency, x86_four_jump_limit, x86_schedule):
	Enable for generic.
	(x86_use_incdec, x86_pad_returns): New.
	(override_options): Add generic32 and generic64, translate "generic"
	to generic32/generic64 and "i686" to "generic32", refuse
	"generic32"/"generic64" as arch target.
	(ix86_issue_rate, ix86_adjust_cost): Handle generic as athlon.
	(ix86_reorg): Honor PAD_RETURNS.

gcc/testsuite/

2006-01-19  Jan Hubicka  <jh at suse.cz>

	* gcc.target/i386/lea.c: Test pentiumpro, not i686.

--- gcc/config.gcc.jj	2006-01-19 06:45:43.000000000 -0800
+++ gcc/config.gcc	2006-01-19 10:07:57.000000000 -0800
@@ -2370,6 +2370,9 @@ if test x$with_cpu = x ; then
       # A Cirrus ARM variant.
       with_cpu="ep9312"
       ;;
+    i386-*-*)
+      with_cpu=i386
+      ;;
     i486-*-*)
       with_cpu=i486
       ;;
@@ -2421,13 +2424,26 @@ if test x$with_cpu = x ; then
         pentium_m-*)
           with_cpu=pentium-m
           ;;
-        *)
+        pentiumpro-*)
           with_cpu=pentiumpro
           ;;
+        *)
+          with_cpu=generic
+          ;;
       esac
       ;;
     x86_64-*-*)
-      with_cpu=k8
+      case ${target_noncanonical} in
+        k8-*|opteron-*|athlon_64-*)
+          with_cpu=k8
+          ;;
+        nocona-*)
+          with_cpu=nocona
+          ;;
+        *)
+          with_cpu=generic
+          ;;
+      esac
       ;;
     alphaev6[78]*-*-*)
       with_cpu=ev67
@@ -2633,13 +2649,21 @@ case "${target}" in
 		for which in arch cpu tune; do
 			eval "val=\$with_$which"
 			case ${val} in
-			"" | i386 | i486 \
+			i386 | i486 \
 			| i586 | pentium | pentium-mmx | winchip-c6 | winchip2 \
 			| c3 | c3-2 | i686 | pentiumpro | pentium2 | pentium3 \
 			| pentium4 | k6 | k6-2 | k6-3 | athlon | athlon-tbird \
-			| athlon-4 | athlon-xp | athlon-mp | k8 | opteron \
-			| athlon64 | athlon-fx | prescott | pentium-m \
-			| pentium4m | pentium3m| nocona)
+			| athlon-4 | athlon-xp | athlon-mp \
+			| prescott | pentium-m | pentium4m | pentium3m)
+				case "${target}" in
+				  x86_64-*-*)
+				      echo "CPU given in --with-$which=$val doesn't support 64bit mode." 1>&2
+				      exit 1
+				      ;;
+				esac
+				# OK
+				;;
+			"" | k8 | opteron | athlon64 | athlon-fx | nocona | generic)
 				# OK
 				;;
 			*)
--- gcc/config/i386/athlon.md.jj	2006-01-28 12:50:29.000000000 +0100
+++ gcc/config/i386/athlon.md	2006-01-28 13:17:22.000000000 +0100
@@ -123,7 +123,7 @@
 (define_cpu_unit "athlon-fmul" "athlon_fp")
 (define_cpu_unit "athlon-fstore" "athlon_fp")
 (define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
-(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
 
 ;; Vector operations usually consume many of pipes.
 (define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
@@ -131,26 +131,26 @@
 
 ;; Jump instructions are executed in the branch unit completely transparent to us
 (define_insn_reservation "athlon_branch" 0
-			 (and (eq_attr "cpu" "athlon,k8")
+			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "ibr"))
 			 "athlon-direct,athlon-ieu")
 (define_insn_reservation "athlon_call" 0
-			 (and (eq_attr "cpu" "athlon,k8")
+			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "call,callv"))
 			 "athlon-vector,athlon-ieu")
 
 ;; Latency of push operation is 3 cycles, but ESP value is available
 ;; earlier
 (define_insn_reservation "athlon_push" 2
-			 (and (eq_attr "cpu" "athlon,k8")
+			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "push"))
 			 "athlon-direct,athlon-agu,athlon-store")
 (define_insn_reservation "athlon_pop" 4
-			 (and (eq_attr "cpu" "athlon,k8")
+			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "pop"))
 			 "athlon-vector,athlon-load,athlon-ieu")
 (define_insn_reservation "athlon_pop_k8" 3
-			 (and (eq_attr "cpu" "k8")
+			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "pop"))
 			 "athlon-double,(athlon-ieu+athlon-load)")
 (define_insn_reservation "athlon_leave" 3
@@ -158,13 +158,13 @@
 			      (eq_attr "type" "leave"))
 			 "athlon-vector,(athlon-ieu+athlon-load)")
 (define_insn_reservation "athlon_leave_k8" 3
-			 (and (eq_attr "cpu" "k8")
+			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "leave"))
 			 "athlon-double,(athlon-ieu+athlon-load)")
 
 ;; Lea executes in AGU unit with 2 cycles latency.
 (define_insn_reservation "athlon_lea" 2
-			 (and (eq_attr "cpu" "athlon,k8")
+			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "lea"))
 			 "athlon-direct,athlon-agu,nothing")
 
@@ -176,13 +176,13 @@
 			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
 ;; ??? Widening multiply is vector or double.
 (define_insn_reservation "athlon_imul_k8_DI" 4
-			 (and (eq_attr "cpu" "k8")
+			 (and (eq_attr "cpu" "k8,generic64")
 			      (and (eq_attr "type" "imul")
 				   (and (eq_attr "mode" "DI")
 					(eq_attr "memory" "none,unknown"))))
 			 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
 (define_insn_reservation "athlon_imul_k8" 3
-			 (and (eq_attr "cpu" "k8")
+			 (and (eq_attr "cpu" "k8,generic64")
 			      (and (eq_attr "type" "imul")
 				   (eq_attr "memory" "none,unknown")))
 			 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
@@ -192,13 +192,13 @@
 				   (eq_attr "memory" "load,both")))
 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
 (define_insn_reservation "athlon_imul_mem_k8_DI" 7
-			 (and (eq_attr "cpu" "k8")
+			 (and (eq_attr "cpu" "k8,generic64")
 			      (and (eq_attr "type" "imul")
 				   (and (eq_attr "mode" "DI")
 					(eq_attr "memory" "load,both"))))
[...1715 lines suppressed...]
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssecmp"))))
 			 "decoder0,(p2+p1)*2")
 
 (define_insn_reservation "ppro_sse_cvt_V4SF" 3
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "none,unknown")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssecvt"))))
 			 "decoder0,p1*2")
 
 (define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "!none,unknown")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssecmp"))))
 			 "decoder0,p1,p4+p3")
 
 (define_insn_reservation "ppro_sse_mul_V4SF" 5
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssemul"))))
 			"decoder0,p0*2")
 
 (define_insn_reservation "ppro_sse_mul_V4SF_load" 5
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssemul"))))
@@ -679,49 +679,49 @@
 
 ;; FIXME: p0 really closed this long???
 (define_insn_reservation "ppro_sse_div_V4SF" 48
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssediv"))))
 			 "decoder0,p0*34")
 
 (define_insn_reservation "ppro_sse_div_V4SF_load" 48
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssediv"))))
 			 "decoder0,(p2+p0)*2,p0*32")
 
 (define_insn_reservation "ppro_sse_log_V4SF" 2
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "sselog,sselog1"))))
 			 "decodern,p1")
 
 (define_insn_reservation "ppro_sse_log_V4SF_load" 2
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "sselog,sselog1"))))
 			 "decoder0,(p2+p1)")
 
 (define_insn_reservation "ppro_sse_mov_V4SF" 1
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssemov"))))
 			 "decoder0,(p0|p1)*2")
 
 (define_insn_reservation "ppro_sse_mov_V4SF_load" 2
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssemov"))))
 			 "decoder0,p2*2")
 
 (define_insn_reservation "ppro_sse_mov_V4SF_store" 3
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "store")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssemov"))))
@@ -735,7 +735,7 @@
 ;; reg-reg instructions produce 1 uop so they can be decoded on any of
 ;; the three decoders.
 (define_insn_reservation "ppro_insn" 1
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "none,unknown")
 				   (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
 			 "decodern,(p0|p1)")
@@ -743,13 +743,13 @@
 ;; read-modify and register-memory instructions have 2 or three uops,
 ;; so they have to be decoded on decoder0.
 (define_insn_reservation "ppro_insn_load" 3
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "load")
 				   (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
 			 "decoder0,p2+(p0|p1)")
 
 (define_insn_reservation "ppro_insn_store" 1
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "store")
 				   (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
 			 "decoder0,(p0|p1),p4+p3")
@@ -757,7 +757,7 @@
 ;; read-modify-store instructions produce 4 uops so they have to be
 ;; decoded on decoder0 as well.
 (define_insn_reservation "ppro_insn_both" 4
-			 (and (eq_attr "cpu" "pentiumpro")
+			 (and (eq_attr "cpu" "pentiumpro,generic32")
 			      (and (eq_attr "memory" "both")
 				   (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
 			 "decoder0,p2+(p0|p1),p4+p3")
--- gcc/config/i386/predicates.md.jj	2006-01-28 12:50:29.000000000 +0100
+++ gcc/config/i386/predicates.md	2006-01-28 13:17:22.000000000 +0100
@@ -614,7 +614,7 @@
 {
   /* On Pentium4, the inc and dec operations causes extra dependency on flag
      registers, since carry flag is not set.  */
-  if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
+  if (!TARGET_USE_INCDEC && !optimize_size)
     return 0;
   return op == const1_rtx || op == constm1_rtx;
 })
@@ -693,6 +693,11 @@
   if (GET_CODE (op) != MEM)
     return 1;
 
+  /* All patterns using aligned_operand on memory operands end up
+     promoting the memory operand to 64bit, causing a memory mismatch.  */
+  if (TARGET_MEMORY_MISMATCH_STALL && !optimize_size)
+    return 0;
+
   /* Don't even try to do any aligned optimizations with volatiles.  */
   if (MEM_VOLATILE_P (op))
     return 0;
--- gcc/doc/invoke.texi.jj	2005-12-17 16:38:57.000000000 -0800
+++ gcc/doc/invoke.texi	2006-01-19 10:07:57.000000000 -0800
@@ -8993,6 +8993,23 @@ Tune to @var{cpu-type} everything applic
 for the ABI and the set of available instructions.  The choices for
 @var{cpu-type} are:
 @table @emph
+@item generic
+Produce code optimized for the most common IA32/AMD64/EM64T processors.
+If you know the CPU on which your code will run, then you should use
+the corresponding @option{-mtune} option instead of
+@option{-mtune=generic}.  But, if you do not know exactly what CPU users
+of your application will have, then you should use this option.
+
+As new processors are deployed in the marketplace, the behavior of this
+option will change.  Therefore, if you upgrade to a newer version of
+GCC, the code generated by this option will change to reflect the
+processors that were most common when that version of GCC was released.
+
+There is no @option{-march=generic} option because @option{-march}
+indicates the instruction set the compiler can use, and there is no
+generic instruction set applicable to all processors.  In contrast,
+@option{-mtune} indicates the processor (or, in this case, collection of
+processors) for which the code is optimized.
 @item i386
 Original Intel's i386 CPU@.
 @item i486
@@ -9001,8 +9018,11 @@ Intel's i486 CPU@.  (No scheduling is im
 Intel Pentium CPU with no MMX support.
 @item pentium-mmx
 Intel PentiumMMX CPU based on Pentium core with MMX instruction set support.
-@item i686, pentiumpro
+@item pentiumpro
 Intel PentiumPro CPU@.
+@item i686
+Same as @code{generic}, but when used as @code{march} option, PentiumPro
+instruction set will be used, so the code will run on all i686 family chips.
 @item pentium2
 Intel Pentium2 CPU based on PentiumPro core with MMX instruction set support.
 @item pentium3, pentium3m
--- gcc/testsuite/gcc.target/i386/lea.c.jj	2005-11-04 14:10:31.000000000 -0800
+++ gcc/testsuite/gcc.target/i386/lea.c	2006-01-19 13:10:18.000000000 -0800
@@ -1,6 +1,6 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
 /* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O2 -march=i686" } */
+/* { dg-options "-O2 -march=pentiumpro" } */
 /* { dg-final { scan-assembler "leal" } } */
 typedef struct {
   char **visbuf;


Index: .cvsignore
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/.cvsignore,v
retrieving revision 1.128
retrieving revision 1.129
diff -u -r1.128 -r1.129
--- .cvsignore	28 Jan 2006 10:00:01 -0000	1.128
+++ .cvsignore	31 Jan 2006 14:29:33 -0000	1.129
@@ -1 +1 @@
-gcc-4.1.0-20060128.tar.bz2
+gcc-4.1.0-20060131.tar.bz2

gcc41-atomic-builtins.patch:
 s390/s390.c   |    1 +
 sparc/sparc.c |    1 +
 2 files changed, 2 insertions(+)

Index: gcc41-atomic-builtins.patch
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/gcc41-atomic-builtins.patch,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- gcc41-atomic-builtins.patch	28 Jan 2006 10:00:01 -0000	1.1
+++ gcc41-atomic-builtins.patch	31 Jan 2006 14:29:33 -0000	1.2
@@ -1,23 +1,9 @@
-2006-01-28  Jakub Jelinek  <jakub at redhat.com>
+2006-01-31  Jakub Jelinek  <jakub at redhat.com>
 
 	* config/s390/s390.c (init_alignment_context): Set
 	ALIAS_SET_MEMORY_BARRIER on the MEM.
 	* config/sparc/sparc.c (sparc_expand_compare_and_swap_12): Likewise.
 
-2006-01-25  Richard Henderson <rth at redhat.com>
-
-	* alias.h (ALIAS_SET_MEMORY_BARRIER): New.
-	* alias.c (true_dependence): Respect it.
-	(canon_true_dependence, write_dependence_p): Likewise.
-	* builtins.c (get_builtin_sync_mem): Set it.
-
-2006-01-03  Adrian Straetling  <straetling at de.ibm.com>
-
-	* gcc/builtins.c (get_builtin_sync_mem): New function.
-	(expand_builtin_sync_operation, expand_builtin_compare_and_swap,
-	expand_builtin_lock_test_and_set, expand_builtin_lock_release):
-	Call get_builtin_sync_mem to generate mem rtx.
-
 --- gcc/config/s390/s390.c.jj	2006-01-28 09:54:03.000000000 +0100
 +++ gcc/config/s390/s390.c	2006-01-28 10:29:03.000000000 +0100
 @@ -4030,6 +4030,7 @@ init_alignment_context (struct alignment
@@ -38,171 +24,3 @@
    MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
  
    val = force_reg (SImode, memsi);
---- gcc/builtins.c.jj	2006-01-28 09:54:07.000000000 +0100
-+++ gcc/builtins.c	2006-01-28 10:25:33.000000000 +0100
-@@ -5425,6 +5425,28 @@ get_builtin_sync_mode (int fcode_diff)
-   return mode_for_size (BITS_PER_UNIT << fcode_diff, MODE_INT, 0);
- }
- 
-+/* Expand the memory expression LOC and return the appropriate memory operand
-+   for the builtin_sync operations.  */
-+
-+static rtx
-+get_builtin_sync_mem (tree loc, enum machine_mode mode)
-+{
-+  rtx addr, mem;
-+
-+  addr = expand_expr (loc, NULL, Pmode, EXPAND_SUM);
-+
-+  /* Note that we explicitly do not want any alias information for this
-+     memory, so that we kill all other live memories.  Otherwise we don't
-+     satisfy the full barrier semantics of the intrinsic.  */
-+  mem = validize_mem (gen_rtx_MEM (mode, addr));
-+
-+  set_mem_align (mem, get_pointer_alignment (loc, BIGGEST_ALIGNMENT));
-+  set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
-+  MEM_VOLATILE_P (mem) = 1;
-+
-+  return mem;
-+}
-+
- /* Expand the __sync_xxx_and_fetch and __sync_fetch_and_xxx intrinsics.
-    ARGLIST is the operands list to the function.  CODE is the rtx code 
-    that corresponds to the arithmetic or logical operation from the name;
-@@ -5438,20 +5460,14 @@ expand_builtin_sync_operation (enum mach
- 			       enum rtx_code code, bool after,
- 			       rtx target, bool ignore)
- {
--  rtx addr, val, mem;
-+  rtx val, mem;
- 
-   /* Expand the operands.  */
--  addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_SUM);
-+  mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
- 
-   arglist = TREE_CHAIN (arglist);
-   val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
- 
--  /* Note that we explicitly do not want any alias information for this
--     memory, so that we kill all other live memories.  Otherwise we don't
--     satisfy the full barrier semantics of the intrinsic.  */
--  mem = validize_mem (gen_rtx_MEM (mode, addr));
--  MEM_VOLATILE_P (mem) = 1;
--
-   if (ignore)
-     return expand_sync_operation (mem, val, code);
-   else
-@@ -5467,10 +5483,10 @@ static rtx
- expand_builtin_compare_and_swap (enum machine_mode mode, tree arglist,
- 				 bool is_bool, rtx target)
- {
--  rtx addr, old_val, new_val, mem;
-+  rtx old_val, new_val, mem;
- 
-   /* Expand the operands.  */
--  addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_SUM);
-+  mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
- 
-   arglist = TREE_CHAIN (arglist);
-   old_val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
-@@ -5478,12 +5494,6 @@ expand_builtin_compare_and_swap (enum ma
-   arglist = TREE_CHAIN (arglist);
-   new_val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
- 
--  /* Note that we explicitly do not want any alias information for this
--     memory, so that we kill all other live memories.  Otherwise we don't
--     satisfy the full barrier semantics of the intrinsic.  */
--  mem = validize_mem (gen_rtx_MEM (mode, addr));
--  MEM_VOLATILE_P (mem) = 1;
--
-   if (is_bool)
-     return expand_bool_compare_and_swap (mem, old_val, new_val, target);
-   else
-@@ -5500,20 +5510,14 @@ static rtx
- expand_builtin_lock_test_and_set (enum machine_mode mode, tree arglist,
- 				  rtx target)
- {
--  rtx addr, val, mem;
-+  rtx val, mem;
- 
-   /* Expand the operands.  */
--  addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_NORMAL);
-+  mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
- 
-   arglist = TREE_CHAIN (arglist);
-   val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL);
- 
--  /* Note that we explicitly do not want any alias information for this
--     memory, so that we kill all other live memories.  Otherwise we don't
--     satisfy the barrier semantics of the intrinsic.  */
--  mem = validize_mem (gen_rtx_MEM (mode, addr));
--  MEM_VOLATILE_P (mem) = 1;
--
-   return expand_sync_lock_test_and_set (mem, val, target);
- }
- 
-@@ -5547,17 +5551,11 @@ static void
- expand_builtin_lock_release (enum machine_mode mode, tree arglist)
- {
-   enum insn_code icode;
--  rtx addr, mem, insn;
-+  rtx mem, insn;
-   rtx val = const0_rtx;
- 
-   /* Expand the operands.  */
--  addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_NORMAL);
--
--  /* Note that we explicitly do not want any alias information for this
--     memory, so that we kill all other live memories.  Otherwise we don't
--     satisfy the barrier semantics of the intrinsic.  */
--  mem = validize_mem (gen_rtx_MEM (mode, addr));
--  MEM_VOLATILE_P (mem) = 1;
-+  mem = get_builtin_sync_mem (TREE_VALUE (arglist), mode);
- 
-   /* If there is an explicit operation in the md file, use it.  */
-   icode = sync_lock_release[mode];
---- gcc/alias.c.jj	2006-01-28 09:54:07.000000000 +0100
-+++ gcc/alias.c	2006-01-28 10:25:33.000000000 +0100
-@@ -2209,6 +2209,9 @@ true_dependence (rtx mem, enum machine_m
-     return 1;
-   if (GET_MODE (mem) == BLKmode && GET_CODE (XEXP (mem, 0)) == SCRATCH)
-     return 1;
-+  if (MEM_ALIAS_SET (x) == ALIAS_SET_MEMORY_BARRIER
-+      || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
-+    return 1;
- 
-   if (DIFFERENT_ALIAS_SETS_P (x, mem))
-     return 0;
-@@ -2282,6 +2285,9 @@ canon_true_dependence (rtx mem, enum mac
-     return 1;
-   if (GET_MODE (mem) == BLKmode && GET_CODE (XEXP (mem, 0)) == SCRATCH)
-     return 1;
-+  if (MEM_ALIAS_SET (x) == ALIAS_SET_MEMORY_BARRIER
-+      || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
-+    return 1;
- 
-   if (DIFFERENT_ALIAS_SETS_P (x, mem))
-     return 0;
-@@ -2341,6 +2347,9 @@ write_dependence_p (rtx mem, rtx x, int 
-     return 1;
-   if (GET_MODE (mem) == BLKmode && GET_CODE (XEXP (mem, 0)) == SCRATCH)
-     return 1;
-+  if (MEM_ALIAS_SET (x) == ALIAS_SET_MEMORY_BARRIER
-+      || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
-+    return 1;
- 
-   if (DIFFERENT_ALIAS_SETS_P (x, mem))
-     return 0;
---- gcc/alias.h.jj	2006-01-28 09:54:07.000000000 +0100
-+++ gcc/alias.h	2006-01-28 10:25:33.000000000 +0100
-@@ -27,4 +27,10 @@ extern HOST_WIDE_INT get_frame_alias_set
- extern void record_base_value (unsigned int, rtx, int);
- extern bool component_uses_parent_alias_set (tree);
- 
-+/* This alias set can be used to force a memory to conflict with all
-+   other memories, creating a barrier across which no memory reference
-+   can move.  Note that there are other legacy ways to create such
-+   memory barriers, including an address of SCRATCH.  */
-+#define ALIAS_SET_MEMORY_BARRIER	((HOST_WIDE_INT) -1)
-+
- #endif /* GCC_ALIAS_H */

gcc41-s390-ldbl.patch:
 config/s390/2084.md          |   44 +++
 config/s390/fixdfdi.h        |  151 ++++++++++++
 config/s390/libgcc-glibc.ver |   33 ++
 config/s390/s390.c           |   62 ++++-
 config/s390/s390.h           |   48 ++-
 config/s390/s390.md          |  529 +++++++++++++++++++++++++++++++++----------
 config/s390/s390.opt         |   10 
 config/s390/t-crtstuff       |    1 
 doc/invoke.texi              |   10 
 9 files changed, 742 insertions(+), 146 deletions(-)

Index: gcc41-s390-ldbl.patch
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/gcc41-s390-ldbl.patch,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- gcc41-s390-ldbl.patch	28 Jan 2006 10:10:48 -0000	1.2
+++ gcc41-s390-ldbl.patch	31 Jan 2006 14:29:33 -0000	1.3
@@ -1,9 +1,10 @@
-2006-01-27  Andreas Krebbel  <krebbel1 at de.ibm.com>
-            Ulrich Weigand  <uweigand at de.ibm.com>
+2006-01-31  Andreas Krebbel  <krebbel1 at de.ibm.com>
+	    Ulrich Weigand  <uweigand at de.ibm.com>
 
 	* config/s390/2084.md ("x_fsimptf", "x_fmultf", "x_fdivtf",
 	"x_floadtf", "x_ftrunctf", "x_ftruncdf"): New insn reservations.
-	* config/s390/s390.c (struct processor_costs): Add mxbr, sqxbr, dxbr 
+	* config/s390/fixdfdi.h (__fixunstfdi, __fixtfdi): New functions.
+	* config/s390/s390.c (struct processor_costs): Add mxbr, sqxbr, dxbr
 	and dxr fields.
 	(z900_cost, z990_cost, z9_109_cost): Values for the new fields added.
 	(s390_rtx_costs): Use the new fields to calculate rtx costs.
@@ -15,145 +16,94 @@
 	builtin define.
 	(LONG_DOUBLE_TYPE_SIZE): Set to 128 or 64.
 	(LIBGCC2_LONG_DOUBLE_TYPE_SIZE, WIDEST_HARDWARE_FP_SIZE): Define.
-	(HARD_REGNO_NREGS, HARD_REGNO_MODE_OK, CLASS_MAX_NREGS, 
+	(HARD_REGNO_NREGS, HARD_REGNO_MODE_OK, CLASS_MAX_NREGS,
 	CANNOT_CHANGE_MODE_CLASS): Consider TFmode.
-	* config/s390/s390.md ("type" attribute): Add fsimptf, floadtf, fmultf, 
+	* config/s390/s390.md ("type" attribute): Add fsimptf, floadtf, fmultf,
 	fdivtf, fsqrttf, ftrunctf, ftruncdf as possible values.
 	(FPR mode macro): Add TFmode.
 	(DSF mode macro): New.
 	(<de>, <dee> mode attributes): Removed.
 	(<xde>, <xdee>, <RRe>, <RXe>, <Rf> mode attributes): New.
-	("*cmp<mode>_ccs_0", "*cmp<mode>_ccs_0_ibm", "*cmp<mode>_ccs", 
-	"*cmp<mode>_ccs_ibm", "fix_trunc<FPR:mode><GPR:mode>2_ieee", 
-	"floatdi<mode>2", "floatsi<mode>2_ieee", "*add<mode>3", 
-	"*add<mode>3_cc", "*add<mode>3_cconly", "*add<mode>3_ibm", 
-	"*sub<mode>3", "*sub<mode>3_cc", "*sub<mode>3_cconly", 
+	("*cmp<mode>_ccs_0", "*cmp<mode>_ccs_0_ibm", "*cmp<mode>_ccs",
+	"*cmp<mode>_ccs_ibm", "fix_trunc<FPR:mode><GPR:mode>2_ieee",
+	"floatdi<mode>2", "floatsi<mode>2_ieee", "*add<mode>3",
+	"*add<mode>3_cc", "*add<mode>3_cconly", "*add<mode>3_ibm",
+	"*sub<mode>3", "*sub<mode>3_cc", "*sub<mode>3_cconly",
 	"*sub<mode>3_ibm", "*mul<mode>3_ibm", "*fmadd<mode>", "*fmsub<mode>",
-	"*div<mode>3", "*div<mode>3_ibm", "*neg<mode>2_cc", 
-	"*neg<mode>2_cconly", "*neg<mode>2", "*neg<mode>2_ibm", 
+	"*div<mode>3", "*div<mode>3_ibm", "*neg<mode>2_cc",
+	"*neg<mode>2_cconly", "*neg<mode>2", "*neg<mode>2_ibm",
 	"*abs<mode>2_cc", "*abs<mode>2_cconly", "*abs<mode>2",
-	"*abs<mode>2_ibm", "*negabs<mode>2_cc", "*negabs<mode>2_cconly", 
+	"*abs<mode>2_ibm", "*negabs<mode>2_cc", "*negabs<mode>2_cconly",
 	"*negabs<mode>2", "sqrt<mode>2"):
 	Changed <de> to <xde>. R constraint replaced by <Rf>.
-	("*mul<mode>3"): Changed <dee> to <xdee>. R constraint replaced by 
+	("*mul<mode>3"): Changed <dee> to <xdee>. R constraint replaced by
 	<Rf>.
 	("fix_trunc<FPR:mode>di2"): 'FPR:' removed.
-        ("*fmadd<mode>", "*fmsub<mode>"): FPR mode replaced by DSF.
-	("movtf"): New insn definition followed by 2 new splitters.
-	("reload_outtf", "reload_intf", "trunctfdf2", "trunctfsf2", 
+	("*fmadd<mode>", "*fmsub<mode>"): FPR mode replaced by DSF.
+	("*movtf_64", "*movtf_31"): New insn definitions followed by 5
+	splitters.
+	("movtf", "reload_outtf", "reload_intf", "trunctfdf2", "trunctfsf2",
 	"extenddftf2", "extendsftf2"): New expanders.
-	("*trunctfdf2_ieee", "*trunctfdf2_ibm", "*trunctfsf2_ieee", 
+	("*trunctfdf2_ieee", "*trunctfdf2_ibm", "*trunctfsf2_ieee",
 	"*trunctfsf2_ibm", "*extenddftf2_ieee", "*extenddftf2_ibm",
 	"*extendsftf2_ieee", "*extendsftf2_ibm"): New insn patterns.
 	* config/s390/s390.opt (mlong-double-128, mlong-double-64):
 	New options.
 	* config/s390/t-crtstuff (TARGET_LIBGCC2_CFLAGS): Macro defined.
-	* config/s390/libgcc-glibc.ver (__divtc3, __multc3, __powitf2, 
+	* config/s390/libgcc-glibc.ver (__divtc3, __multc3, __powitf2,
 	__fixtfti, __fixunstfti, __floattitf, __fixtfdi, __fixunstfdi,
 	__floatditf): Add a GCC_4.1.0 symbol version tag.
+	* doc/invoke.texi (-mlong-double-128, -mlong-double-64): Document
+	the new options.
 
---- gcc/config/s390/2084.md.jj	2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/2084.md	2006-01-28 10:39:11.000000000 +0100
-@@ -161,6 +161,11 @@
- ;; Floating point insns
- ;;
- 
-+(define_insn_reservation "x_fsimptf" 7 
-+  (and (eq_attr "cpu" "z990,z9_109")
-+       (eq_attr "type" "fsimptf"))
-+  "x_e1_t*2,x-wr-fp") 
-+
- (define_insn_reservation "x_fsimpdf" 6 
-   (and (eq_attr "cpu" "z990,z9_109")
-        (eq_attr "type" "fsimpdf,fmuldf"))
-@@ -171,6 +176,18 @@
-        (eq_attr "type" "fsimpsf,fmulsf"))
-   "x_e1_t,x-wr-fp") 
- 
-+
-+(define_insn_reservation "x_fmultf" 33
-+  (and (eq_attr "cpu" "z990,z9_109")
-+       (eq_attr "type" "fmultf"))
-+  "x_e1_t*27,x-wr-fp") 
-+
-+
-+(define_insn_reservation "x_fdivtf" 82
-+  (and (eq_attr "cpu" "z990,z9_109")
-+       (eq_attr "type" "fdivtf,fsqrttf"))
-+  "x_e1_t*76,x-wr-fp") 
-+
- (define_insn_reservation "x_fdivdf" 36
-   (and (eq_attr "cpu" "z990,z9_109")
-        (eq_attr "type" "fdivdf,fsqrtdf"))
-@@ -181,6 +198,12 @@
-        (eq_attr "type" "fdivsf,fsqrtsf"))
-   "x_e1_t*30,x-wr-fp") 
- 
-+
-+(define_insn_reservation "x_floadtf" 6 
-+  (and (eq_attr "cpu" "z990,z9_109")
-+       (eq_attr "type" "floadtf"))
-+  "x_e1_t,x-wr-fp") 
-+
- (define_insn_reservation "x_floaddf" 6 
-   (and (eq_attr "cpu" "z990,z9_109")
-        (eq_attr "type" "floaddf"))
-@@ -191,6 +214,7 @@
-        (eq_attr "type" "floadsf"))
-   "x_e1_t,x-wr-fp") 
+--- gcc/config/s390/libgcc-glibc.ver.jj	2006-01-31 14:38:38.000000000 +0100
++++ gcc/config/s390/libgcc-glibc.ver	2006-01-31 15:07:03.000000000 +0100
+@@ -39,3 +39,36 @@ GLIBC_2.2 {
+ }
+ %endif
  
++# With GCC 4.1.0 long double 128 bit support was introduced. The
++# following symbols coming from libgcc are enabled when -mlong-double-128
++# is specified. These lines make the symbols to get a @@GCC_4.1.0 attached.
 +
- (define_insn_reservation "x_fstoredf" 1 
-   (and (eq_attr "cpu" "z990,z9_109")
-        (eq_attr "type" "fstoredf"))
-@@ -201,6 +225,18 @@
-        (eq_attr "type" "fstoresf"))
-   "x_e1_t,x-wr-fp") 
- 
++%exclude {
++  __divtc3
++  __multc3
++  __powitf2
++  __fixtfti
++  __fixunstfti
++  __floattitf
 +
-+(define_insn_reservation "x_ftrunctf" 16
-+  (and (eq_attr "cpu" "z990,z9_109")
-+       (eq_attr "type" "ftrunctf"))
-+  "x_e1_t*10,x-wr-fp") 
++  __fixtfdi
++  __fixunstfdi
++  __floatditf
++}
 +
-+(define_insn_reservation "x_ftruncdf" 11
-+  (and (eq_attr "cpu" "z990,z9_109")
-+       (eq_attr "type" "ftruncdf"))
-+  "x_e1_t*5,x-wr-fp") 
++GCC_4.1.0 {
++  __divtc3
++  __multc3
++  __powitf2
 +
++%ifdef __s390x__
++  __fixtfti
++  __fixunstfti
++  __floattitf
 +
- (define_insn_reservation "x_ftoi" 1 
-   (and (eq_attr "cpu" "z990,z9_109")
-        (eq_attr "type" "ftoi"))
-@@ -234,7 +270,7 @@
- 	         "s390_agen_dep_p")
- 
- (define_bypass 9 "x_int,x_agen,x_lr" 
--                 "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
-+                 "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
- 		  x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
- 	         "s390_agen_dep_p")
- ;;
-@@ -247,7 +283,7 @@
- 	         "s390_agen_dep_p")
- 
- (define_bypass 5 "x_load"
--                 "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
-+                 "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
- 		  x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
- 	         "s390_agen_dep_p")
- 
-@@ -261,7 +297,7 @@
- 	         "s390_agen_dep_p")
- 
- (define_bypass 5 "x_larl, x_la"
--                 "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
-+                 "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
- 		  x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
- 	         "s390_agen_dep_p")
- 
---- gcc/config/s390/s390.c.jj	2006-01-28 10:29:03.000000000 +0100
-+++ gcc/config/s390/s390.c	2006-01-28 10:39:11.000000000 +0100
++%else
++  __fixtfdi
++  __fixunstfdi
++  __floatditf
++%endif
++}
+--- gcc/config/s390/s390.c.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.c	2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ /* Subroutines used for code generation on IBM S/390 and zSeries
+-   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
++   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+    Free Software Foundation, Inc.
+    Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+                   Ulrich Weigand (uweigand at de.ibm.com).
 @@ -71,13 +71,17 @@ struct processor_costs 
    const int msgr;     /* cost of an MSGR instruction.  */
    const int msr;      /* cost of an MSR instruction.  */
@@ -217,7 +167,7 @@
    COSTS_N_INSNS (40),    /* DDBR */
    COSTS_N_INSNS (37),    /* DDR  */
    COSTS_N_INSNS (26),    /* DDBR */
-@@ -2154,6 +2170,9 @@ s390_rtx_costs (rtx x, int code, int out
+@@ -2159,6 +2175,9 @@ s390_rtx_costs (rtx x, int code, int out
  	case DFmode:
  	  *total = s390_cost->mult_df;
  	  break;
@@ -227,7 +177,7 @@
  	default:
  	  return false;
  	}
-@@ -2204,13 +2223,22 @@ s390_rtx_costs (rtx x, int code, int out
+@@ -2209,13 +2228,22 @@ s390_rtx_costs (rtx x, int code, int out
  	  else /* TARGET_IBM_FLOAT */
  	    *total = s390_cost->ddr;
  	}
@@ -251,7 +201,7 @@
        return false;
  
      case SIGN_EXTEND:
-@@ -2565,6 +2593,18 @@ s390_secondary_input_reload_class (enum 
+@@ -2570,6 +2598,18 @@ s390_secondary_input_reload_class (enum 
    if (s390_plus_operand (in, mode))
      return ADDR_REGS;
  
@@ -270,7 +220,16 @@
    if (reg_classes_intersect_p (CC_REGS, class))
      return GENERAL_REGS;
  
-@@ -2592,6 +2632,18 @@ s390_secondary_output_reload_class (enum
+@@ -2586,7 +2626,7 @@ enum reg_class
+ s390_secondary_output_reload_class (enum reg_class class,
+ 				    enum machine_mode mode, rtx out)
+ {
+-  if ((TARGET_64BIT ? mode == TImode
++  if ((TARGET_64BIT ? (mode == TImode || mode == TFmode)
+                     : (mode == DImode || mode == DFmode))
+       && reg_classes_intersect_p (GENERAL_REGS, class)
+       && GET_CODE (out) == MEM
+@@ -2597,6 +2637,18 @@ s390_secondary_output_reload_class (enum
  			 + GET_MODE_SIZE (mode) - 1))
      return ADDR_REGS;
  
@@ -289,7 +248,7 @@
    if (reg_classes_intersect_p (CC_REGS, class))
      return GENERAL_REGS;
  
-@@ -5164,10 +5216,10 @@ replace_ltrel_base (rtx *x)
+@@ -5168,10 +5220,10 @@ replace_ltrel_base (rtx *x)
  /* We keep a list of constants which we have to add to internal
     constant tables in the middle of large functions.  */
  
@@ -302,107 +261,154 @@
    DFmode, DImode,
    SFmode, SImode,
    HImode,
---- gcc/config/s390/s390.h.jj	2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/s390.h	2006-01-28 10:39:11.000000000 +0100
-@@ -93,6 +93,8 @@ extern enum processor_flags s390_arch_fl
-       builtin_define ("__s390__");			\
-       if (TARGET_64BIT)					\
-         builtin_define ("__s390x__");			\
-+      if (TARGET_LONG_DOUBLE_128)			\
-+        builtin_define ("__LONG_DOUBLE_128__");		\
-     }							\
-   while (0)
+--- gcc/config/s390/s390.opt.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.opt	2006-01-31 15:07:03.000000000 +0100
+@@ -1,6 +1,6 @@
+ ; Options for the S/390 / zSeries port of the compiler.
  
-@@ -216,7 +218,18 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
- #define LONG_LONG_TYPE_SIZE 64
- #define FLOAT_TYPE_SIZE 32
- #define DOUBLE_TYPE_SIZE 64
--#define LONG_DOUBLE_TYPE_SIZE 64  /* ??? Should support extended format.  */
-+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+-; Copyright (C) 2005 Free Software Foundation, Inc.
++; Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+ ;
+ ; This file is part of GCC.
+ ;
+@@ -51,6 +51,14 @@ mhard-float
+ Target Report RejectNegative Mask(HARD_FLOAT)
+ Use hardware fp
+ 
++mlong-double-128
++Target Report RejectNegative Mask(LONG_DOUBLE_128)
++Use 128-bit long double
 +
-+/* Define this to set long double type size to use in libgcc2.c, which can
-+   not depend on target_flags.  */
-+#ifdef __LONG_DOUBLE_128__
-+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
-+#else
-+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
-+#endif
++mlong-double-64
++Target Report RejectNegative InverseMask(LONG_DOUBLE_128)
++Use 64-bit long double
 +
-+/* Work around target_flags dependency in ada/targtyps.c.  */
-+#define WIDEST_HARDWARE_FP_SIZE 64
+ mpacked-stack
+ Target Report Mask(PACKED_STACK)
+ Use packed stack layout
+--- gcc/config/s390/2084.md.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/2084.md	2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ ;; Scheduling description for z990 (cpu 2084).
+-;;   Copyright (C) 2003,2004, 2005 Free Software Foundation, Inc.
++;;   Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ ;;   Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+ ;;                  Ulrich Weigand (uweigand at de.ibm.com).
  
- /* We use "unsigned char" as default.  */
- #define DEFAULT_SIGNED_CHAR 0
-@@ -334,28 +347,34 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
-    Floating point modes <= word size fit into any FPR or GPR.
-    Floating point modes > word size (i.e. DFmode on 32-bit) fit
-    into any FPR, or an even-odd GPR pair.
-+   TFmode fits only into an even-odd FPR pair.
+@@ -161,6 +161,11 @@
+ ;; Floating point insns
+ ;;
  
-    Complex floating point modes fit either into two FPRs, or into
-    successive GPRs (again starting with an even number).
-+   TCmode fits only into two successive even-odd FPR pairs.
++(define_insn_reservation "x_fsimptf" 7 
++  (and (eq_attr "cpu" "z990,z9_109")
++       (eq_attr "type" "fsimptf"))
++  "x_e1_t*2,x-wr-fp") 
++
+ (define_insn_reservation "x_fsimpdf" 6 
+   (and (eq_attr "cpu" "z990,z9_109")
+        (eq_attr "type" "fsimpdf,fmuldf"))
+@@ -171,6 +176,18 @@
+        (eq_attr "type" "fsimpsf,fmulsf"))
+   "x_e1_t,x-wr-fp") 
  
-    Condition code modes fit only into the CC register.  */
++
++(define_insn_reservation "x_fmultf" 33
++  (and (eq_attr "cpu" "z990,z9_109")
++       (eq_attr "type" "fmultf"))
++  "x_e1_t*27,x-wr-fp") 
++
++
++(define_insn_reservation "x_fdivtf" 82
++  (and (eq_attr "cpu" "z990,z9_109")
++       (eq_attr "type" "fdivtf,fsqrttf"))
++  "x_e1_t*76,x-wr-fp") 
++
+ (define_insn_reservation "x_fdivdf" 36
+   (and (eq_attr "cpu" "z990,z9_109")
+        (eq_attr "type" "fdivdf,fsqrtdf"))
+@@ -181,6 +198,12 @@
+        (eq_attr "type" "fdivsf,fsqrtsf"))
+   "x_e1_t*30,x-wr-fp") 
  
- #define HARD_REGNO_NREGS(REGNO, MODE)                           \
-   (FP_REGNO_P(REGNO)?                                           \
--    (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) :      \
-+   (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ?                \
-+    2 * ((GET_MODE_SIZE(MODE) / 2 + 8 - 1) / 8) : 		\
-+    ((GET_MODE_SIZE(MODE) + 8 - 1) / 8)) :			\
-    GENERAL_REGNO_P(REGNO)?                                      \
-     ((GET_MODE_SIZE(MODE)+UNITS_PER_WORD-1) / UNITS_PER_WORD) : \
-    ACCESS_REGNO_P(REGNO)?					\
--    ((GET_MODE_SIZE(MODE)+4-1) / 4) : 				\
-+    ((GET_MODE_SIZE(MODE) + 4 - 1) / 4) : 			\
-    1)
++
++(define_insn_reservation "x_floadtf" 6 
++  (and (eq_attr "cpu" "z990,z9_109")
++       (eq_attr "type" "floadtf"))
++  "x_e1_t,x-wr-fp") 
++
+ (define_insn_reservation "x_floaddf" 6 
+   (and (eq_attr "cpu" "z990,z9_109")
+        (eq_attr "type" "floaddf"))
+@@ -191,6 +214,7 @@
+        (eq_attr "type" "floadsf"))
+   "x_e1_t,x-wr-fp") 
  
- #define HARD_REGNO_MODE_OK(REGNO, MODE)                             \
-   (FP_REGNO_P(REGNO)?                                               \
--   ((MODE) == SImode || (MODE) == DImode ||                         \
--    GET_MODE_CLASS(MODE) == MODE_FLOAT ||                           \
--    GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT) :                   \
-+   (((MODE) == SImode || (MODE) == DImode                           \
-+     || GET_MODE_CLASS(MODE) == MODE_FLOAT                          \
-+     || GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT)                 \
-+    && (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1))) :    \
-    GENERAL_REGNO_P(REGNO)?                                          \
--    (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) :        \
-+   ((HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1))	    \
-+    && (MODE) != TFmode && (MODE) != TCmode) :			    \
-    CC_REGNO_P(REGNO)?                                               \
-      GET_MODE_CLASS (MODE) == MODE_CC :                             \
-    FRAME_REGNO_P(REGNO)?                                            \
-@@ -376,7 +395,9 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
-    in a register of class CLASS.  */
- #define CLASS_MAX_NREGS(CLASS, MODE)   					\
-      ((CLASS) == FP_REGS ? 						\
--      (GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) :  		\
-+      (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ?                     \
-+       2 * (GET_MODE_SIZE (MODE) / 2 + 8 - 1) / 8 :		        \
-+       (GET_MODE_SIZE (MODE) + 8 - 1) / 8) :				\
-       (CLASS) == ACCESS_REGS ?						\
-       (GET_MODE_SIZE (MODE) + 4 - 1) / 4 :				\
-       (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
-@@ -386,10 +407,11 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
-    cannot use SUBREGs to switch between modes in FP registers.
-    Likewise for access registers, since they have only half the
-    word size on 64-bit.  */
--#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		\
--  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			\
--   ? reg_classes_intersect_p (FP_REGS, CLASS)			\
--     || reg_classes_intersect_p (ACCESS_REGS, CLASS) : 0)
-+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		        \
-+  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			        \
-+   ? ((reg_classes_intersect_p (FP_REGS, CLASS)				\
-+       && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8))		\
-+      || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
++
+ (define_insn_reservation "x_fstoredf" 1 
+   (and (eq_attr "cpu" "z990,z9_109")
+        (eq_attr "type" "fstoredf"))
+@@ -201,6 +225,18 @@
+        (eq_attr "type" "fstoresf"))
+   "x_e1_t,x-wr-fp") 
  
- /* Register classes.  */
++
++(define_insn_reservation "x_ftrunctf" 16
++  (and (eq_attr "cpu" "z990,z9_109")
++       (eq_attr "type" "ftrunctf"))
++  "x_e1_t*10,x-wr-fp") 
++
++(define_insn_reservation "x_ftruncdf" 11
++  (and (eq_attr "cpu" "z990,z9_109")
++       (eq_attr "type" "ftruncdf"))
++  "x_e1_t*5,x-wr-fp") 
++
++
+ (define_insn_reservation "x_ftoi" 1 
+   (and (eq_attr "cpu" "z990,z9_109")
+        (eq_attr "type" "ftoi"))
+@@ -234,7 +270,7 @@
+ 	         "s390_agen_dep_p")
+ 
+ (define_bypass 9 "x_int,x_agen,x_lr" 
+-                 "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
++                 "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ 		  x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ 	         "s390_agen_dep_p")
+ ;;
+@@ -247,7 +283,7 @@
+ 	         "s390_agen_dep_p")
+ 
+ (define_bypass 5 "x_load"
+-                 "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
++                 "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ 		  x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ 	         "s390_agen_dep_p")
+ 
+@@ -261,7 +297,7 @@
+ 	         "s390_agen_dep_p")
+ 
+ (define_bypass 5 "x_larl, x_la"
+-                 "x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
++                 "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ 		  x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ 	         "s390_agen_dep_p")
  
---- gcc/config/s390/s390.md.jj	2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/s390.md	2006-01-28 10:39:11.000000000 +0100
+--- gcc/config/s390/t-crtstuff.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/t-crtstuff	2006-01-31 15:07:03.000000000 +0100
+@@ -2,3 +2,4 @@
+ # because then __FRAME_END__ might not be the last thing in .eh_frame
+ # section.
+ CRTSTUFF_T_CFLAGS = -fno-asynchronous-unwind-tables
++TARGET_LIBGCC2_CFLAGS += -mlong-double-128
+--- gcc/config/s390/s390.md.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.md	2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ ;;- Machine description for GNU compiler -- S/390 / zSeries version.
+-;;  Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
++;;  Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ ;;  Free Software Foundation, Inc.
+ ;;  Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+ ;;                 Ulrich Weigand (uweigand at de.ibm.com).
 @@ -199,11 +199,11 @@
  (define_attr "type" "none,integer,load,lr,la,larl,lm,stm,
  	             cs,vs,store,sem,idiv,
@@ -512,17 +518,40 @@
      (set_attr "type"  "fsimp<mode>")])
  
  
-@@ -1483,6 +1493,79 @@
+@@ -1483,6 +1493,149 @@
     (set_attr "type" "lr,load,load,*")])
  
  ;
 +; movtf instruction pattern(s).
 +;
 +
-+(define_insn "movtf"
++(define_expand "movtf"
++  [(set (match_operand:TF 0 "nonimmediate_operand" "")
++        (match_operand:TF 1 "general_operand"       ""))]
++  ""
++  "")
++
++(define_insn "*movtf_64"
++  [(set (match_operand:TF 0 "nonimmediate_operand" "=f,f,f,o,d,QS,d,o,Q")
++        (match_operand:TF 1 "general_operand"       "G,f,o,f,QS,d,dm,d,Q"))]
++  "TARGET_64BIT"
++  "@
++   lzxr\t%0
++   lxr\t%0,%1
++   #
++   #
++   lmg\t%0,%N0,%S1
++   stmg\t%1,%N1,%S0
++   #
++   #
++   #"
++  [(set_attr "op_type" "RRE,RRE,*,*,RSY,RSY,*,*,*")
++   (set_attr "type"    "fsimptf,fsimptf,*,*,lm,stm,*,*,*")])
++
++(define_insn "*movtf_31"
 +  [(set (match_operand:TF 0 "nonimmediate_operand" "=f,f,f,o,Q")
 +        (match_operand:TF 1 "general_operand"       "G,f,o,f,Q"))]
-+  ""
++  "!TARGET_64BIT"
 +  "@
 +   lzxr\t%0
 +   lxr\t%0,%1
@@ -530,12 +559,58 @@
 +   #
 +   #"
 +  [(set_attr "op_type" "RRE,RRE,*,*,*")
-+   (set_attr "type" "fsimptf,fsimptf,*,*,*")])
++   (set_attr "type"    "fsimptf,fsimptf,*,*,*")])
++
++; TFmode in GPRs splitters
++
++(define_split
++  [(set (match_operand:TF 0 "nonimmediate_operand" "")
++        (match_operand:TF 1 "general_operand" ""))]
++  "TARGET_64BIT && reload_completed
++   && s390_split_ok_p (operands[0], operands[1], TFmode, 0)"
++  [(set (match_dup 2) (match_dup 4))
++   (set (match_dup 3) (match_dup 5))]
++{
++  operands[2] = operand_subword (operands[0], 0, 0, TFmode);
++  operands[3] = operand_subword (operands[0], 1, 0, TFmode);
++  operands[4] = operand_subword (operands[1], 0, 0, TFmode);
++  operands[5] = operand_subword (operands[1], 1, 0, TFmode);
++})
++
++(define_split
++  [(set (match_operand:TF 0 "nonimmediate_operand" "")
++        (match_operand:TF 1 "general_operand" ""))]
++  "TARGET_64BIT && reload_completed
++   && s390_split_ok_p (operands[0], operands[1], TFmode, 1)"
++  [(set (match_dup 2) (match_dup 4))
++   (set (match_dup 3) (match_dup 5))]
++{
++  operands[2] = operand_subword (operands[0], 1, 0, TFmode);
++  operands[3] = operand_subword (operands[0], 0, 0, TFmode);
++  operands[4] = operand_subword (operands[1], 1, 0, TFmode);
++  operands[5] = operand_subword (operands[1], 0, 0, TFmode);
++})
++
++(define_split
++  [(set (match_operand:TF 0 "register_operand" "")
++        (match_operand:TF 1 "memory_operand" ""))]
++  "TARGET_64BIT && reload_completed
++   && !FP_REG_P (operands[0])
++   && !s_operand (operands[1], VOIDmode)"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  rtx addr = operand_subword (operands[0], 1, 0, DFmode);
++  s390_load_address (addr, XEXP (operands[1], 0));
++  operands[1] = replace_equiv_address (operands[1], addr);
++})
++
++; TFmode in FPRs splitters
 +
 +(define_split
 +  [(set (match_operand:TF 0 "register_operand" "")
 +        (match_operand:TF 1 "memory_operand" ""))]
-+  "reload_completed && offsettable_memref_p (operands[1])"
++  "reload_completed && offsettable_memref_p (operands[1]) 
++   && FP_REG_P (operands[0])"
 +  [(set (match_dup 2) (match_dup 4))
 +   (set (match_dup 3) (match_dup 5))]
 +{
@@ -548,7 +623,8 @@
 +(define_split
 +  [(set (match_operand:TF 0 "memory_operand" "")
 +        (match_operand:TF 1 "register_operand" ""))]
-+  "reload_completed && offsettable_memref_p (operands[0])"
++  "reload_completed && offsettable_memref_p (operands[0])
++   && FP_REG_P (operands[1])"
 +  [(set (match_dup 2) (match_dup 4))
 +   (set (match_dup 3) (match_dup 5))]
 +{
@@ -592,7 +668,7 @@
  ; movdf instruction pattern(s).
  ;
  
-@@ -3083,13 +3166,13 @@
+@@ -3083,13 +3236,13 @@
    DONE;
  })
  
@@ -610,7 +686,7 @@
        GEN_INT(5)));
    DONE;
  })
-@@ -3100,11 +3183,23 @@
+@@ -3100,11 +3253,23 @@
     (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
     (clobber (reg:CC CC_REGNUM))]
    "TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -635,7 +711,7 @@
  ; fix_truncdfsi2 instruction pattern(s).
  ;
  
-@@ -3177,21 +3272,36 @@
+@@ -3177,21 +3342,36 @@
  })
  
  ;
@@ -675,7 +751,7 @@
  (define_expand "floatsidf2"
    [(set (match_operand:DF 0 "register_operand" "")
          (float:DF (match_operand:SI 1 "register_operand" "")))]
-@@ -3209,14 +3319,6 @@
+@@ -3209,14 +3389,6 @@
      }
  })
  
@@ -690,7 +766,7 @@
  (define_insn "floatsidf2_ibm"
    [(set (match_operand:DF 0 "register_operand" "=f")
          (float:DF (match_operand:SI 1 "register_operand" "d")))
-@@ -3252,14 +3354,6 @@
+@@ -3252,14 +3424,6 @@
      }
  })
  
@@ -705,7 +781,7 @@
  ;
  ; truncdfsf2 instruction pattern(s).
  ;
-@@ -3275,7 +3369,8 @@
+@@ -3275,7 +3439,8 @@
          (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
    "TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
    "ledbr\t%0,%1"
@@ -715,7 +791,7 @@
  
  (define_insn "truncdfsf2_ibm"
    [(set (match_operand:SF 0 "register_operand" "=f,f")
-@@ -3288,6 +3383,66 @@
+@@ -3288,6 +3453,66 @@
     (set_attr "type"   "floadsf")])
  
  ;
@@ -782,7 +858,7 @@
  ; extendsfdf2 instruction pattern(s).
  ;
  
-@@ -3311,7 +3466,7 @@
+@@ -3311,7 +3536,7 @@
     ldebr\t%0,%1
     ldeb\t%0,%1"
    [(set_attr "op_type"  "RRE,RXE")
@@ -791,7 +867,7 @@
  
  (define_insn "extendsfdf2_ibm"
    [(set (match_operand:DF 0 "register_operand" "=f,f")
-@@ -3324,6 +3479,66 @@
+@@ -3324,6 +3549,66 @@
    [(set_attr "length"   "4,6")
     (set_attr "type"     "floadsf")])
  
@@ -858,7 +934,7 @@
  
  ;;
  ;; ARITHMETIC OPERATIONS
-@@ -3751,7 +3966,7 @@
+@@ -3751,7 +4036,7 @@
    [(parallel
      [(set (match_operand:FPR 0 "register_operand" "=f,f")
            (plus:FPR (match_operand:FPR 1 "nonimmediate_operand" "%0,0")
@@ -867,7 +943,7 @@
       (clobber (reg:CC CC_REGNUM))])]
    "TARGET_HARD_FLOAT"
    "")
-@@ -3759,52 +3974,52 @@
+@@ -3759,52 +4044,52 @@
  (define_insn "*add<mode>3"
    [(set (match_operand:FPR 0 "register_operand" "=f,f")
          (plus:FPR (match_operand:FPR 1 "nonimmediate_operand" "%0,0")
@@ -933,7 +1009,7 @@
     (set_attr "type"     "fsimp<mode>")])
  
  
-@@ -4163,52 +4378,52 @@
+@@ -4163,52 +4448,52 @@
  (define_insn "*sub<mode>3"
    [(set (match_operand:FPR 0 "register_operand" "=f,f")
          (minus:FPR (match_operand:FPR 1 "register_operand" "0,0")
@@ -999,7 +1075,7 @@
     (set_attr "type"     "fsimp<mode>")])
  
  
-@@ -4456,53 +4671,53 @@
+@@ -4456,53 +4741,53 @@
  (define_expand "mul<mode>3"
    [(set (match_operand:FPR 0 "register_operand" "=f,f")
          (mult:FPR (match_operand:FPR 1 "nonimmediate_operand" "%0,0")
@@ -1073,7 +1149,7 @@
    [(set_attr "op_type"  "RRE,RXE")
     (set_attr "type"     "fmul<mode>")])
  
-@@ -4949,30 +5164,30 @@
+@@ -4949,30 +5234,30 @@
  (define_expand "div<mode>3"
    [(set (match_operand:FPR 0 "register_operand" "=f,f")
          (div:FPR (match_operand:FPR 1 "register_operand" "0,0")
@@ -1112,7 +1188,7 @@
     (set_attr "type"     "fdiv<mode>")])
  
  
-@@ -5955,7 +6170,7 @@
+@@ -5955,7 +6240,7 @@
     (set (match_operand:FPR 0 "register_operand" "=f")
          (neg:FPR (match_dup 1)))]
    "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1121,7 +1197,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
    
-@@ -5965,7 +6180,7 @@
+@@ -5965,7 +6250,7 @@
                   (match_operand:FPR 2 "const0_operand" "")))
     (clobber (match_scratch:FPR 0 "=f"))]
    "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1130,7 +1206,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
    
-@@ -5974,7 +6189,7 @@
+@@ -5974,7 +6259,7 @@
          (neg:FPR (match_operand:FPR 1 "register_operand" "f")))
     (clobber (reg:CC CC_REGNUM))]
    "TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1139,7 +1215,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
  
-@@ -5983,8 +6198,8 @@
+@@ -5983,8 +6268,8 @@
          (neg:FPR (match_operand:FPR 1 "register_operand" "f")))
     (clobber (reg:CC CC_REGNUM))]
    "TARGET_HARD_FLOAT && TARGET_IBM_FLOAT"
@@ -1150,7 +1226,7 @@
     (set_attr "type"     "fsimp<mode>")])
  
  
-@@ -6062,7 +6277,7 @@
+@@ -6062,7 +6347,7 @@
     (set (match_operand:FPR 0 "register_operand" "=f")
          (abs:FPR (match_dup 1)))]
    "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1159,7 +1235,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
    
-@@ -6072,7 +6287,7 @@
+@@ -6072,7 +6357,7 @@
                   (match_operand:FPR 2 "const0_operand" "")))
     (clobber (match_scratch:FPR 0 "=f"))]
    "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1168,7 +1244,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
    
-@@ -6081,7 +6296,7 @@
+@@ -6081,7 +6366,7 @@
          (abs:FPR (match_operand:FPR 1 "register_operand" "f")))
     (clobber (reg:CC CC_REGNUM))]
    "TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1177,7 +1253,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
  
-@@ -6090,8 +6305,8 @@
+@@ -6090,8 +6375,8 @@
          (abs:FPR (match_operand:FPR 1 "register_operand" "f")))
     (clobber (reg:CC CC_REGNUM))]
    "TARGET_HARD_FLOAT && TARGET_IBM_FLOAT"
@@ -1188,7 +1264,7 @@
     (set_attr "type"     "fsimp<mode>")])
  
  ;;
-@@ -6161,7 +6376,7 @@
+@@ -6161,7 +6446,7 @@
     (set (match_operand:FPR 0 "register_operand" "=f")
          (neg:FPR (abs:FPR (match_dup 1))))]
    "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1197,7 +1273,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
    
-@@ -6171,7 +6386,7 @@
+@@ -6171,7 +6456,7 @@
                   (match_operand:FPR 2 "const0_operand" "")))
     (clobber (match_scratch:FPR 0 "=f"))]
    "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1206,7 +1282,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
    
-@@ -6180,7 +6395,7 @@
+@@ -6180,7 +6465,7 @@
          (neg:FPR (abs:FPR (match_operand:FPR 1 "register_operand" "f"))))
     (clobber (reg:CC CC_REGNUM))]
    "TARGET_HARD_FLOAT && TARGET_IEEE_FLOAT"
@@ -1215,7 +1291,7 @@
    [(set_attr "op_type"  "RRE")
     (set_attr "type"     "fsimp<mode>")])
  
-@@ -6194,11 +6409,11 @@
+@@ -6194,11 +6479,11 @@
  
  (define_insn "sqrt<mode>2"
    [(set (match_operand:FPR 0 "register_operand" "=f,f")
@@ -1230,66 +1306,301 @@
    [(set_attr "op_type" "RRE,RXE")
     (set_attr "type" "fsqrt<mode>")])
  
---- gcc/config/s390/s390.opt.jj	2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/s390.opt	2006-01-28 10:39:11.000000000 +0100
-@@ -51,6 +51,14 @@ mhard-float
- Target Report RejectNegative Mask(HARD_FLOAT)
- Use hardware fp
- 
-+mlong-double-128
-+Target Report RejectNegative Mask(LONG_DOUBLE_128)
-+Use 128-bit long double
+--- gcc/config/s390/fixdfdi.h.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/fixdfdi.h	2006-01-31 15:07:03.000000000 +0100
+@@ -20,6 +20,156 @@ along with GCC; see the file COPYING.  I
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.  */
+ 
++#ifdef L_fixunstfdi
++
++#define EXPD(fp)	   (((fp.l.i[0]) >> 16) & 0x7FFF)
++#define EXPONENT_BIAS	   16383
++#define MANTISSA_BITS      112
++#define PRECISION          (MANTISSA_BITS + 1)
++#define SIGNBIT		   0x80000000
++#define SIGND(fp)	   ((fp.l.i[0]) & SIGNBIT)
++#define MANTD_HIGH_LL(fp)  ((fp.ll[0] & HIGH_LL_FRAC_MASK) | HIGH_LL_UNIT_BIT)
++#define MANTD_LOW_LL(fp)   (fp.ll[1])
++#define FRACD_ZERO_P(fp)   (!fp.ll[1] && !(fp.ll[0] & HIGH_LL_FRAC_MASK))
++#define HIGH_LL_FRAC_BITS  48
++#define HIGH_LL_UNIT_BIT   ((UDItype_x)1 << HIGH_LL_FRAC_BITS)
++#define HIGH_LL_FRAC_MASK  (HIGH_LL_UNIT_BIT - 1)
++
++typedef int DItype_x __attribute__ ((mode (DI)));
++typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
++typedef int SItype_x __attribute__ ((mode (SI)));
++typedef unsigned int USItype_x __attribute__ ((mode (SI)));
++
++union double_long {
++  long double d;
++  struct {
++      SItype_x i[4]; /* 32 bit parts: 0 upper ... 3 lowest */
++    } l;
++  UDItype_x ll[2];   /* 64 bit parts: 0 upper, 1 lower */
++};
++
++UDItype_x __fixunstfdi (long double a1);
++
++/* convert long double to unsigned 64-bit integer */
++UDItype_x
++__fixunstfdi (long double a1)
++{
++    register union double_long dl1;
++    register int exp;
++    register UDItype_x l;
 +
-+mlong-double-64
-+Target Report RejectNegative InverseMask(LONG_DOUBLE_128)
-+Use 64-bit long double
++    dl1.d = a1;
 +
- mpacked-stack
- Target Report Mask(PACKED_STACK)
- Use packed stack layout
---- gcc/config/s390/t-crtstuff.jj	2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/t-crtstuff	2006-01-28 10:39:11.000000000 +0100
-@@ -2,3 +2,4 @@
- # because then __FRAME_END__ might not be the last thing in .eh_frame
- # section.
- CRTSTUFF_T_CFLAGS = -fno-asynchronous-unwind-tables
-+TARGET_LIBGCC2_CFLAGS += -mlong-double-128
---- gcc/config/s390/libgcc-glibc.ver.jj	2006-01-28 09:54:03.000000000 +0100
-+++ gcc/config/s390/libgcc-glibc.ver	2006-01-28 10:39:11.000000000 +0100
-@@ -39,3 +39,36 @@ GLIBC_2.2 {
- }
- %endif
- 
-+# With GCC 4.1.0 long double 128 bit support was introduced. The
-+# following symbols coming from libgcc are enabled when -mlong-double-128
-+# is specified. These lines make the symbols to get a @@GCC_4.1.0 attached.
++    /* +/- 0, denormalized, negative */
++    if (!EXPD (dl1) || SIGND(dl1))
++      return 0;
 +
-+%exclude {
-+  __divtc3
-+  __multc3
-+  __powitf2
-+  __fixtfti
-+  __fixunstfti
-+  __floattitf
++    /* The exponent - considered the binary point at the right end of 
++       the mantissa.  */
++    exp = EXPD (dl1) - EXPONENT_BIAS - MANTISSA_BITS;
 +
-+  __fixtfdi
-+  __fixunstfdi
-+  __floatditf
++    /* number < 1: If the mantissa would need to be right-shifted more bits than
++       its size (plus the implied one bit on the left) the result would be 
++       zero.  */
++    if (exp <= -PRECISION)
++      return 0;
++
++    /* NaN: All exponent bits set and a non-zero fraction.  */
++    if ((EXPD(dl1) == 0x7fff) && !FRACD_ZERO_P (dl1))
++      return 0x0ULL;
++
++    /* If the upper ll part of the mantissa isn't zeroed out after
++       shifting, the number would be too large.  */
++    if (exp >= -HIGH_LL_FRAC_BITS)
++      return 0xFFFFFFFFFFFFFFFFULL;
++
++    exp += HIGH_LL_FRAC_BITS + 1;
++
++    l = MANTD_LOW_LL (dl1) >> (HIGH_LL_FRAC_BITS + 1)
++        | MANTD_HIGH_LL (dl1) << (64 - (HIGH_LL_FRAC_BITS + 1));
++
++    return l >> -exp;
 +}
++#define __fixunstfdi ___fixunstfdi
++#endif
++#undef L_fixunstfdi
 +
-+GCC_4.1.0 {
-+  __divtc3
-+  __multc3
-+  __powitf2
++#ifdef L_fixtfdi
++#define EXPD(fp)	   (((fp.l.i[0]) >> 16) & 0x7FFF)
++#define EXPONENT_BIAS	   16383
++#define MANTISSA_BITS      112
++#define PRECISION          (MANTISSA_BITS + 1)
++#define SIGNBIT		   0x80000000
++#define SIGND(fp)	   ((fp.l.i[0]) & SIGNBIT)
++#define MANTD_HIGH_LL(fp)  ((fp.ll[0] & HIGH_LL_FRAC_MASK) | HIGH_LL_UNIT_BIT)
++#define MANTD_LOW_LL(fp)   (fp.ll[1])
++#define FRACD_ZERO_P(fp)   (!fp.ll[1] && !(fp.ll[0] & HIGH_LL_FRAC_MASK))
++#define HIGH_LL_FRAC_BITS  48
++#define HIGH_LL_UNIT_BIT   ((UDItype_x)1 << HIGH_LL_FRAC_BITS)
++#define HIGH_LL_FRAC_MASK  (HIGH_LL_UNIT_BIT - 1)
++
++typedef int DItype_x __attribute__ ((mode (DI)));
++typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
++typedef int SItype_x __attribute__ ((mode (SI)));
++typedef unsigned int USItype_x __attribute__ ((mode (SI)));
++
++union double_long {
++  long double d;
++  struct {
++      SItype_x i[4]; /* 32 bit parts: 0 upper ... 3 lowest */
++    } l;
++  DItype_x ll[2];   /* 64 bit parts: 0 upper, 1 lower */
++};
++
++DItype_x __fixtfdi (long double a1);
++
++/* convert long double to signed 64-bit integer */
++DItype_x
++__fixtfdi (long double a1)
++{
++    register union double_long dl1;
++    register int exp;
++    register UDItype_x l;
++
++    dl1.d = a1;
++
++    /* +/- 0, denormalized */
++    if (!EXPD (dl1))
++      return 0;
++
++    /* The exponent - considered the binary point at the right end of 
++       the mantissa.  */
++    exp = EXPD (dl1) - EXPONENT_BIAS - MANTISSA_BITS;
++
++    /* number < 1: If the mantissa would need to be right-shifted more bits than
++       its size the result would be zero.  */
++    if (exp <= -PRECISION)
++      return 0;
++
++    /* NaN: All exponent bits set and a non-zero fraction.  */
++    if ((EXPD(dl1) == 0x7fff) && !FRACD_ZERO_P (dl1))
++      return 0x8000000000000000ULL;
++
++    /* If the upper ll part of the mantissa isn't zeroed out after
++       shifting, the number would be too large.  */
++    if (exp >= -HIGH_LL_FRAC_BITS)
++      {
++	l = (long long)1 << 63; /* long int min */
++	return SIGND (dl1) ? l : l - 1;
++      }
 +
-+%ifdef __s390x__
-+  __fixtfti
-+  __fixunstfti
-+  __floattitf
++    /* The extra bit is needed for the sign bit.  */
++    exp += HIGH_LL_FRAC_BITS + 1;
 +
-+%else
-+  __fixtfdi
-+  __fixunstfdi
-+  __floatditf
-+%endif
++    l = MANTD_LOW_LL (dl1) >> (HIGH_LL_FRAC_BITS + 1)
++        | MANTD_HIGH_LL (dl1) << (64 - (HIGH_LL_FRAC_BITS + 1));
++
++    return SIGND (dl1) ? -(l >> -exp) : l >> -exp;
 +}
++#define __fixtfdi ___fixtfdi
++#endif
++#undef L_fixtfdi
++
+ #ifdef L_fixunsdfdi
+ #define EXPD(fp)	(((fp.l.upper) >> 20) & 0x7FF)
+ #define EXCESSD		1022
+@@ -305,4 +455,3 @@ __fixsfdi (float a1)
+ #define __fixsfdi ___fixsfdi
+ #endif
+ #undef L_fixsfdi
+-
+--- gcc/config/s390/s390.h.jj	2006-01-31 14:38:39.000000000 +0100
++++ gcc/config/s390/s390.h	2006-01-31 15:07:03.000000000 +0100
+@@ -1,5 +1,5 @@
+ /* Definitions of target machine for GNU compiler, for IBM S/390
+-   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
++   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+    Free Software Foundation, Inc.
+    Contributed by Hartmut Penner (hpenner at de.ibm.com) and
+                   Ulrich Weigand (uweigand at de.ibm.com).
+@@ -93,6 +93,8 @@ extern enum processor_flags s390_arch_fl
+       builtin_define ("__s390__");			\
+       if (TARGET_64BIT)					\
+         builtin_define ("__s390x__");			\
++      if (TARGET_LONG_DOUBLE_128)			\
++        builtin_define ("__LONG_DOUBLE_128__");		\
+     }							\
+   while (0)
+ 
+@@ -216,7 +218,18 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
+ #define LONG_LONG_TYPE_SIZE 64
+ #define FLOAT_TYPE_SIZE 32
+ #define DOUBLE_TYPE_SIZE 64
+-#define LONG_DOUBLE_TYPE_SIZE 64  /* ??? Should support extended format.  */
++#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
++
++/* Define this to set long double type size to use in libgcc2.c, which can
++   not depend on target_flags.  */
++#ifdef __LONG_DOUBLE_128__
++#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
++#else
++#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
++#endif
++
++/* Work around target_flags dependency in ada/targtyps.c.  */
++#define WIDEST_HARDWARE_FP_SIZE 64
+ 
+ /* We use "unsigned char" as default.  */
+ #define DEFAULT_SIGNED_CHAR 0
+@@ -334,28 +347,34 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
+    Floating point modes <= word size fit into any FPR or GPR.
+    Floating point modes > word size (i.e. DFmode on 32-bit) fit
+    into any FPR, or an even-odd GPR pair.
++   TFmode fits only into an even-odd FPR pair.
+ 
+    Complex floating point modes fit either into two FPRs, or into
+    successive GPRs (again starting with an even number).
++   TCmode fits only into two successive even-odd FPR pairs.
+ 
+    Condition code modes fit only into the CC register.  */
+ 
+ #define HARD_REGNO_NREGS(REGNO, MODE)                           \
+   (FP_REGNO_P(REGNO)?                                           \
+-    (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) :      \
++   (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ?                \
++    2 * ((GET_MODE_SIZE(MODE) / 2 + 8 - 1) / 8) : 		\
++    ((GET_MODE_SIZE(MODE) + 8 - 1) / 8)) :			\
+    GENERAL_REGNO_P(REGNO)?                                      \
+     ((GET_MODE_SIZE(MODE)+UNITS_PER_WORD-1) / UNITS_PER_WORD) : \
+    ACCESS_REGNO_P(REGNO)?					\
+-    ((GET_MODE_SIZE(MODE)+4-1) / 4) : 				\
++    ((GET_MODE_SIZE(MODE) + 4 - 1) / 4) : 			\
+    1)
+ 
+ #define HARD_REGNO_MODE_OK(REGNO, MODE)                             \
+   (FP_REGNO_P(REGNO)?                                               \
+-   ((MODE) == SImode || (MODE) == DImode ||                         \
+-    GET_MODE_CLASS(MODE) == MODE_FLOAT ||                           \
+-    GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT) :                   \
++   (((MODE) == SImode || (MODE) == DImode                           \
++     || GET_MODE_CLASS(MODE) == MODE_FLOAT                          \
++     || GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT)                 \
++    && (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1))) :    \
+    GENERAL_REGNO_P(REGNO)?                                          \
+-    (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) :        \
++   ((HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1))	    \
++    && (((MODE) != TFmode && (MODE) != TCmode) || TARGET_64BIT)) :  \
+    CC_REGNO_P(REGNO)?                                               \
+      GET_MODE_CLASS (MODE) == MODE_CC :                             \
+    FRAME_REGNO_P(REGNO)?                                            \
+@@ -376,7 +395,9 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
+    in a register of class CLASS.  */
+ #define CLASS_MAX_NREGS(CLASS, MODE)   					\
+      ((CLASS) == FP_REGS ? 						\
+-      (GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT ? 2 : 1) :  		\
++      (GET_MODE_CLASS(MODE) == MODE_COMPLEX_FLOAT ?                     \
++       2 * (GET_MODE_SIZE (MODE) / 2 + 8 - 1) / 8 :		        \
++       (GET_MODE_SIZE (MODE) + 8 - 1) / 8) :				\
+       (CLASS) == ACCESS_REGS ?						\
+       (GET_MODE_SIZE (MODE) + 4 - 1) / 4 :				\
+       (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+@@ -386,10 +407,11 @@ if (INTEGRAL_MODE_P (MODE) &&	        	 
+    cannot use SUBREGs to switch between modes in FP registers.
+    Likewise for access registers, since they have only half the
+    word size on 64-bit.  */
+-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		\
+-  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			\
+-   ? reg_classes_intersect_p (FP_REGS, CLASS)			\
+-     || reg_classes_intersect_p (ACCESS_REGS, CLASS) : 0)
++#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		        \
++  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			        \
++   ? ((reg_classes_intersect_p (FP_REGS, CLASS)				\
++       && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8))		\
++      || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
+ 
+ /* Register classes.  */
+ 
+--- gcc/doc/invoke.texi.jj	2006-01-31 14:38:56.000000000 +0100
++++ gcc/doc/invoke.texi	2006-01-31 15:07:03.000000000 +0100
+@@ -666,8 +666,8 @@ See RS/6000 and PowerPC Options.
+ 
+ @emph{S/390 and zSeries Options}
+ @gccoptlist{-mtune=@var{cpu-type}  -march=@var{cpu-type} @gol
+--mhard-float  -msoft-float  -mbackchain  -mno-backchain @gol
+--mpacked-stack  -mno-packed-stack @gol
++-mhard-float  -msoft-float -mlong-double-64 -mlong-double-128 @gol
++-mbackchain  -mno-backchain -mpacked-stack  -mno-packed-stack @gol
+ -msmall-exec  -mno-small-exec  -mmvcle -mno-mvcle @gol
+ -m64  -m31  -mdebug  -mno-debug  -mesa  -mzarch @gol
+ -mtpf-trace -mno-tpf-trace  -mfused-madd  -mno-fused-madd @gol
+@@ -11585,6 +11585,14 @@ functions in @file{libgcc.a} will be use
+ operations.  When @option{-mhard-float} is specified, the compiler
+ generates IEEE floating-point instructions.  This is the default.
+ 
++@item -mlong-double-64
++@itemx -mlong-double-128
++@opindex mlong-double-64
++@opindex mlong-double-128
++These switches control the size of the @code{long double} type.  A size
++of 64 bits makes the @code{long double} type equivalent to the
++@code{double} type.  The 64-bit size is the default.
++
+ @item -mbackchain
+ @itemx -mno-backchain
+ @opindex mbackchain


Index: gcc41.spec
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/gcc41.spec,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- gcc41.spec	28 Jan 2006 10:00:01 -0000	1.19
+++ gcc41.spec	31 Jan 2006 14:29:33 -0000	1.20
@@ -1,6 +1,6 @@
-%define DATE 20060128
+%define DATE 20060131
 %define gcc_version 4.1.0
-%define gcc_release 0.17
+%define gcc_release 0.18
 %define _unpackaged_files_terminate_build 0
 %define multilib_64_archs sparc64 ppc64 s390x x86_64
 %ifarch %{ix86} x86_64 ia64
@@ -96,6 +96,9 @@
 Patch15: gcc41-ppc32-retaddr.patch
 Patch16: gcc41-ppc32-ldbl.patch
 Patch17: gcc41-s390-ldbl.patch
+Patch18: gcc41-x86-mtune-generic1.patch
+Patch19: gcc41-x86-mtune-generic2.patch
+Patch20: gcc41-x86-mtune-generic3.patch
 
 %define _gnu %{nil}
 %ifarch sparc
@@ -443,6 +446,9 @@
 %patch15 -p0 -b .ppc32-retaddr~
 %patch16 -p0 -b .ppc32-ldbl~
 %patch17 -p0 -b .s390-ldbl~
+%patch18 -p0 -b .x86-mtune-generic1~
+%patch19 -p0 -b .x86-mtune-generic2~
+%patch20 -p0 -b .x86-mtune-generic3~
 
 sed -i -e 's/4\.1\.0/4.1.0/' gcc/BASE-VER gcc/version.c
 sed -i -e 's/" (Red Hat[^)]*)"/" (Red Hat %{version}-%{gcc_release})"/' gcc/version.c
@@ -547,6 +553,9 @@
 %ifarch ppc
 	--host=%{gcc_target_platform} --build=%{gcc_target_platform} --target=%{gcc_target_platform} --with-cpu=default32
 %endif
+%ifarch %{ix86} x86_64
+	--with-cpu=generic \
+%endif
 %ifnarch sparc ppc
 	--host=%{gcc_target_platform}
 %endif
@@ -1528,6 +1537,20 @@
 %endif
 
 %changelog
+* Tue Jan 31 2006 Jakub Jelinek <jakub at redhat.com> 4.1.0-0.18
+- update from gcc-4_1-branch (-r110317:110433)
+  - PRs c++/25855, c++/25999, fortran/17911, fortran/18578, fortran/18579,
+	fortran/20857, fortran/20885, fortran/20895, fortran/25030,
+	fortran/25835, fortran/25951, java/21428, libgfortran/25835,
+	target/14798, target/25706, target/25718, target/25947,
+	target/26018, testsuite/25318
+- add -mtune=generic support for i?86 and x86_64 (Jan Hubicka, H.J. Lu,
+  Evandro Menezes)
+- use -mtune=generic by default if neither -march= nor -mtune= is specified
+  on command line on i?86 or x86_64
+- updated s390{,x} long double patch, fixing ICEs on s390x glibc build
+  (Andreas Krebbel, Ulrich Weigand)
+
 * Sat Jan 28 2006 Jakub Jelinek <jakub at redhat.com> 4.1.0-0.17
 - update from gcc-4_1-branch (-r110062:110317)
   - PRs ada/20548, ada/21317, bootstrap/25859, c++/25552, c++/25856,


Index: sources
===================================================================
RCS file: /cvs/dist/rpms/gcc/devel/sources,v
retrieving revision 1.130
retrieving revision 1.131
diff -u -r1.130 -r1.131
--- sources	28 Jan 2006 10:00:01 -0000	1.130
+++ sources	31 Jan 2006 14:29:33 -0000	1.131
@@ -1 +1 @@
-dc997a74f17dccebeb7145a1849573b4  gcc-4.1.0-20060128.tar.bz2
+7a1b6cd7661918ede937ff3b157f2788  gcc-4.1.0-20060131.tar.bz2