rpms/kernel/devel die-floppy-die.patch, NONE, 1.1.2.2 drm-hush-vblank-warning.patch, NONE, 1.1.2.2 drm-radeon-fixes.patch, NONE, 1.1.2.2 linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch, NONE, 1.1.2.2 linux-2.6-ksm-updates.patch, NONE, 1.1.2.2 config-generic, 1.238.6.32, 1.238.6.33 config-x86-generic, 1.68.6.15, 1.68.6.16 drm-r600-kms.patch, 1.1.2.2, 1.1.2.3 kernel.spec, 1.1294.2.44, 1.1294.2.45 linux-2.6-ppc-perfctr-oops-fix.patch, 1.2.2.2, 1.2.2.3 linux-2.6-vga-arb.patch, 1.6.2.2, 1.6.2.3 xen.pvops.patch, 1.1.2.29, 1.1.2.30 xen.pvops.post.patch, 1.1.2.18, 1.1.2.19 xen.pvops.pre.patch, 1.1.2.14, 1.1.2.15

myoung myoung at fedoraproject.org
Sat Aug 8 18:06:14 UTC 2009


Author: myoung

Update of /cvs/pkgs/rpms/kernel/devel
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv10194

Modified Files:
      Tag: private-myoung-dom0-branch
	config-generic config-x86-generic drm-r600-kms.patch 
	kernel.spec linux-2.6-ppc-perfctr-oops-fix.patch 
	linux-2.6-vga-arb.patch xen.pvops.patch xen.pvops.post.patch 
	xen.pvops.pre.patch 
Added Files:
      Tag: private-myoung-dom0-branch
	die-floppy-die.patch drm-hush-vblank-warning.patch 
	drm-radeon-fixes.patch 
	linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch 
	linux-2.6-ksm-updates.patch 
Log Message:
update pvops patch to latest rebase/master and current rawhide


die-floppy-die.patch:
 floppy.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- NEW FILE die-floppy-die.patch ---
Kill the floppy.ko pnp modalias. We were surviving just fine without
autoloading floppy drivers, tyvm.

Please feel free to register all complaints in the wastepaper bin.

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 91b7530..2ea84a6 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4631,7 +4631,7 @@ static const struct pnp_device_id floppy_pnpids[] = {
 	{ "PNP0700", 0 },
 	{ }
 };
-MODULE_DEVICE_TABLE(pnp, floppy_pnpids);
+/* MODULE_DEVICE_TABLE(pnp, floppy_pnpids); */
 
 #else
 

drm-hush-vblank-warning.patch:
 drm_irq.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- NEW FILE drm-hush-vblank-warning.patch ---
diff -up linux-2.6.30.noarch/drivers/gpu/drm/drm_irq.c.jx linux-2.6.30.noarch/drivers/gpu/drm/drm_irq.c
--- linux-2.6.30.noarch/drivers/gpu/drm/drm_irq.c.jx	2009-08-06 11:09:29.000000000 -0400
+++ linux-2.6.30.noarch/drivers/gpu/drm/drm_irq.c	2009-08-06 14:44:34.000000000 -0400
@@ -634,7 +634,7 @@ int drm_wait_vblank(struct drm_device *d
 
 	ret = drm_vblank_get(dev, crtc);
 	if (ret) {
-		DRM_ERROR("failed to acquire vblank counter, %d\n", ret);
+		DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
 		return ret;
 	}
 	seq = drm_vblank_count(dev, crtc);

drm-radeon-fixes.patch:
 drivers/gpu/drm/radeon/r600_cp.c       |   22 +++++++++++++++-------
 drivers/gpu/drm/radeon/radeon_device.c |    4 +++-
 drivers/gpu/drm/radeon/radeon_drv.h    |    1 +
 drivers/gpu/drm/radeon/radeon_object.c |    2 +-
 include/drm/drm_pciids.h               |    5 +++++
 5 files changed, 25 insertions(+), 9 deletions(-)

--- NEW FILE drm-radeon-fixes.patch ---
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 146f357..20f1790 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -384,8 +384,9 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
 		DRM_INFO("Loading RV670 PFP Microcode\n");
 		for (i = 0; i < PFP_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV670_pfp_microcode[i]);
-	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
-		DRM_INFO("Loading RS780 CP Microcode\n");
+	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
+		DRM_INFO("Loading RS780/RS880 CP Microcode\n");
 		for (i = 0; i < PM4_UCODE_SIZE; i++) {
 			RADEON_WRITE(R600_CP_ME_RAM_DATA,
 				     RS780_cp_microcode[i][0]);
@@ -396,7 +397,7 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv)
 		}
 
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
-		DRM_INFO("Loading RS780 PFP Microcode\n");
+		DRM_INFO("Loading RS780/RS880 PFP Microcode\n");
 		for (i = 0; i < PFP_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RS780_pfp_microcode[i]);
 	}
@@ -783,6 +784,7 @@ static void r600_gfx_init(struct drm_device *dev,
 		break;
 	case CHIP_RV610:
 	case CHIP_RS780:
+	case CHIP_RS880:
 	case CHIP_RV620:
 		dev_priv->r600_max_pipes = 1;
 		dev_priv->r600_max_tile_pipes = 1;
@@ -917,7 +919,8 @@ static void r600_gfx_init(struct drm_device *dev,
 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
 		RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
 	else
 		RADEON_WRITE(R600_DB_DEBUG, 0);
@@ -935,7 +938,8 @@ static void r600_gfx_init(struct drm_device *dev,
 	sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
 		sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
 				    R600_FETCH_FIFO_HIWATER(0xa) |
 				    R600_DONE_FIFO_HIWATER(0xe0) |
@@ -978,7 +982,8 @@ static void r600_gfx_init(struct drm_device *dev,
 					    R600_NUM_ES_STACK_ENTRIES(0));
 	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
-		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) {
+		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
 		/* no vertex cache */
 		sq_config &= ~R600_VC_ENABLE;
 
@@ -1035,7 +1040,8 @@ static void r600_gfx_init(struct drm_device *dev,
 
 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780))
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
 		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
 	else
 		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
@@ -1078,6 +1084,7 @@ static void r600_gfx_init(struct drm_device *dev,
 		break;
 	case CHIP_RV610:
 	case CHIP_RS780:
+	case CHIP_RS880:
 	case CHIP_RV620:
 		gs_prim_buffer_depth = 32;
 		break;
@@ -1123,6 +1130,7 @@ static void r600_gfx_init(struct drm_device *dev,
 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
 	case CHIP_RV610:
 	case CHIP_RS780:
+	case CHIP_RS880:
 	case CHIP_RV620:
 		tc_cntl = R600_TC_L2_SIZE(8);
 		break;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index a162ade..9ff6dcb 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -152,7 +152,9 @@ int radeon_mc_setup(struct radeon_device *rdev)
 		}
 	} else {
 		rdev->mc.vram_location = 0;
-		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		tmp = rdev->mc.mc_vram_size;
+		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
+		rdev->mc.gtt_location = tmp;
 	}
 	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
 	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 127d045..3933f82 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -143,6 +143,7 @@ enum radeon_family {
 	CHIP_RV635,
 	CHIP_RV670,
 	CHIP_RS780,
+	CHIP_RS880,
 	CHIP_RV770,
 	CHIP_RV730,
 	CHIP_RV710,
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index dd9ac2f..e98cae3 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -106,7 +106,7 @@ static inline uint32_t radeon_object_flags_from_domain(uint32_t domain)
 		flags |= TTM_PL_FLAG_VRAM | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
 	}
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
-		flags |= TTM_PL_FLAG_TT | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
+		flags |= TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	}
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
 		flags |= TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING;
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 9d4c004..8535084 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -370,6 +370,11 @@
 	{0x1002, 0x9614, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9615, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9616, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9710, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9711, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9712, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9713, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9714, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0, 0, 0}
 
 #define r128_PCI_IDS \

linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch:
 blk-core.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

--- NEW FILE linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch ---
From: Mark McLoughlin <markmc at redhat.com>
Subject: [PATCH] block: silently error unsupported empty barriers too

With 2.6.31-rc5 in a KVM guest using dm and virtio_blk, we see the
following errors:

  end_request: I/O error, dev vda, sector 0
  end_request: I/O error, dev vda, sector 0

The errors go away if dm stops submitting empty barriers, by reverting:

  commit 52b1fd5a27c625c78373e024bf570af3c9d44a79
  Author: Mikulas Patocka <mpatocka at redhat.com>
    dm: send empty barriers to targets in dm_flush

We should error all barriers, even empty barriers, on devices like
virtio_blk which don't support them.

See also:

  https://bugzilla.redhat.com/514901

Signed-off-by: Mark McLoughlin <markmc at redhat.com>
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: Mikulas Patocka <mpatocka at redhat.com>
Cc: Alasdair G Kergon <agk at redhat.com>
Cc: Neil Brown <neilb at suse.de>
---
 block/blk-core.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index e3299a7..35ad2bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1163,8 +1163,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
-	if (bio_barrier(bio) && bio_has_data(bio) &&
-	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (bio_barrier(bio) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
-- 
1.6.4


linux-2.6-ksm-updates.patch:
 Documentation/vm/00-INDEX |    2 
 Documentation/vm/ksm.txt  |   89 ++++++
 include/linux/ksm.h       |   31 +-
 kernel/fork.c             |    1 
 mm/Kconfig                |    2 
 mm/ksm.c                  |  665 ++++++++++++++++++++++++++++------------------
 mm/memory.c               |    5 
 mm/mmap.c                 |   16 -
 8 files changed, 539 insertions(+), 272 deletions(-)

--- NEW FILE linux-2.6-ksm-updates.patch ---
Date: 	Mon, 3 Aug 2009 13:10:02 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 1/12] ksm: rename kernel_pages_allocated

We're not implementing swapping of KSM pages in its first release;
but when that follows, "kernel_pages_allocated" will be a very poor
name for the sysfs file showing number of nodes in the stable tree:
rename that to "pages_shared" throughout.

But we already have a "pages_shared", counting those page slots
sharing the shared pages: first rename that to... "pages_sharing".

What will become of "max_kernel_pages" when the pages shared can
be swapped?  I guess it will just be removed, so keep that name.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   57 ++++++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

--- ksm0/mm/ksm.c	2009-08-01 05:02:09.000000000 +0100
+++ ksm1/mm/ksm.c	2009-08-02 13:49:36.000000000 +0100
@@ -150,10 +150,10 @@ static struct kmem_cache *rmap_item_cach
 static struct kmem_cache *mm_slot_cache;
 
 /* The number of nodes in the stable tree */
-static unsigned long ksm_kernel_pages_allocated;
+static unsigned long ksm_pages_shared;
 
 /* The number of page slots sharing those nodes */
-static unsigned long ksm_pages_shared;
+static unsigned long ksm_pages_sharing;
 
 /* Limit on the number of unswappable pages used */
 static unsigned long ksm_max_kernel_pages;
@@ -384,7 +384,7 @@ static void remove_rmap_item_from_tree(s
 				next_item->address |= NODE_FLAG;
 			} else {
 				rb_erase(&rmap_item->node, &root_stable_tree);
-				ksm_kernel_pages_allocated--;
+				ksm_pages_shared--;
 			}
 		} else {
 			struct rmap_item *prev_item = rmap_item->prev;
@@ -398,7 +398,7 @@ static void remove_rmap_item_from_tree(s
 		}
 
 		rmap_item->next = NULL;
-		ksm_pages_shared--;
+		ksm_pages_sharing--;
 
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
@@ -748,7 +748,7 @@ static int try_to_merge_two_pages(struct
 	 * is the number of kernel pages that we hold.
 	 */
 	if (ksm_max_kernel_pages &&
-	    ksm_max_kernel_pages <= ksm_kernel_pages_allocated)
+	    ksm_max_kernel_pages <= ksm_pages_shared)
 		return err;
 
 	kpage = alloc_page(GFP_HIGHUSER);
@@ -787,7 +787,7 @@ static int try_to_merge_two_pages(struct
 		if (err)
 			break_cow(mm1, addr1);
 		else
-			ksm_pages_shared += 2;
+			ksm_pages_sharing += 2;
 	}
 
 	put_page(kpage);
@@ -817,7 +817,7 @@ static int try_to_merge_with_ksm_page(st
 	up_read(&mm1->mmap_sem);
 
 	if (!err)
-		ksm_pages_shared++;
+		ksm_pages_sharing++;
 
 	return err;
 }
@@ -935,7 +935,7 @@ static struct rmap_item *stable_tree_ins
 		}
 	}
 
-	ksm_kernel_pages_allocated++;
+	ksm_pages_shared++;
 
 	rmap_item->address |= NODE_FLAG | STABLE_FLAG;
 	rmap_item->next = NULL;
@@ -1051,7 +1051,7 @@ static void cmp_and_merge_page(struct pa
 	tree_rmap_item = stable_tree_search(page, page2, rmap_item);
 	if (tree_rmap_item) {
 		if (page == page2[0]) {			/* forked */
-			ksm_pages_shared++;
+			ksm_pages_sharing++;
 			err = 0;
 		} else
 			err = try_to_merge_with_ksm_page(rmap_item->mm,
@@ -1114,7 +1114,7 @@ static void cmp_and_merge_page(struct pa
 				break_cow(tree_rmap_item->mm,
 						tree_rmap_item->address);
 				break_cow(rmap_item->mm, rmap_item->address);
-				ksm_pages_shared -= 2;
+				ksm_pages_sharing -= 2;
 			}
 		}
 
@@ -1430,7 +1430,7 @@ static ssize_t run_store(struct kobject
 	/*
 	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
 	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
-	 * breaking COW to free the kernel_pages_allocated (but leaves
+	 * breaking COW to free the unswappable pages_shared (but leaves
 	 * mm_slots on the list for when ksmd may be set running again).
 	 */
 
@@ -1449,22 +1449,6 @@ static ssize_t run_store(struct kobject
 }
 KSM_ATTR(run);
 
-static ssize_t pages_shared_show(struct kobject *kobj,
-				 struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%lu\n",
-			ksm_pages_shared - ksm_kernel_pages_allocated);
-}
-KSM_ATTR_RO(pages_shared);
-
-static ssize_t kernel_pages_allocated_show(struct kobject *kobj,
-					   struct kobj_attribute *attr,
-					   char *buf)
-{
-	return sprintf(buf, "%lu\n", ksm_kernel_pages_allocated);
-}
-KSM_ATTR_RO(kernel_pages_allocated);
-
 static ssize_t max_kernel_pages_store(struct kobject *kobj,
 				      struct kobj_attribute *attr,
 				      const char *buf, size_t count)
@@ -1488,13 +1472,28 @@ static ssize_t max_kernel_pages_show(str
 }
 KSM_ATTR(max_kernel_pages);
 
+static ssize_t pages_shared_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_pages_shared);
+}
+KSM_ATTR_RO(pages_shared);
+
+static ssize_t pages_sharing_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+			ksm_pages_sharing - ksm_pages_shared);
+}
+KSM_ATTR_RO(pages_sharing);
+
 static struct attribute *ksm_attrs[] = {
 	&sleep_millisecs_attr.attr,
 	&pages_to_scan_attr.attr,
 	&run_attr.attr,
-	&pages_shared_attr.attr,
-	&kernel_pages_allocated_attr.attr,
 	&max_kernel_pages_attr.attr,
+	&pages_shared_attr.attr,
+	&pages_sharing_attr.attr,
 	NULL,
 };
 
--
Date: 	Mon, 3 Aug 2009 13:11:00 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 2/12] ksm: move pages_sharing updates

The pages_shared count is incremented and decremented when adding a node
to and removing a node from the stable tree: easy to understand.  But the
pages_sharing count was hard to follow, being adjusted in various places:
increment and decrement it when adding to and removing from the stable tree.

And the pages_sharing variable used to include the pages_shared, then those
were subtracted when shown in the pages_sharing sysfs file: now keep it as
an exclusive count of leaves hanging off the stable tree nodes, throughout.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

--- ksm1/mm/ksm.c	2009-08-02 13:49:36.000000000 +0100
+++ ksm2/mm/ksm.c	2009-08-02 13:49:43.000000000 +0100
@@ -152,7 +152,7 @@ static struct kmem_cache *mm_slot_cache;
 /* The number of nodes in the stable tree */
 static unsigned long ksm_pages_shared;
 
-/* The number of page slots sharing those nodes */
+/* The number of page slots additionally sharing those nodes */
 static unsigned long ksm_pages_sharing;
 
 /* Limit on the number of unswappable pages used */
@@ -382,6 +382,7 @@ static void remove_rmap_item_from_tree(s
 						&next_item->node,
 						&root_stable_tree);
 				next_item->address |= NODE_FLAG;
+				ksm_pages_sharing--;
 			} else {
 				rb_erase(&rmap_item->node, &root_stable_tree);
 				ksm_pages_shared--;
@@ -395,10 +396,10 @@ static void remove_rmap_item_from_tree(s
 				BUG_ON(next_item->prev != rmap_item);
 				next_item->prev = rmap_item->prev;
 			}
+			ksm_pages_sharing--;
 		}
 
 		rmap_item->next = NULL;
-		ksm_pages_sharing--;
 
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
@@ -786,8 +787,6 @@ static int try_to_merge_two_pages(struct
 		 */
 		if (err)
 			break_cow(mm1, addr1);
-		else
-			ksm_pages_sharing += 2;
 	}
 
 	put_page(kpage);
@@ -816,9 +815,6 @@ static int try_to_merge_with_ksm_page(st
 	err = try_to_merge_one_page(vma, page1, kpage);
 	up_read(&mm1->mmap_sem);
 
-	if (!err)
-		ksm_pages_sharing++;
-
 	return err;
 }
 
@@ -935,13 +931,12 @@ static struct rmap_item *stable_tree_ins
 		}
 	}
 
-	ksm_pages_shared++;
-
 	rmap_item->address |= NODE_FLAG | STABLE_FLAG;
 	rmap_item->next = NULL;
 	rb_link_node(&rmap_item->node, parent, new);
 	rb_insert_color(&rmap_item->node, &root_stable_tree);
 
+	ksm_pages_shared++;
 	return rmap_item;
 }
 
@@ -1026,6 +1021,8 @@ static void stable_tree_append(struct rm
 
 	tree_rmap_item->next = rmap_item;
 	rmap_item->address |= STABLE_FLAG;
+
+	ksm_pages_sharing++;
 }
 
 /*
@@ -1050,10 +1047,9 @@ static void cmp_and_merge_page(struct pa
 	/* We first start with searching the page inside the stable tree */
 	tree_rmap_item = stable_tree_search(page, page2, rmap_item);
 	if (tree_rmap_item) {
-		if (page == page2[0]) {			/* forked */
-			ksm_pages_sharing++;
+		if (page == page2[0])			/* forked */
 			err = 0;
-		} else
+		else
 			err = try_to_merge_with_ksm_page(rmap_item->mm,
 							 rmap_item->address,
 							 page, page2[0]);
@@ -1114,7 +1110,6 @@ static void cmp_and_merge_page(struct pa
 				break_cow(tree_rmap_item->mm,
 						tree_rmap_item->address);
 				break_cow(rmap_item->mm, rmap_item->address);
-				ksm_pages_sharing -= 2;
 			}
 		}
 
@@ -1482,8 +1477,7 @@ KSM_ATTR_RO(pages_shared);
 static ssize_t pages_sharing_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%lu\n",
-			ksm_pages_sharing - ksm_pages_shared);
+	return sprintf(buf, "%lu\n", ksm_pages_sharing);
 }
 KSM_ATTR_RO(pages_sharing);
 
--
Date: 	Mon, 3 Aug 2009 13:11:53 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 3/12] ksm: pages_unshared and pages_volatile

The pages_shared and pages_sharing counts give a good picture of how
successful KSM is at sharing; but no clue to how much wasted work it's
doing to get there.  Add pages_unshared (count of unique pages waiting
in the unstable tree, hoping to find a mate) and pages_volatile.

pages_volatile is harder to define.  It includes those pages changing
too fast to get into the unstable tree, but also whatever other edge
conditions prevent a page getting into the trees: a high value may
deserve investigation.  Don't try to calculate it from the various
conditions: it's the total of rmap_items less those accounted for.

Also show full_scans: the number of completed scans of everything
registered in the mm list.

The locking for all these counts is simply ksm_thread_mutex.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   52 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

--- ksm2/mm/ksm.c	2009-08-02 13:49:43.000000000 +0100
+++ ksm3/mm/ksm.c	2009-08-02 13:49:51.000000000 +0100
@@ -155,6 +155,12 @@ static unsigned long ksm_pages_shared;
 /* The number of page slots additionally sharing those nodes */
 static unsigned long ksm_pages_sharing;
 
+/* The number of nodes in the unstable tree */
+static unsigned long ksm_pages_unshared;
+
+/* The number of rmap_items in use: to calculate pages_volatile */
+static unsigned long ksm_rmap_items;
+
 /* Limit on the number of unswappable pages used */
 static unsigned long ksm_max_kernel_pages;
 
@@ -204,11 +210,17 @@ static void __init ksm_slab_free(void)
 
 static inline struct rmap_item *alloc_rmap_item(void)
 {
-	return kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+	struct rmap_item *rmap_item;
+
+	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+	if (rmap_item)
+		ksm_rmap_items++;
+	return rmap_item;
 }
 
 static inline void free_rmap_item(struct rmap_item *rmap_item)
 {
+	ksm_rmap_items--;
 	rmap_item->mm = NULL;	/* debug safety */
 	kmem_cache_free(rmap_item_cache, rmap_item);
 }
@@ -419,6 +431,7 @@ static void remove_rmap_item_from_tree(s
 		BUG_ON(age > 2);
 		if (!age)
 			rb_erase(&rmap_item->node, &root_unstable_tree);
+		ksm_pages_unshared--;
 	}
 
 	rmap_item->address &= PAGE_MASK;
@@ -1002,6 +1015,7 @@ static struct rmap_item *unstable_tree_s
 	rb_link_node(&rmap_item->node, parent, new);
 	rb_insert_color(&rmap_item->node, &root_unstable_tree);
 
+	ksm_pages_unshared++;
 	return NULL;
 }
 
@@ -1098,6 +1112,8 @@ static void cmp_and_merge_page(struct pa
 		if (!err) {
 			rb_erase(&tree_rmap_item->node, &root_unstable_tree);
 			tree_rmap_item->address &= ~NODE_FLAG;
+			ksm_pages_unshared--;
+
 			/*
 			 * If we fail to insert the page into the stable tree,
 			 * we will have 2 virtual addresses that are pointing
@@ -1481,6 +1497,37 @@ static ssize_t pages_sharing_show(struct
 }
 KSM_ATTR_RO(pages_sharing);
 
+static ssize_t pages_unshared_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_pages_unshared);
+}
+KSM_ATTR_RO(pages_unshared);
+
+static ssize_t pages_volatile_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	long ksm_pages_volatile;
+
+	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
+				- ksm_pages_sharing - ksm_pages_unshared;
+	/*
+	 * It was not worth any locking to calculate that statistic,
+	 * but it might therefore sometimes be negative: conceal that.
+	 */
+	if (ksm_pages_volatile < 0)
+		ksm_pages_volatile = 0;
+	return sprintf(buf, "%ld\n", ksm_pages_volatile);
+}
+KSM_ATTR_RO(pages_volatile);
+
+static ssize_t full_scans_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", ksm_scan.seqnr);
+}
+KSM_ATTR_RO(full_scans);
+
 static struct attribute *ksm_attrs[] = {
 	&sleep_millisecs_attr.attr,
 	&pages_to_scan_attr.attr,
@@ -1488,6 +1535,9 @@ static struct attribute *ksm_attrs[] = {
 	&max_kernel_pages_attr.attr,
 	&pages_shared_attr.attr,
 	&pages_sharing_attr.attr,
+	&pages_unshared_attr.attr,
+	&pages_volatile_attr.attr,
+	&full_scans_attr.attr,
 	NULL,
 };
 
--
Date: 	Mon, 3 Aug 2009 13:12:59 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 4/12] ksm: break cow once unshared

We kept agreeing not to bother about the unswappable shared KSM pages
which later become unshared by others: observation suggests they're not
a significant proportion.  But they are disadvantageous, and it is easier
to break COW to replace them by swappable pages, than offer statistics
to show that they don't matter; then we can stop worrying about them.

Doing this in ksm_do_scan, they don't go through cmp_and_merge_page on
this pass: give them a good chance of getting into the unstable tree
on the next pass, or back into the stable, by computing checksum now.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |    8 ++++++++
 1 file changed, 8 insertions(+)

--- ksm3/mm/ksm.c	2009-08-02 13:49:51.000000000 +0100
+++ ksm4/mm/ksm.c	2009-08-02 13:49:59.000000000 +0100
@@ -1275,6 +1275,14 @@ static void ksm_do_scan(unsigned int sca
 			return;
 		if (!PageKsm(page) || !in_stable_tree(rmap_item))
 			cmp_and_merge_page(page, rmap_item);
+		else if (page_mapcount(page) == 1) {
+			/*
+			 * Replace now-unshared ksm page by ordinary page.
+			 */
+			break_cow(rmap_item->mm, rmap_item->address);
+			remove_rmap_item_from_tree(rmap_item);
+			rmap_item->oldchecksum = calc_checksum(page);
+		}
 		put_page(page);
 	}
 }
--
Date: 	Mon, 3 Aug 2009 13:14:03 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 5/12] ksm: keep quiet while list empty

ksm_scan_thread already sleeps in wait_event_interruptible until setting
ksm_run activates it; but if there's nothing on its list to look at, i.e.
nobody has yet said madvise MADV_MERGEABLE, it's a shame to be clocking
up system time and full_scans: ksmd_should_run added to check that too.

And move the mutex_lock out around it: the new counts showed that when
ksm_run is stopped, a little work often got done afterwards, because it
had been read before taking the mutex.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

--- ksm4/mm/ksm.c	2009-08-02 13:49:59.000000000 +0100
+++ ksm5/mm/ksm.c	2009-08-02 13:50:07.000000000 +0100
@@ -1287,21 +1287,27 @@ static void ksm_do_scan(unsigned int sca
 	}
 }
 
+static int ksmd_should_run(void)
+{
+	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
+}
+
 static int ksm_scan_thread(void *nothing)
 {
 	set_user_nice(current, 5);
 
 	while (!kthread_should_stop()) {
-		if (ksm_run & KSM_RUN_MERGE) {
-			mutex_lock(&ksm_thread_mutex);
+		mutex_lock(&ksm_thread_mutex);
+		if (ksmd_should_run())
 			ksm_do_scan(ksm_thread_pages_to_scan);
-			mutex_unlock(&ksm_thread_mutex);
+		mutex_unlock(&ksm_thread_mutex);
+
+		if (ksmd_should_run()) {
 			schedule_timeout_interruptible(
 				msecs_to_jiffies(ksm_thread_sleep_millisecs));
 		} else {
 			wait_event_interruptible(ksm_thread_wait,
-					(ksm_run & KSM_RUN_MERGE) ||
-					kthread_should_stop());
+				ksmd_should_run() || kthread_should_stop());
 		}
 	}
 	return 0;
@@ -1346,10 +1352,16 @@ int ksm_madvise(struct vm_area_struct *v
 
 int __ksm_enter(struct mm_struct *mm)
 {
-	struct mm_slot *mm_slot = alloc_mm_slot();
+	struct mm_slot *mm_slot;
+	int needs_wakeup;
+
+	mm_slot = alloc_mm_slot();
 	if (!mm_slot)
 		return -ENOMEM;
 
+	/* Check ksm_run too?  Would need tighter locking */
+	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
+
 	spin_lock(&ksm_mmlist_lock);
 	insert_to_mm_slots_hash(mm, mm_slot);
 	/*
@@ -1361,6 +1373,10 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+
+	if (needs_wakeup)
+		wake_up_interruptible(&ksm_thread_wait);
+
 	return 0;
 }
 
--
Date: 	Mon, 3 Aug 2009 13:15:15 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 6/12] ksm: five little cleanups

1. We don't use __break_cow entry point now: merge it into break_cow.
2. remove_all_slot_rmap_items is just a special case of
   remove_trailing_rmap_items: use the latter instead.
3. Extend comment on unmerge_ksm_pages and rmap_items.
4. try_to_merge_two_pages should use try_to_merge_with_ksm_page
   instead of duplicating its code; and so swap them around.
5. Comment on cmp_and_merge_page described last year's: update it.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |  112 ++++++++++++++++++++---------------------------------
 1 file changed, 44 insertions(+), 68 deletions(-)

--- ksm5/mm/ksm.c	2009-08-02 13:50:07.000000000 +0100
+++ ksm6/mm/ksm.c	2009-08-02 13:50:15.000000000 +0100
@@ -315,22 +315,18 @@ static void break_ksm(struct vm_area_str
 	/* Which leaves us looping there if VM_FAULT_OOM: hmmm... */
 }
 
-static void __break_cow(struct mm_struct *mm, unsigned long addr)
+static void break_cow(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma;
 
+	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
-		return;
+		goto out;
 	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
-		return;
+		goto out;
 	break_ksm(vma, addr);
-}
-
-static void break_cow(struct mm_struct *mm, unsigned long addr)
-{
-	down_read(&mm->mmap_sem);
-	__break_cow(mm, addr);
+out:
 	up_read(&mm->mmap_sem);
 }
 
@@ -439,17 +435,6 @@ static void remove_rmap_item_from_tree(s
 	cond_resched();		/* we're called from many long loops */
 }
 
-static void remove_all_slot_rmap_items(struct mm_slot *mm_slot)
-{
-	struct rmap_item *rmap_item, *node;
-
-	list_for_each_entry_safe(rmap_item, node, &mm_slot->rmap_list, link) {
-		remove_rmap_item_from_tree(rmap_item);
-		list_del(&rmap_item->link);
-		free_rmap_item(rmap_item);
-	}
-}
-
 static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
 				       struct list_head *cur)
 {
@@ -471,6 +456,11 @@ static void remove_trailing_rmap_items(s
  * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
  * rmap_items from parent to child at fork time (so as not to waste time
  * if exit comes before the next scan reaches it).
+ *
+ * Similarly, although we'd like to remove rmap_items (so updating counts
+ * and freeing memory) when unmerging an area, it's easier to leave that
+ * to the next pass of ksmd - consider, for example, how ksmd might be
+ * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static void unmerge_ksm_pages(struct vm_area_struct *vma,
 			      unsigned long start, unsigned long end)
@@ -495,7 +485,7 @@ static void unmerge_and_remove_all_rmap_
 				continue;
 			unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
 		}
-		remove_all_slot_rmap_items(mm_slot);
+		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
 		up_read(&mm->mmap_sem);
 	}
 
@@ -533,7 +523,7 @@ static void remove_mm_from_lists(struct
 	list_del(&mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	remove_all_slot_rmap_items(mm_slot);
+	remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
 	free_mm_slot(mm_slot);
 	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 }
@@ -740,6 +730,29 @@ out:
 }
 
 /*
+ * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
+ * but no new kernel page is allocated: kpage must already be a ksm page.
+ */
+static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
+				      unsigned long addr1,
+				      struct page *page1,
+				      struct page *kpage)
+{
+	struct vm_area_struct *vma;
+	int err = -EFAULT;
+
+	down_read(&mm1->mmap_sem);
+	vma = find_vma(mm1, addr1);
+	if (!vma || vma->vm_start > addr1)
+		goto out;
+
+	err = try_to_merge_one_page(vma, page1, kpage);
+out:
+	up_read(&mm1->mmap_sem);
+	return err;
+}
+
+/*
  * try_to_merge_two_pages - take two identical pages and prepare them
  * to be merged into one page.
  *
@@ -772,9 +785,8 @@ static int try_to_merge_two_pages(struct
 	down_read(&mm1->mmap_sem);
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1) {
-		put_page(kpage);
 		up_read(&mm1->mmap_sem);
-		return err;
+		goto out;
 	}
 
 	copy_user_highpage(kpage, page1, addr1, vma);
@@ -782,56 +794,20 @@ static int try_to_merge_two_pages(struct
 	up_read(&mm1->mmap_sem);
 
 	if (!err) {
-		down_read(&mm2->mmap_sem);
-		vma = find_vma(mm2, addr2);
-		if (!vma || vma->vm_start > addr2) {
-			put_page(kpage);
-			up_read(&mm2->mmap_sem);
-			break_cow(mm1, addr1);
-			return -EFAULT;
-		}
-
-		err = try_to_merge_one_page(vma, page2, kpage);
-		up_read(&mm2->mmap_sem);
-
+		err = try_to_merge_with_ksm_page(mm2, addr2, page2, kpage);
 		/*
-		 * If the second try_to_merge_one_page failed, we have a
-		 * ksm page with just one pte pointing to it, so break it.
+		 * If that fails, we have a ksm page with only one pte
+		 * pointing to it: so break it.
 		 */
 		if (err)
 			break_cow(mm1, addr1);
 	}
-
+out:
 	put_page(kpage);
 	return err;
 }
 
 /*
- * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
- * but no new kernel page is allocated: kpage must already be a ksm page.
- */
-static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
-				      unsigned long addr1,
-				      struct page *page1,
-				      struct page *kpage)
-{
-	struct vm_area_struct *vma;
-	int err = -EFAULT;
-
-	down_read(&mm1->mmap_sem);
-	vma = find_vma(mm1, addr1);
-	if (!vma || vma->vm_start > addr1) {
-		up_read(&mm1->mmap_sem);
-		return err;
-	}
-
-	err = try_to_merge_one_page(vma, page1, kpage);
-	up_read(&mm1->mmap_sem);
-
-	return err;
-}
-
-/*
  * stable_tree_search - search page inside the stable tree
  * @page: the page that we are searching identical pages to.
  * @page2: pointer into identical page that we are holding inside the stable
@@ -1040,10 +1016,10 @@ static void stable_tree_append(struct rm
 }
 
 /*
- * cmp_and_merge_page - take a page computes its hash value and check if there
- * is similar hash value to different page,
- * in case we find that there is similar hash to different page we call to
- * try_to_merge_two_pages().
+ * cmp_and_merge_page - first see if page can be merged into the stable tree;
+ * if not, compare checksum to previous and if it's the same, see if page can
+ * be inserted into the unstable tree, or merged with a page already there and
+ * both transferred to the stable tree.
  *
  * @page: the page that we are searching identical page to.
  * @rmap_item: the reverse mapping into the virtual address of this page
--
Date: 	Mon, 3 Aug 2009 13:16:15 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 7/12] ksm: fix endless loop on oom

break_ksm has been looping endlessly ignoring VM_FAULT_OOM: that should
only be a problem for ksmd when a memory control group imposes limits
(normally the OOM killer will kill others with an mm until it succeeds);
but in general (especially for MADV_UNMERGEABLE and KSM_RUN_UNMERGE) we
do need to route the error (or kill) back to the caller (or sighandling).

Test signal_pending in unmerge_ksm_pages, which could be a lengthy
procedure if it has to spill into swap: returning -ERESTARTSYS so that
trivial signals will restart but fatals will terminate (is that right?
we do different things in different places in mm, none exactly this).

unmerge_and_remove_all_rmap_items was forgetting to lock when going
down the mm_list: fix that.  Whether it's successful or not, reset
ksm_scan cursor to head; but only if it's successful, reset seqnr
(shown in full_scans) - page counts will have gone down to zero.

This patch leaves a significant OOM deadlock, but it's a good step
on the way, and that deadlock is fixed in a subsequent patch.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |  108 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 85 insertions(+), 23 deletions(-)

--- ksm6/mm/ksm.c	2009-08-02 13:50:15.000000000 +0100
+++ ksm7/mm/ksm.c	2009-08-02 13:50:25.000000000 +0100
@@ -294,10 +294,10 @@ static inline int in_stable_tree(struct
  * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
  * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
  */
-static void break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
-	int ret;
+	int ret = 0;
 
 	do {
 		cond_resched();
@@ -310,9 +310,36 @@ static void break_ksm(struct vm_area_str
 		else
 			ret = VM_FAULT_WRITE;
 		put_page(page);
-	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS)));
-
-	/* Which leaves us looping there if VM_FAULT_OOM: hmmm... */
+	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
+	/*
+	 * We must loop because handle_mm_fault() may back out if there's
+	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
+	 *
+	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
+	 * COW has been broken, even if the vma does not permit VM_WRITE;
+	 * but note that a concurrent fault might break PageKsm for us.
+	 *
+	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
+	 * backing file, which also invalidates anonymous pages: that's
+	 * okay, that truncation will have unmapped the PageKsm for us.
+	 *
+	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
+	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
+	 * current task has TIF_MEMDIE set, and will be OOM killed on return
+	 * to user; and ksmd, having no mm, would never be chosen for that.
+	 *
+	 * But if the mm is in a limited mem_cgroup, then the fault may fail
+	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
+	 * even ksmd can fail in this way - though it's usually breaking ksm
+	 * just to undo a merge it made a moment before, so unlikely to oom.
+	 *
+	 * That's a pity: we might therefore have more kernel pages allocated
+	 * than we're counting as nodes in the stable tree; but ksm_do_scan
+	 * will retry to break_cow on each pass, so should recover the page
+	 * in due course.  The important thing is to not let VM_MERGEABLE
+	 * be cleared while any such pages might remain in the area.
+	 */
+	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
 }
 
 static void break_cow(struct mm_struct *mm, unsigned long addr)
@@ -462,39 +489,61 @@ static void remove_trailing_rmap_items(s
  * to the next pass of ksmd - consider, for example, how ksmd might be
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
-static void unmerge_ksm_pages(struct vm_area_struct *vma,
-			      unsigned long start, unsigned long end)
+static int unmerge_ksm_pages(struct vm_area_struct *vma,
+			     unsigned long start, unsigned long end)
 {
 	unsigned long addr;
+	int err = 0;
 
-	for (addr = start; addr < end; addr += PAGE_SIZE)
-		break_ksm(vma, addr);
+	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (signal_pending(current))
+			err = -ERESTARTSYS;
+		else
+			err = break_ksm(vma, addr);
+	}
+	return err;
 }
 
-static void unmerge_and_remove_all_rmap_items(void)
+static int unmerge_and_remove_all_rmap_items(void)
 {
 	struct mm_slot *mm_slot;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	int err = 0;
+
+	spin_lock(&ksm_mmlist_lock);
+	mm_slot = list_entry(ksm_mm_head.mm_list.next,
+						struct mm_slot, mm_list);
+	spin_unlock(&ksm_mmlist_lock);
 
-	list_for_each_entry(mm_slot, &ksm_mm_head.mm_list, mm_list) {
+	while (mm_slot != &ksm_mm_head) {
 		mm = mm_slot->mm;
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
-			unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+			err = unmerge_ksm_pages(vma,
+						vma->vm_start, vma->vm_end);
+			if (err) {
+				up_read(&mm->mmap_sem);
+				goto out;
+			}
 		}
 		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
 		up_read(&mm->mmap_sem);
+
+		spin_lock(&ksm_mmlist_lock);
+		mm_slot = list_entry(mm_slot->mm_list.next,
+						struct mm_slot, mm_list);
+		spin_unlock(&ksm_mmlist_lock);
 	}
 
+	ksm_scan.seqnr = 0;
+out:
 	spin_lock(&ksm_mmlist_lock);
-	if (ksm_scan.mm_slot != &ksm_mm_head) {
-		ksm_scan.mm_slot = &ksm_mm_head;
-		ksm_scan.seqnr++;
-	}
+	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
+	return err;
 }
 
 static void remove_mm_from_lists(struct mm_struct *mm)
@@ -1058,6 +1107,8 @@ static void cmp_and_merge_page(struct pa
 	/*
 	 * A ksm page might have got here by fork, but its other
 	 * references have already been removed from the stable tree.
+	 * Or it might be left over from a break_ksm which failed
+	 * when the mem_cgroup had reached its limit: try again now.
 	 */
 	if (PageKsm(page))
 		break_cow(rmap_item->mm, rmap_item->address);
@@ -1293,6 +1344,7 @@ int ksm_madvise(struct vm_area_struct *v
 		unsigned long end, int advice, unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	int err;
 
 	switch (advice) {
 	case MADV_MERGEABLE:
@@ -1305,9 +1357,11 @@ int ksm_madvise(struct vm_area_struct *v
 				 VM_MIXEDMAP  | VM_SAO))
 			return 0;		/* just ignore the advice */
 
-		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
-			if (__ksm_enter(mm) < 0)
-				return -EAGAIN;
+		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
+			err = __ksm_enter(mm);
+			if (err)
+				return err;
+		}
 
 		*vm_flags |= VM_MERGEABLE;
 		break;
@@ -1316,8 +1370,11 @@ int ksm_madvise(struct vm_area_struct *v
 		if (!(*vm_flags & VM_MERGEABLE))
 			return 0;		/* just ignore the advice */
 
-		if (vma->anon_vma)
-			unmerge_ksm_pages(vma, start, end);
+		if (vma->anon_vma) {
+			err = unmerge_ksm_pages(vma, start, end);
+			if (err)
+				return err;
+		}
 
 		*vm_flags &= ~VM_MERGEABLE;
 		break;
@@ -1448,8 +1505,13 @@ static ssize_t run_store(struct kobject
 	mutex_lock(&ksm_thread_mutex);
 	if (ksm_run != flags) {
 		ksm_run = flags;
-		if (flags & KSM_RUN_UNMERGE)
-			unmerge_and_remove_all_rmap_items();
+		if (flags & KSM_RUN_UNMERGE) {
+			err = unmerge_and_remove_all_rmap_items();
+			if (err) {
+				ksm_run = KSM_RUN_STOP;
+				count = err;
+			}
+		}
 	}
 	mutex_unlock(&ksm_thread_mutex);
 
--
Date: 	Mon, 3 Aug 2009 13:17:15 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 8/12] ksm: distribute remove_mm_from_lists

Do some housekeeping in ksm.c, to help make the next patch easier
to understand: remove the function remove_mm_from_lists, distributing
its code to its callsites scan_get_next_rmap_item and __ksm_exit.

That turns out to be a win in scan_get_next_rmap_item: move its
remove_trailing_rmap_items and cursor advancement up, and it becomes
simpler than before.  __ksm_exit becomes messier, but will change
again; and moving its remove_trailing_rmap_items up lets us strengthen
the unstable tree item's age condition in remove_rmap_item_from_tree.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   97 ++++++++++++++++++++++-------------------------------
 1 file changed, 42 insertions(+), 55 deletions(-)

--- ksm7/mm/ksm.c	2009-08-02 13:50:25.000000000 +0100
+++ ksm8/mm/ksm.c	2009-08-02 13:50:32.000000000 +0100
@@ -444,14 +444,9 @@ static void remove_rmap_item_from_tree(s
 		 * But __ksm_exit has to be careful: do the rb_erase
 		 * if it's interrupting a scan, and this rmap_item was
 		 * inserted by this scan rather than left from before.
-		 *
-		 * Because of the case in which remove_mm_from_lists
-		 * increments seqnr before removing rmaps, unstable_nr
-		 * may even be 2 behind seqnr, but should never be
-		 * further behind.  Yes, I did have trouble with this!
 		 */
 		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
-		BUG_ON(age > 2);
+		BUG_ON(age > 1);
 		if (!age)
 			rb_erase(&rmap_item->node, &root_unstable_tree);
 		ksm_pages_unshared--;
@@ -546,37 +541,6 @@ out:
 	return err;
 }
 
-static void remove_mm_from_lists(struct mm_struct *mm)
-{
-	struct mm_slot *mm_slot;
-
-	spin_lock(&ksm_mmlist_lock);
-	mm_slot = get_mm_slot(mm);
-
-	/*
-	 * This mm_slot is always at the scanning cursor when we're
-	 * called from scan_get_next_rmap_item; but it's a special
-	 * case when we're called from __ksm_exit.
-	 */
-	if (ksm_scan.mm_slot == mm_slot) {
-		ksm_scan.mm_slot = list_entry(
-			mm_slot->mm_list.next, struct mm_slot, mm_list);
-		ksm_scan.address = 0;
-		ksm_scan.rmap_item = list_entry(
-			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
-		if (ksm_scan.mm_slot == &ksm_mm_head)
-			ksm_scan.seqnr++;
-	}
-
-	hlist_del(&mm_slot->link);
-	list_del(&mm_slot->mm_list);
-	spin_unlock(&ksm_mmlist_lock);
-
-	remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-	free_mm_slot(mm_slot);
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-}
-
 static u32 calc_checksum(struct page *page)
 {
 	u32 checksum;
@@ -1248,33 +1212,31 @@ next_mm:
 		}
 	}
 
-	if (!ksm_scan.address) {
-		/*
-		 * We've completed a full scan of all vmas, holding mmap_sem
-		 * throughout, and found no VM_MERGEABLE: so do the same as
-		 * __ksm_exit does to remove this mm from all our lists now.
-		 */
-		remove_mm_from_lists(mm);
-		up_read(&mm->mmap_sem);
-		slot = ksm_scan.mm_slot;
-		if (slot != &ksm_mm_head)
-			goto next_mm;
-		return NULL;
-	}
-
 	/*
 	 * Nuke all the rmap_items that are above this current rmap:
 	 * because there were no VM_MERGEABLE vmas with such addresses.
 	 */
 	remove_trailing_rmap_items(slot, ksm_scan.rmap_item->link.next);
-	up_read(&mm->mmap_sem);
 
 	spin_lock(&ksm_mmlist_lock);
-	slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
-	ksm_scan.mm_slot = slot;
+	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
+						struct mm_slot, mm_list);
+	if (ksm_scan.address == 0) {
+		/*
+		 * We've completed a full scan of all vmas, holding mmap_sem
+		 * throughout, and found no VM_MERGEABLE: so do the same as
+		 * __ksm_exit does to remove this mm from all our lists now.
+		 */
+		hlist_del(&slot->link);
+		list_del(&slot->mm_list);
+		free_mm_slot(slot);
+		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+	}
 	spin_unlock(&ksm_mmlist_lock);
+	up_read(&mm->mmap_sem);
 
 	/* Repeat until we've completed scanning the whole list */
+	slot = ksm_scan.mm_slot;
 	if (slot != &ksm_mm_head)
 		goto next_mm;
 
@@ -1415,13 +1377,38 @@ int __ksm_enter(struct mm_struct *mm)
 
 void __ksm_exit(struct mm_struct *mm)
 {
+	struct mm_slot *mm_slot;
+
 	/*
 	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
 	 * but we do need to exclude ksmd and other exiters while we modify
 	 * the various lists and trees.
 	 */
 	mutex_lock(&ksm_thread_mutex);
-	remove_mm_from_lists(mm);
+	spin_lock(&ksm_mmlist_lock);
+	mm_slot = get_mm_slot(mm);
+	if (!list_empty(&mm_slot->rmap_list)) {
+		spin_unlock(&ksm_mmlist_lock);
+		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
+		spin_lock(&ksm_mmlist_lock);
+	}
+
+	if (ksm_scan.mm_slot == mm_slot) {
+		ksm_scan.mm_slot = list_entry(
+			mm_slot->mm_list.next, struct mm_slot, mm_list);
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(
+			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
+		if (ksm_scan.mm_slot == &ksm_mm_head)
+			ksm_scan.seqnr++;
+	}
+
+	hlist_del(&mm_slot->link);
+	list_del(&mm_slot->mm_list);
+	spin_unlock(&ksm_mmlist_lock);
+
+	free_mm_slot(mm_slot);
+	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 	mutex_unlock(&ksm_thread_mutex);
 }
 
--
Date: 	Mon, 3 Aug 2009 13:18:16 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 9/12] ksm: fix oom deadlock

There's a now-obvious deadlock in KSM's out-of-memory handling:
imagine ksmd or KSM_RUN_UNMERGE handling, holding ksm_thread_mutex,
trying to allocate a page to break KSM in an mm which becomes the
OOM victim (quite likely in the unmerge case): it's killed and goes
to exit, and hangs there waiting to acquire ksm_thread_mutex.

Clearly we must not require ksm_thread_mutex in __ksm_exit, simple
though that made everything else: perhaps use mmap_sem somehow?
And part of the answer lies in the comments on unmerge_ksm_pages:
__ksm_exit should also leave all the rmap_item removal to ksmd.

But there's a fundamental problem, that KSM relies upon mmap_sem to
guarantee the consistency of the mm it's dealing with, yet exit_mmap
tears down an mm without taking mmap_sem.  And bumping mm_users won't
help at all, that just ensures that the pages the OOM killer assumes
are on their way to being freed will not be freed.

The best answer seems to be, to move the ksm_exit callout from just
before exit_mmap, to the middle of exit_mmap: after the mm's pages
have been freed (if the mmu_gather is flushed), but before its page
tables and vma structures have been freed; and down_write,up_write
mmap_sem there to serialize with KSM's own reliance on mmap_sem.

But KSM then needs to be careful, whenever it downs mmap_sem, to
check that the mm is not already exiting: there's a danger of using
find_vma on a layout that's being torn apart, or writing into page
tables which have been freed for reuse; and even do_anonymous_page
and __do_fault need to check they're not being called by break_ksm
to reinstate a pte after zap_pte_range has zapped that page table.

Though it might be clearer to add an exiting flag, set while holding
mmap_sem in __ksm_exit, that wouldn't cover the issue of reinstating
a zapped pte.  All we need is to check whether mm_users is 0 - but
must remember that ksmd may detect that before __ksm_exit is reached.
So, ksm_test_exit(mm) added to comment such checks on mm->mm_users.

__ksm_exit now has to leave clearing up the rmap_items to ksmd,
that needs ksm_thread_mutex; but shift the exiting mm just after the
ksm_scan cursor so that it will soon be dealt with.  __ksm_enter raises
mm_count to hold the mm_struct, ksmd's exit processing (exactly like
its processing when it finds all VM_MERGEABLEs unmapped) mmdrops it,
similar procedure for KSM_RUN_UNMERGE (which has stopped ksmd).

But also give __ksm_exit a fast path: when there's no complication
(no rmap_items attached to mm and it's not at the ksm_scan cursor),
it can safely do all the exiting work itself.  This is not just an
optimization: when ksmd is not running, the raised mm_count would
otherwise leak mm_structs.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 include/linux/ksm.h |   31 +++++++--
 kernel/fork.c       |    1 
 mm/ksm.c            |  144 ++++++++++++++++++++++++++++--------------
 mm/memory.c         |    5 -
 mm/mmap.c           |    9 ++
 5 files changed, 137 insertions(+), 53 deletions(-)

--- ksm8/include/linux/ksm.h	2009-08-01 05:02:09.000000000 +0100
+++ ksm9/include/linux/ksm.h	2009-08-02 13:50:41.000000000 +0100
@@ -12,11 +12,14 @@
 #include <linux/sched.h>
 #include <linux/vmstat.h>
 
+struct mmu_gather;
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
 int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm);
+void __ksm_exit(struct mm_struct *mm,
+		struct mmu_gather **tlbp, unsigned long end);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
@@ -25,10 +28,24 @@ static inline int ksm_fork(struct mm_str
 	return 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm)
+/*
+ * For KSM to handle OOM without deadlock when it's breaking COW in a
+ * likely victim of the OOM killer, exit_mmap() has to serialize with
+ * ksm_exit() after freeing mm's pages but before freeing its page tables.
+ * That leaves a window in which KSM might refault pages which have just
+ * been finally unmapped: guard against that with ksm_test_exit(), and
+ * use it after getting mmap_sem in ksm.c, to check if mm is exiting.
+ */
+static inline bool ksm_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm,
+			    struct mmu_gather **tlbp, unsigned long end)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm);
+		__ksm_exit(mm, tlbp, end);
 }
 
 /*
@@ -64,7 +81,13 @@ static inline int ksm_fork(struct mm_str
 	return 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm)
+static inline bool ksm_test_exit(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm,
+			    struct mmu_gather **tlbp, unsigned long end)
 {
 }
 
--- ksm8/kernel/fork.c	2009-08-01 05:02:09.000000000 +0100
+++ ksm9/kernel/fork.c	2009-08-02 13:50:41.000000000 +0100
@@ -492,7 +492,6 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
-		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
--- ksm8/mm/ksm.c	2009-08-02 13:50:32.000000000 +0100
+++ ksm9/mm/ksm.c	2009-08-02 13:50:41.000000000 +0100
@@ -32,6 +32,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/ksm.h>
 
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 /*
@@ -347,6 +348,8 @@ static void break_cow(struct mm_struct *
 	struct vm_area_struct *vma;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -365,6 +368,8 @@ static struct page *get_mergeable_page(s
 	struct page *page;
 
 	down_read(&mm->mmap_sem);
+	if (ksm_test_exit(mm))
+		goto out;
 	vma = find_vma(mm, addr);
 	if (!vma || vma->vm_start > addr)
 		goto out;
@@ -439,11 +444,11 @@ static void remove_rmap_item_from_tree(s
 	} else if (rmap_item->address & NODE_FLAG) {
 		unsigned char age;
 		/*
-		 * ksm_thread can and must skip the rb_erase, because
+		 * Usually ksmd can and must skip the rb_erase, because
 		 * root_unstable_tree was already reset to RB_ROOT.
-		 * But __ksm_exit has to be careful: do the rb_erase
-		 * if it's interrupting a scan, and this rmap_item was
-		 * inserted by this scan rather than left from before.
+		 * But be careful when an mm is exiting: do the rb_erase
+		 * if this rmap_item was inserted by this scan, rather
+		 * than left over from before.
 		 */
 		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
 		BUG_ON(age > 1);
@@ -491,6 +496,8 @@ static int unmerge_ksm_pages(struct vm_a
 	int err = 0;
 
 	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (ksm_test_exit(vma->vm_mm))
+			break;
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
@@ -507,34 +514,50 @@ static int unmerge_and_remove_all_rmap_i
 	int err = 0;
 
 	spin_lock(&ksm_mmlist_lock);
-	mm_slot = list_entry(ksm_mm_head.mm_list.next,
+	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
 						struct mm_slot, mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	while (mm_slot != &ksm_mm_head) {
+	for (mm_slot = ksm_scan.mm_slot;
+			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
 		mm = mm_slot->mm;
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (ksm_test_exit(mm))
+				break;
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
 						vma->vm_start, vma->vm_end);
-			if (err) {
-				up_read(&mm->mmap_sem);
-				goto out;
-			}
+			if (err)
+				goto error;
 		}
+
 		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		up_read(&mm->mmap_sem);
 
 		spin_lock(&ksm_mmlist_lock);
-		mm_slot = list_entry(mm_slot->mm_list.next,
+		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
 						struct mm_slot, mm_list);
-		spin_unlock(&ksm_mmlist_lock);
+		if (ksm_test_exit(mm)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			spin_unlock(&ksm_mmlist_lock);
+
+			free_mm_slot(mm_slot);
+			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+			up_read(&mm->mmap_sem);
+			mmdrop(mm);
+		} else {
+			spin_unlock(&ksm_mmlist_lock);
+			up_read(&mm->mmap_sem);
+		}
 	}
 
 	ksm_scan.seqnr = 0;
-out:
+	return 0;
+
+error:
+	up_read(&mm->mmap_sem);
 	spin_lock(&ksm_mmlist_lock);
 	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
@@ -755,6 +778,9 @@ static int try_to_merge_with_ksm_page(st
 	int err = -EFAULT;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1))
+		goto out;
+
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1)
 		goto out;
@@ -796,6 +822,10 @@ static int try_to_merge_two_pages(struct
 		return err;
 
 	down_read(&mm1->mmap_sem);
+	if (ksm_test_exit(mm1)) {
+		up_read(&mm1->mmap_sem);
+		goto out;
+	}
 	vma = find_vma(mm1, addr1);
 	if (!vma || vma->vm_start > addr1) {
 		up_read(&mm1->mmap_sem);
@@ -1181,7 +1211,12 @@ next_mm:
 
 	mm = slot->mm;
 	down_read(&mm->mmap_sem);
-	for (vma = find_vma(mm, ksm_scan.address); vma; vma = vma->vm_next) {
+	if (ksm_test_exit(mm))
+		vma = NULL;
+	else
+		vma = find_vma(mm, ksm_scan.address);
+
+	for (; vma; vma = vma->vm_next) {
 		if (!(vma->vm_flags & VM_MERGEABLE))
 			continue;
 		if (ksm_scan.address < vma->vm_start)
@@ -1190,6 +1225,8 @@ next_mm:
 			ksm_scan.address = vma->vm_end;
 
 		while (ksm_scan.address < vma->vm_end) {
+			if (ksm_test_exit(mm))
+				break;
 			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
 			if (*page && PageAnon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
@@ -1212,6 +1249,11 @@ next_mm:
 		}
 	}
 
+	if (ksm_test_exit(mm)) {
+		ksm_scan.address = 0;
+		ksm_scan.rmap_item = list_entry(&slot->rmap_list,
+						struct rmap_item, link);
+	}
 	/*
 	 * Nuke all the rmap_items that are above this current rmap:
 	 * because there were no VM_MERGEABLE vmas with such addresses.
@@ -1226,24 +1268,29 @@ next_mm:
 		 * We've completed a full scan of all vmas, holding mmap_sem
 		 * throughout, and found no VM_MERGEABLE: so do the same as
 		 * __ksm_exit does to remove this mm from all our lists now.
+		 * This applies either when cleaning up after __ksm_exit
+		 * (but beware: we can reach here even before __ksm_exit),
+		 * or when all VM_MERGEABLE areas have been unmapped (and
+		 * mmap_sem then protects against race with MADV_MERGEABLE).
 		 */
 		hlist_del(&slot->link);
 		list_del(&slot->mm_list);
+		spin_unlock(&ksm_mmlist_lock);
+
 		free_mm_slot(slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		up_read(&mm->mmap_sem);
+		mmdrop(mm);
+	} else {
+		spin_unlock(&ksm_mmlist_lock);
+		up_read(&mm->mmap_sem);
 	}
-	spin_unlock(&ksm_mmlist_lock);
-	up_read(&mm->mmap_sem);
 
 	/* Repeat until we've completed scanning the whole list */
 	slot = ksm_scan.mm_slot;
 	if (slot != &ksm_mm_head)
 		goto next_mm;
 
-	/*
-	 * Bump seqnr here rather than at top, so that __ksm_exit
-	 * can skip rb_erase on unstable tree until we run again.
-	 */
 	ksm_scan.seqnr++;
 	return NULL;
 }
@@ -1368,6 +1415,7 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
+	atomic_inc(&mm->mm_count);
 
 	if (needs_wakeup)
 		wake_up_interruptible(&ksm_thread_wait);
@@ -1375,41 +1423,45 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm)
+void __ksm_exit(struct mm_struct *mm,
+		struct mmu_gather **tlbp, unsigned long end)
 {
 	struct mm_slot *mm_slot;
+	int easy_to_free = 0;
 
 	/*
-	 * This process is exiting: doesn't hold and doesn't need mmap_sem;
-	 * but we do need to exclude ksmd and other exiters while we modify
-	 * the various lists and trees.
+	 * This process is exiting: if it's straightforward (as is the
+	 * case when ksmd was never running), free mm_slot immediately.
+	 * But if it's at the cursor or has rmap_items linked to it, use
+	 * mmap_sem to synchronize with any break_cows before pagetables
+	 * are freed, and leave the mm_slot on the list for ksmd to free.
+	 * Beware: ksm may already have noticed it exiting and freed the slot.
 	 */
-	mutex_lock(&ksm_thread_mutex);
+
 	spin_lock(&ksm_mmlist_lock);
 	mm_slot = get_mm_slot(mm);
-	if (!list_empty(&mm_slot->rmap_list)) {
-		spin_unlock(&ksm_mmlist_lock);
-		remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
-		spin_lock(&ksm_mmlist_lock);
-	}
-
-	if (ksm_scan.mm_slot == mm_slot) {
-		ksm_scan.mm_slot = list_entry(
-			mm_slot->mm_list.next, struct mm_slot, mm_list);
-		ksm_scan.address = 0;
-		ksm_scan.rmap_item = list_entry(
-			&ksm_scan.mm_slot->rmap_list, struct rmap_item, link);
-		if (ksm_scan.mm_slot == &ksm_mm_head)
-			ksm_scan.seqnr++;
+	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
+		if (list_empty(&mm_slot->rmap_list)) {
+			hlist_del(&mm_slot->link);
+			list_del(&mm_slot->mm_list);
+			easy_to_free = 1;
+		} else {
+			list_move(&mm_slot->mm_list,
+				  &ksm_scan.mm_slot->mm_list);
+		}
 	}
-
-	hlist_del(&mm_slot->link);
-	list_del(&mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
-	free_mm_slot(mm_slot);
-	clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-	mutex_unlock(&ksm_thread_mutex);
+	if (easy_to_free) {
+		free_mm_slot(mm_slot);
+		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+		mmdrop(mm);
+	} else if (mm_slot) {
+		tlb_finish_mmu(*tlbp, 0, end);
+		down_write(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
+		*tlbp = tlb_gather_mmu(mm, 1);
+	}
 }
 
 #define KSM_ATTR_RO(_name) \
--- ksm8/mm/memory.c	2009-08-01 05:02:09.000000000 +0100
+++ ksm9/mm/memory.c	2009-08-02 13:50:41.000000000 +0100
@@ -2647,8 +2647,9 @@ static int do_anonymous_page(struct mm_s
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (!pte_none(*page_table))
+	if (!pte_none(*page_table) || ksm_test_exit(mm))
 		goto release;
+
 	inc_mm_counter(mm, anon_rss);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
@@ -2790,7 +2791,7 @@ static int __do_fault(struct mm_struct *
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (likely(pte_same(*page_table, orig_pte))) {
+	if (likely(pte_same(*page_table, orig_pte) && !ksm_test_exit(mm))) {
 		flush_icache_page(vma, page);
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
--- ksm8/mm/mmap.c	2009-06-25 05:18:10.000000000 +0100
+++ ksm9/mm/mmap.c	2009-08-02 13:50:41.000000000 +0100
@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/ksm.h>
 #include <linux/mmu_notifier.h>
 #include <linux/perf_counter.h>
 
@@ -2114,6 +2115,14 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
+
+	/*
+	 * For KSM to handle OOM without deadlock when it's breaking COW in a
+	 * likely victim of the OOM killer, we must serialize with ksm_exit()
+	 * after freeing mm's pages but before freeing its page tables.
+	 */
+	ksm_exit(mm, &tlb, end);
+
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
--
Date: 	Mon, 3 Aug 2009 13:19:13 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 10/12] ksm: sysfs and defaults

At present KSM is just a waste of space if you don't have CONFIG_SYSFS=y
to provide the /sys/kernel/mm/ksm files to tune and activate it.

Make KSM depend on SYSFS?  Could do, but it might be better to provide
some defaults so that KSM works out-of-the-box, ready for testers to
madvise MADV_MERGEABLE, even without SYSFS.

Though anyone serious is likely to want to retune the numbers to their
taste once they have experience; and whether these settings ever reach
2.6.32 can be discussed along the way.  

Save 1kB from tiny kernels by #ifdef'ing the SYSFS side of it.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

--- ksm9/mm/ksm.c	2009-08-02 13:50:41.000000000 +0100
+++ ksm10/mm/ksm.c	2009-08-02 13:50:48.000000000 +0100
@@ -163,18 +163,18 @@ static unsigned long ksm_pages_unshared;
 static unsigned long ksm_rmap_items;
 
 /* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages;
+static unsigned long ksm_max_kernel_pages = 2000;
 
 /* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan;
+static unsigned int ksm_thread_pages_to_scan = 200;
 
 /* Milliseconds ksmd should sleep between batches */
-static unsigned int ksm_thread_sleep_millisecs;
+static unsigned int ksm_thread_sleep_millisecs = 20;
 
 #define KSM_RUN_STOP	0
 #define KSM_RUN_MERGE	1
 #define KSM_RUN_UNMERGE	2
-static unsigned int ksm_run;
+static unsigned int ksm_run = KSM_RUN_MERGE;
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
@@ -506,6 +506,10 @@ static int unmerge_ksm_pages(struct vm_a
 	return err;
 }
 
+#ifdef CONFIG_SYSFS
+/*
+ * Only called through the sysfs control interface:
+ */
 static int unmerge_and_remove_all_rmap_items(void)
 {
 	struct mm_slot *mm_slot;
@@ -563,6 +567,7 @@ error:
 	spin_unlock(&ksm_mmlist_lock);
 	return err;
 }
+#endif /* CONFIG_SYSFS */
 
 static u32 calc_checksum(struct page *page)
 {
@@ -1464,6 +1469,11 @@ void __ksm_exit(struct mm_struct *mm,
 	}
 }
 
+#ifdef CONFIG_SYSFS
+/*
+ * This all compiles without CONFIG_SYSFS, but is a waste of space.
+ */
+
 #define KSM_ATTR_RO(_name) \
 	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 #define KSM_ATTR(_name) \
@@ -1646,6 +1656,7 @@ static struct attribute_group ksm_attr_g
 	.attrs = ksm_attrs,
 	.name = "ksm",
 };
+#endif /* CONFIG_SYSFS */
 
 static int __init ksm_init(void)
 {
@@ -1667,16 +1678,17 @@ static int __init ksm_init(void)
 		goto out_free2;
 	}
 
+#ifdef CONFIG_SYSFS
 	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
 	if (err) {
 		printk(KERN_ERR "ksm: register sysfs failed\n");
-		goto out_free3;
+		kthread_stop(ksm_thread);
+		goto out_free2;
 	}
+#endif /* CONFIG_SYSFS */
 
 	return 0;
 
-out_free3:
-	kthread_stop(ksm_thread);
 out_free2:
 	mm_slots_hash_free();
 out_free1:
--
Date: 	Mon, 3 Aug 2009 13:21:34 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 11/12] ksm: add some documentation

Add Documentation/vm/ksm.txt: how to use the Kernel Samepage Merging feature

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Cc: Michael Kerrisk <mtk.manpages at googlemail.com>
Cc: Randy Dunlap <randy.dunlap at oracle.com>
---

 Documentation/vm/00-INDEX |    2 
 Documentation/vm/ksm.txt  |   89 ++++++++++++++++++++++++++++++++++++
 mm/Kconfig                |    1 
 3 files changed, 92 insertions(+)

--- ksm10/Documentation/vm/00-INDEX	2009-06-10 04:05:27.000000000 +0100
+++ ksm11/Documentation/vm/00-INDEX	2009-08-02 13:50:57.000000000 +0100
@@ -6,6 +6,8 @@ balance
 	- various information on memory balancing.
 hugetlbpage.txt
 	- a brief summary of hugetlbpage support in the Linux kernel.
+ksm.txt
+	- how to use the Kernel Samepage Merging feature.
 locking
 	- info on how locking and synchronization is done in the Linux vm code.
 numa
--- ksm10/Documentation/vm/ksm.txt	1970-01-01 01:00:00.000000000 +0100
+++ ksm11/Documentation/vm/ksm.txt	2009-08-02 13:50:57.000000000 +0100
@@ -0,0 +1,89 @@
+How to use the Kernel Samepage Merging feature
+----------------------------------------------
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32.  See mm/ksm.c for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+The KSM daemon ksmd periodically scans those areas of user memory which
+have been registered with it, looking for pages of identical content which
+can be replaced by a single write-protected page (which is automatically
+copied if a process later wants to update its content).
+
+KSM was originally developed for use with KVM (where it was known as
+Kernel Shared Memory), to fit more virtual machines into physical memory,
+by sharing the data common between them.  But it can be useful to any
+application which generates many instances of the same data.
+
+KSM only merges anonymous (private) pages, never pagecache (file) pages.
+KSM's merged pages are at present locked into kernel memory for as long
+as they are shared: so cannot be swapped out like the user pages they
+replace (but swapping KSM pages should follow soon in a later release).
+
+KSM only operates on those areas of address space which an application
+has advised to be likely candidates for merging, by using the madvise(2)
+system call: int madvise(addr, length, MADV_MERGEABLE).
+
+The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
+that advice and restore unshared pages: whereupon KSM unmerges whatever
+it merged in that range.  Note: this unmerging call may suddenly require
+more memory than is available - possibly failing with EAGAIN, but more
+probably arousing the Out-Of-Memory killer.
+
+If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
+and MADV_UNMERGEABLE simply fail with EINVAL.  If the running kernel was
+built with CONFIG_KSM=y, those calls will normally succeed: even if the
+KSM daemon is not currently running, MADV_MERGEABLE still registers
+the range for whenever the KSM daemon is started; even if the range
+cannot contain any pages which KSM could actually merge; even if
+MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
+
+Like other madvise calls, they are intended for use on mapped areas of
+the user address space: they will report ENOMEM if the specified range
+includes unmapped gaps (though working on the intervening mapped areas),
+and might fail with EAGAIN if not enough memory for internal structures.
+
+Applications should be considerate in their use of MADV_MERGEABLE,
+restricting its use to areas likely to benefit.  KSM's scans may use
+a lot of processing power, and its kernel-resident pages are a limited
+resource.  Some installations will disable KSM for these reasons.
+
+The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
+readable by all but writable only by root:
+
+max_kernel_pages - set to maximum number of kernel pages that KSM may use
+                   e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages"
+                   Value 0 imposes no limit on the kernel pages KSM may use;
+                   but note that any process using MADV_MERGEABLE can cause
+                   KSM to allocate these pages, unswappable until it exits.
+                   Default: 2000 (chosen for demonstration purposes)
+
+pages_to_scan    - how many present pages to scan before ksmd goes to sleep
+                   e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan"
+                   Default: 200 (chosen for demonstration purposes)
+
+sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
+                   e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
+                   Default: 20 (chosen for demonstration purposes)
+
+run              - set 0 to stop ksmd from running but keep merged pages,
+                   set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
+                   set 2 to stop ksmd and unmerge all pages currently merged,
+                         but leave mergeable areas registered for next run
+                   Default: 1 (for immediate use by apps which register)
+
+The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
+
+pages_shared     - how many shared unswappable kernel pages KSM is using
+pages_sharing    - how many more sites are sharing them i.e. how much saved
+pages_unshared   - how many pages unique but repeatedly checked for merging
+pages_volatile   - how many pages changing too fast to be placed in a tree
+full_scans       - how many times all mergeable areas have been scanned
+
+A high ratio of pages_sharing to pages_shared indicates good sharing, but
+a high ratio of pages_unshared to pages_sharing indicates wasted effort.
+pages_volatile embraces several different kinds of activity, but a high
+proportion there would also indicate poor use of madvise MADV_MERGEABLE.
+
+Izik Eidus,
+Hugh Dickins, 30 July 2009
--- ksm10/mm/Kconfig	2009-08-01 05:02:09.000000000 +0100
+++ ksm11/mm/Kconfig	2009-08-02 13:50:57.000000000 +0100
@@ -224,6 +224,7 @@ config KSM
 	  the many instances by a single resident page with that content, so
 	  saving memory until one or another app needs to modify the content.
 	  Recommended for use with KVM, or with other duplicative applications.
+	  See Documentation/vm/ksm.txt for more information.
 
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
--
Date: 	Mon, 3 Aug 2009 13:22:53 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
Subject: [PATCH 12/12] ksm: remove VM_MERGEABLE_FLAGS

KSM originally stood for Kernel Shared Memory: but the kernel has long
supported shared memory, and VM_SHARED and VM_MAYSHARE vmas, and KSM is
something else.  So we switched to saying "merge" instead of "share".

But Chris Wright points out that this is confusing where mmap.c merges
adjacent vmas: most especially in the name VM_MERGEABLE_FLAGS, used by
is_mergeable_vma() to let vmas be merged despite flags being different.

Call it VMA_MERGE_DESPITE_FLAGS?  Perhaps, but at present it consists
only of VM_CAN_NONLINEAR: so for now it's clearer on all sides to use
that directly, with a comment on it in is_mergeable_vma().

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---
This patch got lost along the way last time: no big deal but try again.

 mm/mmap.c |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

--- ksm11/mm/mmap.c	2009-08-02 13:50:41.000000000 +0100
+++ ksm12/mm/mmap.c	2009-08-02 13:51:04.000000000 +0100
@@ -660,9 +660,6 @@ again:			remove_next = 1 + (end > next->
 	validate_mm(mm);
 }
 
-/* Flags that can be inherited from an existing mapping when merging */
-#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
-
 /*
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
@@ -670,7 +667,8 @@ again:			remove_next = 1 + (end > next->
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
-	if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS)
+	/* VM_CAN_NONLINEAR may get set later by f_op->mmap() */
+	if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
 		return 0;
 	if (vma->vm_file != file)
 		return 0;



Index: config-generic
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/config-generic,v
retrieving revision 1.238.6.32
retrieving revision 1.238.6.33
diff -u -p -r1.238.6.32 -r1.238.6.33
--- config-generic	5 Aug 2009 23:00:08 -0000	1.238.6.32
+++ config-generic	8 Aug 2009 18:06:03 -0000	1.238.6.33
@@ -2230,6 +2230,7 @@ CONFIG_AGP_SIS=y
 CONFIG_AGP_SWORKS=y
 CONFIG_AGP_VIA=y
 CONFIG_AGP_EFFICEON=y
+CONFIG_VGA_ARB=y
 CONFIG_DRM=m
 CONFIG_DRM_TDFX=m
 CONFIG_DRM_R128=m


Index: config-x86-generic
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/config-x86-generic,v
retrieving revision 1.68.6.15
retrieving revision 1.68.6.16
diff -u -p -r1.68.6.15 -r1.68.6.16
--- config-x86-generic	5 Aug 2009 23:00:09 -0000	1.68.6.15
+++ config-x86-generic	8 Aug 2009 18:06:03 -0000	1.68.6.16
@@ -75,6 +75,7 @@ CONFIG_X86_CPU_DEBUG=m
 CONFIG_EDD=m
 # CONFIG_EDD_OFF is not set
 # CONFIG_NUMA is not set
+
 # CONFIG_NOHIGHMEM is not set
 CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set

drm-r600-kms.patch:
 b/drivers/gpu/drm/radeon/Makefile        |    3 
 b/drivers/gpu/drm/radeon/atombios_crtc.c |    1 
 b/drivers/gpu/drm/radeon/avivod.h        |   60 +
 b/drivers/gpu/drm/radeon/r100.c          |   69 +
 b/drivers/gpu/drm/radeon/r300.c          |    2 
 b/drivers/gpu/drm/radeon/r600.c          | 1243 +++++++++++++++++++++++++++++--
 b/drivers/gpu/drm/radeon/r600d.h         |  349 ++++++++
 b/drivers/gpu/drm/radeon/radeon.h        |   51 +
 b/drivers/gpu/drm/radeon/radeon_asic.h   |  139 +++
 b/drivers/gpu/drm/radeon/radeon_clocks.c |   10 
 b/drivers/gpu/drm/radeon/radeon_device.c |  340 ++++----
 b/drivers/gpu/drm/radeon/radeon_ring.c   |   63 -
 b/drivers/gpu/drm/radeon/radeon_share.h  |   66 +
 b/drivers/gpu/drm/radeon/radeon_ttm.c    |    6 
 b/drivers/gpu/drm/radeon/rs400.c         |    2 
 b/drivers/gpu/drm/radeon/rv770.c         |  981 +++++++++++++++++++++++-
 b/drivers/gpu/drm/radeon/rv770d.h        |  341 ++++++++
 drivers/gpu/drm/radeon/r300.h            |   36 
 drivers/gpu/drm/radeon/rs780.c           |  102 --
 19 files changed, 3366 insertions(+), 498 deletions(-)

Index: drm-r600-kms.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/drm-r600-kms.patch,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -u -p -r1.1.2.2 -r1.1.2.3
--- drm-r600-kms.patch	5 Aug 2009 23:00:15 -0000	1.1.2.2
+++ drm-r600-kms.patch	8 Aug 2009 18:06:03 -0000	1.1.2.3
@@ -1,12 +1,14 @@
-From 6599b4a047d80cd7b8715b5ad74e0735e6d4b941 Mon Sep 17 00:00:00 2001
+From 00f3a05a622d6888af3ba284e7115f3a08035a3e Mon Sep 17 00:00:00 2001
 From: Jerome Glisse <jglisse at redhat.com>
 Date: Fri, 24 Jul 2009 19:42:23 +0200
 Subject: [PATCH] radeon: add basic KMS support for r6xx & r7xx chipset.
 
 This only provide a kms drm fb device for this hw and allow X
 to run with no acceleration.
+
+Signed-off-by: Jerome Glisse <jglisse at redhat.com>
 ---
- drivers/gpu/drm/radeon/Makefile        |    2 +-
+ drivers/gpu/drm/radeon/Makefile        |    3 +-
  drivers/gpu/drm/radeon/atombios_crtc.c |    1 +
  drivers/gpu/drm/radeon/avivod.h        |   60 ++
  drivers/gpu/drm/radeon/r100.c          |   69 ++
@@ -15,18 +17,17 @@ to run with no acceleration.
  drivers/gpu/drm/radeon/r600.c          | 1243 ++++++++++++++++++++++++++++++--
  drivers/gpu/drm/radeon/r600d.h         |  349 +++++++++
  drivers/gpu/drm/radeon/radeon.h        |   51 ++-
- drivers/gpu/drm/radeon/radeon_asic.h   |  137 ++++-
+ drivers/gpu/drm/radeon/radeon_asic.h   |  139 ++++-
  drivers/gpu/drm/radeon/radeon_clocks.c |   10 +-
- drivers/gpu/drm/radeon/radeon_device.c |  338 +++++----
- drivers/gpu/drm/radeon/radeon_drv.h    |    1 +
+ drivers/gpu/drm/radeon/radeon_device.c |  340 +++++----
  drivers/gpu/drm/radeon/radeon_ring.c   |   63 +--
  drivers/gpu/drm/radeon/radeon_share.h  |   66 ++
  drivers/gpu/drm/radeon/radeon_ttm.c    |    6 +-
  drivers/gpu/drm/radeon/rs400.c         |    2 +-
  drivers/gpu/drm/radeon/rs780.c         |  102 ---
- drivers/gpu/drm/radeon/rv770.c         |  980 +++++++++++++++++++++++--
+ drivers/gpu/drm/radeon/rv770.c         |  981 ++++++++++++++++++++++++--
  drivers/gpu/drm/radeon/rv770d.h        |  340 +++++++++
- 20 files changed, 3364 insertions(+), 494 deletions(-)
+ 19 files changed, 3366 insertions(+), 497 deletions(-)
  create mode 100644 drivers/gpu/drm/radeon/avivod.h
  delete mode 100644 drivers/gpu/drm/radeon/r300.h
  create mode 100644 drivers/gpu/drm/radeon/r600d.h
@@ -34,18 +35,19 @@ to run with no acceleration.
  create mode 100644 drivers/gpu/drm/radeon/rv770d.h
 
 diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
-index 013d380..7384cad 100644
+index 013d380..308416f 100644
 --- a/drivers/gpu/drm/radeon/Makefile
 +++ b/drivers/gpu/drm/radeon/Makefile
-@@ -13,7 +13,7 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
+@@ -13,8 +13,7 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
  	radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
  	radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
  	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
 -	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
-+	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o \
- 	radeon_test.o
+-	radeon_test.o
++	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o
  
  radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
+ 
 diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
 index 74d034f..629d7c8 100644
 --- a/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -264,7 +266,7 @@ index 8486b4d..0000000
 -
 -#endif
 diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
-index 538cd90..ae3a9e9 100644
+index 538cd90..46c9ffd 100644
 --- a/drivers/gpu/drm/radeon/r600.c
 +++ b/drivers/gpu/drm/radeon/r600.c
 @@ -25,12 +25,17 @@
@@ -618,8 +620,8 @@ index 538cd90..ae3a9e9 100644
 +	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 +	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
 +	/* Setup GPU memory space */
++	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 +	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
-+	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 +	if (rdev->flags & RADEON_IS_AGP) {
 +		r = radeon_agp_init(rdev);
 +		if (r)
@@ -688,25 +690,22 @@ index 538cd90..ae3a9e9 100644
 +	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
 +	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
 +	return 0;
- }
- 
--void r600_vram_info(struct radeon_device *rdev)
++}
++
 +int r600_gpu_reset(struct radeon_device *rdev)
- {
--	r600_vram_get_type(rdev);
--	rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
--	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
++{
 +	/* FIXME: implement */
 +	return 0;
-+}
+ }
  
--	/* Could aper size report 0 ? */
--	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
--	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+-void r600_vram_info(struct radeon_device *rdev)
 +static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
 +					     u32 num_backends,
 +					     u32 backend_disable_mask)
-+{
+ {
+-	r600_vram_get_type(rdev);
+-	rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
+-	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 +	u32 backend_map = 0;
 +	u32 enabled_backends_mask;
 +	u32 enabled_backends_count;
@@ -714,7 +713,10 @@ index 538cd90..ae3a9e9 100644
 +	u32 swizzle_pipe[R6XX_MAX_PIPES];
 +	u32 cur_backend;
 +	u32 i;
-+
+ 
+-	/* Could aper size report 0 ? */
+-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
 +	if (num_tile_pipes > R6XX_MAX_PIPES)
 +		num_tile_pipes = R6XX_MAX_PIPES;
 +	if (num_tile_pipes < 1)
@@ -821,8 +823,8 @@ index 538cd90..ae3a9e9 100644
 +		val >>= 1;
 +	}
 +	return ret;
-+}
-+
+ }
+ 
 +void r600_gpu_init(struct radeon_device *rdev)
 +{
 +	u32 tiling_config;
@@ -1091,8 +1093,8 @@ index 538cd90..ae3a9e9 100644
 +		break;
 +	}
 +	WREG32(TC_CNTL, tmp);
- }
- 
++}
++
 +
  /*
   * Indirect registers accessor
@@ -1351,7 +1353,11 @@ index 538cd90..ae3a9e9 100644
 +		  uint64_t dst_offset,
 +		  unsigned num_pages,
 +		  struct radeon_fence *fence)
-+{
+ {
+-	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
+-	(void)RREG32(R600_PCIE_PORT_INDEX);
+-	WREG32(R600_PCIE_PORT_DATA, (v));
+-	(void)RREG32(R600_PCIE_PORT_DATA);
 +	/* FIXME: implement */
 +	return 0;
 +}
@@ -1371,11 +1377,7 @@ index 538cd90..ae3a9e9 100644
 +int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 +			 uint32_t tiling_flags, uint32_t pitch,
 +			 uint32_t offset, uint32_t obj_size)
- {
--	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
--	(void)RREG32(R600_PCIE_PORT_INDEX);
--	WREG32(R600_PCIE_PORT_DATA, (v));
--	(void)RREG32(R600_PCIE_PORT_DATA);
++{
 +	/* FIXME: implement */
 +	return 0;
 +}
@@ -1933,7 +1935,7 @@ index 0000000..3337227
 +
 +#endif
 diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
-index b1d945b..542131c 100644
+index b1d945b..0c5c63a 100644
 --- a/drivers/gpu/drm/radeon/radeon.h
 +++ b/drivers/gpu/drm/radeon/radeon.h
 @@ -50,8 +50,8 @@
@@ -1997,20 +1999,20 @@ index b1d945b..542131c 100644
 -	unsigned		gtt_location;
 -	unsigned		gtt_size;
 -	unsigned		vram_location;
+ 	/* for some chips with <= 32MB we need to lie
+ 	 * about vram size near mc fb location */
+-	unsigned		mc_vram_size;
++	u64			mc_vram_size;
 +	u64			gtt_location;
 +	u64			gtt_size;
 +	u64			gtt_start;
 +	u64			gtt_end;
 +	u64			vram_location;
- 	/* for some chips with <= 32MB we need to lie
- 	 * about vram size near mc fb location */
--	unsigned		mc_vram_size;
-+	u64			mc_vram_size;
 +	u64			vram_start;
 +	u64			vram_end;
-+	u64			real_vram_size;
  	unsigned		vram_width;
 -	unsigned		real_vram_size;
++	u64			real_vram_size;
  	int			vram_mtrr;
  	bool			vram_is_ddr;
  };
@@ -2095,7 +2097,7 @@ index b1d945b..542131c 100644
  #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
  #define radeon_fence_ring_emit(rdev, fence) (rdev)->asic->fence_ring_emit((rdev), (fence))
 diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
-index 9a75876..6c30d11 100644
+index 9a75876..7e9bb12 100644
 --- a/drivers/gpu/drm/radeon/radeon_asic.h
 +++ b/drivers/gpu/drm/radeon/radeon_asic.h
 @@ -58,6 +58,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
@@ -2181,15 +2183,18 @@ index 9a75876..6c30d11 100644
  	.irq_set = &rs600_irq_set,
  	.irq_process = &r100_irq_process,
  	.fence_ring_emit = &r300_fence_ring_emit,
-@@ -380,6 +401,7 @@ static struct radeon_asic rv515_asic = {
+@@ -380,7 +401,10 @@ static struct radeon_asic rv515_asic = {
  	.cp_init = &r100_cp_init,
  	.cp_fini = &r100_cp_fini,
  	.cp_disable = &r100_cp_disable,
 +	.cp_commit = &r100_cp_commit,
  	.ring_start = &rv515_ring_start,
++	.ring_ib_execute = &r100_ring_ib_execute,
++	.ib_test = &r100_ib_test,
  	.irq_set = &r100_irq_set,
  	.irq_process = &r100_irq_process,
-@@ -422,7 +444,10 @@ static struct radeon_asic r520_asic = {
+ 	.fence_ring_emit = &r300_fence_ring_emit,
+@@ -422,7 +446,10 @@ static struct radeon_asic r520_asic = {
  	.cp_init = &r100_cp_init,
  	.cp_fini = &r100_cp_fini,
  	.cp_disable = &r100_cp_disable,
@@ -2200,7 +2205,7 @@ index 9a75876..6c30d11 100644
  	.irq_set = &r100_irq_set,
  	.irq_process = &r100_irq_process,
  	.fence_ring_emit = &r300_fence_ring_emit,
-@@ -440,9 +465,119 @@ static struct radeon_asic r520_asic = {
+@@ -440,9 +467,119 @@ static struct radeon_asic r520_asic = {
  };
  
  /*
@@ -2343,7 +2348,7 @@ index a37cbce..152eef1 100644
  			mpll->reference_div = spll->reference_div;
  	} else {
 diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
-index a162ade..532074e 100644
+index 9ff6dcb..d4c804d 100644
 --- a/drivers/gpu/drm/radeon/radeon_device.c
 +++ b/drivers/gpu/drm/radeon/radeon_device.c
 @@ -37,7 +37,7 @@
@@ -2364,20 +2369,20 @@ index a162ade..532074e 100644
  {
  	int i;
  
-@@ -154,16 +154,16 @@ int radeon_mc_setup(struct radeon_device *rdev)
- 		rdev->mc.vram_location = 0;
- 		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+@@ -156,16 +156,14 @@ int radeon_mc_setup(struct radeon_device *rdev)
+ 		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
+ 		rdev->mc.gtt_location = tmp;
  	}
 -	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
-+	DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.real_vram_size >> 20));
++	DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20));
  	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
 -		 rdev->mc.vram_location,
 -		 rdev->mc.vram_location + rdev->mc.mc_vram_size - 1);
+-	if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
+-		DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
+-	DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
 +		 (unsigned)rdev->mc.vram_location,
 +		 (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1));
- 	if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
- 		DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
--	DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
 +	DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20));
  	DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
 -		 rdev->mc.gtt_location,
@@ -2387,7 +2392,7 @@ index a162ade..532074e 100644
  	return 0;
  }
  
-@@ -203,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev)
+@@ -205,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev)
  
  }
  
@@ -2397,7 +2402,7 @@ index a162ade..532074e 100644
 +	if (rdev->dummy_page.page == NULL)
 +		return -ENOMEM;
 +	rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
-+					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
++					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);	
 +	if (!rdev->dummy_page.addr) {
 +		__free_page(rdev->dummy_page.page);
 +		rdev->dummy_page.page = NULL;
@@ -2419,7 +2424,7 @@ index a162ade..532074e 100644
  
  /*
   * Registers accessors functions.
-@@ -328,9 +353,15 @@ int radeon_asic_init(struct radeon_device *rdev)
+@@ -330,9 +353,15 @@ int radeon_asic_init(struct radeon_device *rdev)
  	case CHIP_RV635:
  	case CHIP_RV670:
  	case CHIP_RS780:
@@ -2435,7 +2440,7 @@ index a162ade..532074e 100644
  	default:
  		/* FIXME: not supported yet */
  		return -EINVAL;
-@@ -453,7 +484,7 @@ int radeon_device_init(struct radeon_device *rdev,
+@@ -455,7 +484,7 @@ int radeon_device_init(struct radeon_device *rdev,
  		       struct pci_dev *pdev,
  		       uint32_t flags)
  {
@@ -2444,7 +2449,7 @@ index a162ade..532074e 100644
  	int dma_bits;
  
  	DRM_INFO("radeon: Initializing kernel modesetting.\n");
-@@ -492,10 +523,6 @@ int radeon_device_init(struct radeon_device *rdev,
+@@ -494,10 +523,6 @@ int radeon_device_init(struct radeon_device *rdev,
  	if (r) {
  		return r;
  	}
@@ -2455,7 +2460,7 @@ index a162ade..532074e 100644
  
  	/* set DMA mask + need_dma32 flags.
  	 * PCIE - can handle 40-bits.
-@@ -526,111 +553,118 @@ int radeon_device_init(struct radeon_device *rdev,
+@@ -528,111 +553,118 @@ int radeon_device_init(struct radeon_device *rdev,
  	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base);
  	DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size);
  
@@ -2531,7 +2536,7 @@ index a162ade..532074e 100644
 +		rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
 +				MTRR_TYPE_WRCOMB, 1);
 +		DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
-+				(unsigned)(rdev->mc.real_vram_size >> 20),
++				(unsigned)(rdev->mc.mc_vram_size >> 20),
 +				(unsigned)(rdev->mc.aper_size >> 20));
 +		DRM_INFO("RAM width %dbits %cDR\n",
 +				rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
@@ -2663,7 +2668,7 @@ index a162ade..532074e 100644
  	r = radeon_modeset_init(rdev);
  	if (r) {
  		return r;
-@@ -656,26 +690,29 @@ void radeon_device_fini(struct radeon_device *rdev)
+@@ -658,26 +690,29 @@ void radeon_device_fini(struct radeon_device *rdev)
  	rdev->shutdown = true;
  	/* Order matter so becarefull if you rearrange anythings */
  	radeon_modeset_fini(rdev);
@@ -2709,7 +2714,7 @@ index a162ade..532074e 100644
  	iounmap(rdev->rmmio);
  	rdev->rmmio = NULL;
  }
-@@ -713,9 +750,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
+@@ -715,9 +750,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
  	/* wait for gpu to finish processing current batch */
  	radeon_fence_wait_last(rdev);
  
@@ -2725,7 +2730,7 @@ index a162ade..532074e 100644
  	/* evict remaining vram memory */
  	radeon_object_evict_vram(rdev);
  
-@@ -751,33 +791,37 @@ int radeon_resume_kms(struct drm_device *dev)
+@@ -753,33 +791,37 @@ int radeon_resume_kms(struct drm_device *dev)
  	if (radeon_gpu_reset(rdev)) {
  		/* FIXME: what do we want to do here ? */
  	}
@@ -2789,18 +2794,6 @@ index a162ade..532074e 100644
  	}
  out:
  	fb_set_suspend(rdev->fbdev_info, 0);
-diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
-index 127d045..3933f82 100644
---- a/drivers/gpu/drm/radeon/radeon_drv.h
-+++ b/drivers/gpu/drm/radeon/radeon_drv.h
-@@ -143,6 +143,7 @@ enum radeon_family {
- 	CHIP_RV635,
- 	CHIP_RV670,
- 	CHIP_RS780,
-+	CHIP_RS880,
- 	CHIP_RV770,
- 	CHIP_RV730,
- 	CHIP_RV710,
 diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
 index 60d1593..37d0958 100644
 --- a/drivers/gpu/drm/radeon/radeon_ring.c
@@ -2973,7 +2966,7 @@ index 63a7735..365d4bc 100644
 +
  #endif
 diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
-index 15c3531..d2799cc 100644
+index 15c3531..d2aff30 100644
 --- a/drivers/gpu/drm/radeon/radeon_ttm.c
 +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
 @@ -352,7 +352,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
@@ -2990,7 +2983,7 @@ index 15c3531..d2799cc 100644
  	}
  	DRM_INFO("radeon: %uM of VRAM memory ready\n",
 -		 rdev->mc.real_vram_size / (1024 * 1024));
-+		 (unsigned)(rdev->mc.real_vram_size / (1024 * 1024)));
++		 (unsigned)rdev->mc.real_vram_size / (1024 * 1024));
  	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
  			   ((rdev->mc.gtt_size) >> PAGE_SHIFT));
  	if (r) {
@@ -3125,10 +3118,10 @@ index 0affcff..0000000
 -	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
 -}
 diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
-index 21d8ffd..08ce913 100644
+index 21d8ffd..3e12447 100644
 --- a/drivers/gpu/drm/radeon/rv770.c
 +++ b/drivers/gpu/drm/radeon/rv770.c
-@@ -26,99 +26,963 @@
+@@ -26,99 +26,964 @@
   *          Jerome Glisse
   */
  #include "drmP.h"
@@ -3964,7 +3957,8 @@ index 21d8ffd..08ce913 100644
 +	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 +	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
 +	/* Setup GPU memory space */
-+	rdev->mc.real_vram_size = rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
++	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
++	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
 +	if (rdev->flags & RADEON_IS_AGP) {
 +		r = radeon_agp_init(rdev);
 +		if (r)
@@ -4497,5 +4491,5 @@ index 0000000..b0a4354
 +
 +#endif
 -- 
-1.6.2.5
+1.6.0.6
 


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/kernel.spec,v
retrieving revision 1.1294.2.44
retrieving revision 1.1294.2.45
diff -u -p -r1.1294.2.44 -r1.1294.2.45
--- kernel.spec	5 Aug 2009 23:00:15 -0000	1.1294.2.44
+++ kernel.spec	8 Aug 2009 18:06:03 -0000	1.1294.2.45
@@ -640,6 +640,8 @@ Patch452: linux-2.6.30-no-pcspkr-modalia
 
 Patch460: linux-2.6-serial-460800.patch
 
+Patch470: die-floppy-die.patch
+
 Patch510: linux-2.6-silence-noise.patch
 Patch520: linux-2.6.30-hush-rom-warning.patch
 Patch530: linux-2.6-silence-fbcon-logo.patch
@@ -667,8 +669,10 @@ Patch1518: hid-ignore-all-recent-imon-de
 
 Patch1550: linux-2.6-ksm.patch
 Patch1551: linux-2.6-ksm-kvm.patch
+Patch1552: linux-2.6-ksm-updates.patch
 
 # nouveau + drm fixes
+Patch1810: drm-radeon-fixes.patch
 Patch1813: drm-radeon-pm.patch
 Patch1814: drm-nouveau.patch
 Patch1818: drm-i915-resume-force-mode.patch
@@ -678,6 +682,7 @@ Patch1821: drm-page-flip.patch
 Patch1824: drm-intel-next.patch
 Patch1825: drm-intel-pm.patch
 Patch1826: drm-r600-kms.patch
+Patch1827: drm-hush-vblank-warning.patch
 
 # vga arb
 Patch1900: linux-2.6-vga-arb.patch
@@ -709,6 +714,7 @@ Patch11010: via-hwmon-temp-sensor.patch
 
 # patches headed upstream
 Patch12010: linux-2.6-dell-laptop-rfkill-fix.patch
+Patch12011: linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch
 
 Patch19997: xen.pvops.pre.patch
 Patch19998: xen.pvops.patch
@@ -1225,6 +1231,9 @@ ApplyPatch alsa-tell-user-that-stream-to
 # The input layer spews crap no-one cares about.
 ApplyPatch linux-2.6-input-kill-stupid-messages.patch
 
+# stop floppy.ko from autoloading during udev...
+ApplyPatch die-floppy-die.patch
+
 # Get away from having to poll Toshibas
 #ApplyPatch linux-2.6-input-fix-toshiba-hotkeys.patch
 
@@ -1274,13 +1283,19 @@ ApplyPatch hid-ignore-all-recent-imon-de
 
 # Add kernel KSM support
 ApplyPatch linux-2.6-ksm.patch
+ApplyPatch linux-2.6-ksm-updates.patch
 # Optimize KVM for KSM support
 ApplyPatch linux-2.6-ksm-kvm.patch
 
+# Fix block I/O errors in KVM
+ApplyPatch linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch
+
 ApplyPatch linux-2.6-e1000-ich9.patch
 
 # Nouveau DRM + drm fixes
+ApplyPatch drm-radeon-fixes.patch
 ApplyPatch drm-r600-kms.patch
+ApplyPatch drm-hush-vblank-warning.patch
 
 ApplyPatch drm-nouveau.patch
 # pm broken on my thinkpad t60p - airlied
@@ -1793,7 +1808,12 @@ if [ `uname -i` == "x86_64" -o `uname -i
    [ -f /etc/sysconfig/kernel ]; then\
   /bin/sed -r -i -e 's/^DEFAULTKERNEL=%{-r*}$/DEFAULTKERNEL=kernel%{?-v:-%{-v*}}/' /etc/sysconfig/kernel || exit $?\
 fi}\
+%{expand:\
+%if %{with_dracut}\
+/sbin/new-kernel-pkg --package kernel%{?-v:-%{-v*}} --depmod --add-dracut-args --initrdfile=/boot/initrd-generic-%{KVERREL}%{?-v:.%{-v*}}.img --install %{KVERREL}%{?-v:.%{-v*}} || exit $?\
+%else\
 /sbin/new-kernel-pkg --package kernel%{?-v:-%{-v*}} --mkinitrd --depmod --install %{KVERREL}%{?-v:.%{-v*}} || exit $?\
+%endif}\
 #if [ -x /sbin/weak-modules ]\
 #then\
 #    /sbin/weak-modules --add-kernel %{KVERREL}%{?-v*} || exit $?\
@@ -1953,6 +1973,33 @@ fi
 # and build.
 
 %changelog
+* Sat Aug 08 2009 Michael Young <m.a.young at durham.ac.uk>
+- update pvops patch to latest rebase/master and current rawhide
+
+* Fri Aug 07 2009 Justin M. Forbes <jforbes at redhat.com>
+- Apply KSM updates from upstream
+
+* Fri Aug 07 2009 Hans de Goede <hdegoede at redhat.com>
+- When building a dracut generic initrd tell new-kernel-pkg to use that
+  instead of running mkinitrd
+
+* Fri Aug 07 2009 Dave Airlie <airlied at redhat.com> 2.6.31-0.139.rc5.git3
+- drm-r600-kms.patch - update r600 KMS
+- drm-radeon-fixes.patch - patches for queue to Linus
+
+* Thu Aug 06 2009 Justin M. Forbes <jforbes at redhat.com> 2.6.31-0.138.rc5.git3
+- Fix kvm virtio_blk errors (#514901)
+
+* Thu Aug 06 2009 Adam Jackson <ajax at redhat.com>
+- Hush DRM vblank warnings, they're constant (and harmless) under DRI2.
+
+* Thu Aug 06 2009 Dave Airlie <airlied at redhat.com> 2.6.31.0.134.rc5.git3
+- fixup vga arb warning at startup and handover between gpus
+
+* Thu Aug 06 2009 Kyle McMartin <kyle at redhat.com> 2.6.31.0.133.rc5.git3
+- die-floppy-die.patch: it's the 21st century, let's not rely on
+  steam powered technology.
+
 * Wed Aug 05 2009 Dave Airlie <airlied at redhat.com> 2.6.31.0.132.rc5.git3
 - revert-ftrace-powerpc-snafu.patch - fix ppc build
 

linux-2.6-ppc-perfctr-oops-fix.patch:
 mpc7450-pmu.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux-2.6-ppc-perfctr-oops-fix.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-ppc-perfctr-oops-fix.patch,v
retrieving revision 1.2.2.2
retrieving revision 1.2.2.3
diff -u -p -r1.2.2.2 -r1.2.2.3
--- linux-2.6-ppc-perfctr-oops-fix.patch	5 Aug 2009 23:00:17 -0000	1.2.2.2
+++ linux-2.6-ppc-perfctr-oops-fix.patch	8 Aug 2009 18:06:04 -0000	1.2.2.3
@@ -7,7 +7,7 @@ index 75ff47f..ea383c1 100644
  static int init_mpc7450_pmu(void)
  {
 -	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
-+	if (cur_cpu_spec->oprofile_cpu_type &&
++	if (!cur_cpu_spec->oprofile_cpu_type ||
 +	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
  		return -ENODEV;
  

linux-2.6-vga-arb.patch:
 drivers/gpu/Makefile     |    2 
 drivers/gpu/vga/Kconfig  |   10 
 drivers/gpu/vga/Makefile |    1 
 drivers/gpu/vga/vgaarb.c | 1206 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.c        |   44 +
 drivers/video/Kconfig    |    2 
 include/linux/pci.h      |    2 
 include/linux/vgaarb.h   |  196 +++++++
 8 files changed, 1461 insertions(+), 2 deletions(-)

Index: linux-2.6-vga-arb.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-vga-arb.patch,v
retrieving revision 1.6.2.2
retrieving revision 1.6.2.3
diff -u -p -r1.6.2.2 -r1.6.2.3
--- linux-2.6-vga-arb.patch	5 Aug 2009 23:00:17 -0000	1.6.2.2
+++ linux-2.6-vga-arb.patch	8 Aug 2009 18:06:04 -0000	1.6.2.3
@@ -1,4 +1,4 @@
-From a00c47b3e783fe9ebb871071d2472387451d9225 Mon Sep 17 00:00:00 2001
+From 83ec7b4c9fecfcffe396290f6e96ea5a60a59598 Mon Sep 17 00:00:00 2001
 From: Tiago Vignatti <tiago.vignatti at nokia.com>
 Date: Tue, 14 Jul 2009 15:57:29 +0300
 Subject: [PATCH] vga: implements VGA arbitration on Linux
@@ -15,18 +15,20 @@ balance pci get/put
 use the decodes count for userspace to get card
 count also if a gpu disables decodes move it to
 the next card
+do handover properly to next card
+optimise notify to only be done when something happens
 
 Signed-off-by: Tiago Vignatti <tiago.vignatti at nokia.com>
 ---
  drivers/gpu/Makefile     |    2 +-
  drivers/gpu/vga/Kconfig  |   10 +
  drivers/gpu/vga/Makefile |    1 +
- drivers/gpu/vga/vgaarb.c | 1195 ++++++++++++++++++++++++++++++++++++++++++++++
+ drivers/gpu/vga/vgaarb.c | 1206 ++++++++++++++++++++++++++++++++++++++++++++++
  drivers/pci/pci.c        |   44 ++
  drivers/video/Kconfig    |    2 +
  include/linux/pci.h      |    2 +
  include/linux/vgaarb.h   |  195 ++++++++
- 8 files changed, 1450 insertions(+), 1 deletions(-)
+ 8 files changed, 1461 insertions(+), 1 deletions(-)
  create mode 100644 drivers/gpu/vga/Kconfig
  create mode 100644 drivers/gpu/vga/Makefile
  create mode 100644 drivers/gpu/vga/vgaarb.c
@@ -64,10 +66,10 @@ index 0000000..7cc8c1e
 +obj-$(CONFIG_VGA_ARB)  += vgaarb.o
 diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
 new file mode 100644
-index 0000000..08ba44d
+index 0000000..199138f
 --- /dev/null
 +++ b/drivers/gpu/vga/vgaarb.c
-@@ -0,0 +1,1195 @@
+@@ -0,0 +1,1206 @@
 +/*
 + * vgaarb.c
 + *
@@ -476,7 +478,7 @@ index 0000000..08ba44d
 + * the arbiter's client decides if devices decodes or not legacy
 + * things.
 + */
-+static void vga_arbiter_add_pci_device(struct pci_dev *pdev)
++static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
 +{
 +	struct vga_device *vgadev;
 +	unsigned long flags;
@@ -486,7 +488,7 @@ index 0000000..08ba44d
 +
 +	/* Only deal with VGA class devices */
 +	if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
-+		return;
++		return false;
 +
 +	/* Allocate structure */
 +	vgadev = kmalloc(sizeof(struct vga_device), GFP_KERNEL);
@@ -496,7 +498,7 @@ index 0000000..08ba44d
 +		 * just do nothing, I'm not sure there is anything saner
 +		 * to be done
 +		 */
-+		return;
++		return false;
 +	}
 +
 +	memset(vgadev, 0, sizeof(*vgadev));
@@ -559,21 +561,25 @@ index 0000000..08ba44d
 +		vga_iostate_to_str(vgadev->locks));
 +
 +	spin_unlock_irqrestore(&vga_lock, flags);
-+	return;
++	return true;
 +fail:
 +	spin_unlock_irqrestore(&vga_lock, flags);
 +	kfree(vgadev);
++	return false;
 +}
 +
-+static void vga_arbiter_del_pci_device(struct pci_dev *pdev)
++static bool vga_arbiter_del_pci_device(struct pci_dev *pdev)
 +{
 +	struct vga_device *vgadev;
 +	unsigned long flags;
++	bool ret = true;
 +
 +	spin_lock_irqsave(&vga_lock, flags);
 +	vgadev = vgadev_find(pdev);
-+	if (vgadev == NULL)
++	if (vgadev == NULL) {
++		ret = false;
 +		goto bail;
++	}
 +
 +	if (vga_default == pdev) {
 +		pci_dev_put(vga_default);
@@ -596,6 +602,7 @@ index 0000000..08ba44d
 +bail:
 +	spin_unlock_irqrestore(&vga_lock, flags);
 +	kfree(vgadev);
++	return ret;
 +}
 +
 +/* this is called with the lock */
@@ -608,18 +615,22 @@ index 0000000..08ba44d
 +	old_decodes = vgadev->decodes;
 +	vgadev->decodes = new_decodes;
 +
-+	pr_info("vgaarb: device changed decodes: PCI:%s,olddecodes=%s,decodes=%s\n",
++	pr_info("vgaarb: device changed decodes: PCI:%s,olddecodes=%s,decodes=%s:owns=%s\n",
 +		pci_name(vgadev->pdev),
 +		vga_iostate_to_str(old_decodes),
-+		vga_iostate_to_str(vgadev->decodes));
++		vga_iostate_to_str(vgadev->decodes),
++		vga_iostate_to_str(vgadev->owns));
++
 +
 +	/* if we own the decodes we should move them along to
 +	   another card */
-+	if ((vgadev->owns & new_decodes) && (vga_count > 1)) {
-+		vgadev->owns &= new_decodes;
++	if ((vgadev->owns & old_decodes) && (vga_count > 1)) {
++		/* set us to own nothing */
++		vgadev->owns &= ~old_decodes;
 +		list_for_each_entry(new_vgadev, &vga_list, list) {
 +			if ((new_vgadev != vgadev) &&
 +			    (new_vgadev->decodes & VGA_RSRC_LEGACY_MASK)) {
++				pr_info("vgaarb: transferring owner from PCI:%s to PCI:%s\n", pci_name(vgadev->pdev), pci_name(new_vgadev->pdev));
 +				conflict = __vga_tryget(new_vgadev, VGA_RSRC_LEGACY_MASK);
 +				if (!conflict)
 +					__vga_put(new_vgadev, VGA_RSRC_LEGACY_MASK);
@@ -1209,6 +1220,7 @@ index 0000000..08ba44d
 +{
 +	struct device *dev = data;
 +	struct pci_dev *pdev = to_pci_dev(dev);
++	bool notify = false;
 +
 +	pr_devel("%s\n", __func__);
 +
@@ -1216,11 +1228,12 @@ index 0000000..08ba44d
 +	 * test this thing here, so someone needs to double check for the
 +	 * cases of hotplugable vga cards. */
 +	if (action == BUS_NOTIFY_ADD_DEVICE)
-+		vga_arbiter_add_pci_device(pdev);
++		notify = vga_arbiter_add_pci_device(pdev);
 +	else if (action == BUS_NOTIFY_DEL_DEVICE)
-+		vga_arbiter_del_pci_device(pdev);
++		notify = vga_arbiter_del_pci_device(pdev);
 +
-+	vga_arbiter_notify_clients();
++	if (notify)
++		vga_arbiter_notify_clients();
 +	return 0;
 +}
 +
@@ -1319,7 +1332,7 @@ index dbd0f94..d837606 100644
  static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
  spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED;
 diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
-index 8afcf08..f4ed145 100644
+index 3b54b39..a0d9ee1 100644
 --- a/drivers/video/Kconfig
 +++ b/drivers/video/Kconfig
 @@ -7,6 +7,8 @@ menu "Graphics support"
@@ -1546,5 +1559,5 @@ index 0000000..68229ce
 +
 +#endif /* LINUX_VGA_H */
 -- 
-1.5.4.1
+1.6.4
 

xen.pvops.patch:
 arch/x86/Kconfig                           |    4 
 arch/x86/include/asm/agp.h                 |   15 
 arch/x86/include/asm/e820.h                |    2 
 arch/x86/include/asm/i387.h                |    1 
 arch/x86/include/asm/io.h                  |   15 
 arch/x86/include/asm/io_apic.h             |    7 
 arch/x86/include/asm/microcode.h           |    9 
 arch/x86/include/asm/paravirt.h            |  718 -------------
 arch/x86/include/asm/paravirt_types.h      |  722 +++++++++++++
 arch/x86/include/asm/pci.h                 |    8 
 arch/x86/include/asm/pci_x86.h             |    2 
 arch/x86/include/asm/pgtable.h             |    3 
 arch/x86/include/asm/processor.h           |    4 
 arch/x86/include/asm/tlbflush.h            |    6 
 arch/x86/include/asm/xen/hypercall.h       |   36 
 arch/x86/include/asm/xen/interface.h       |    8 
 arch/x86/include/asm/xen/interface_32.h    |    5 
 arch/x86/include/asm/xen/interface_64.h    |   13 
 arch/x86/include/asm/xen/iommu.h           |   12 
 arch/x86/include/asm/xen/page.h            |   34 
 arch/x86/include/asm/xen/pci.h             |   13 
 arch/x86/kernel/Makefile                   |    1 
 arch/x86/kernel/acpi/boot.c                |   18 
 arch/x86/kernel/acpi/sleep.c               |    2 
 arch/x86/kernel/apic/io_apic.c             |   37 
 arch/x86/kernel/cpu/mtrr/Makefile          |    1 
 arch/x86/kernel/cpu/mtrr/amd.c             |    6 
 arch/x86/kernel/cpu/mtrr/centaur.c         |    6 
 arch/x86/kernel/cpu/mtrr/cyrix.c           |    6 
 arch/x86/kernel/cpu/mtrr/generic.c         |   10 
 arch/x86/kernel/cpu/mtrr/main.c            |   19 
 arch/x86/kernel/cpu/mtrr/mtrr.h            |   11 
 arch/x86/kernel/cpu/mtrr/xen.c             |  104 +
 arch/x86/kernel/e820.c                     |   30 
 arch/x86/kernel/ioport.c                   |   29 
 arch/x86/kernel/microcode_core.c           |    5 
 arch/x86/kernel/microcode_xen.c            |  195 +++
 arch/x86/kernel/paravirt.c                 |    1 
 arch/x86/kernel/pci-dma.c                  |    3 
 arch/x86/kernel/pci-swiotlb.c              |   30 
 arch/x86/kernel/process.c                  |   27 
 arch/x86/kernel/process_32.c               |   27 
 arch/x86/kernel/process_64.c               |   33 
 arch/x86/kernel/setup.c                    |    4 
 arch/x86/kernel/traps.c                    |   33 
 arch/x86/mm/init_32.c                      |   42 
 arch/x86/mm/pat.c                          |    2 
 arch/x86/mm/pgtable.c                      |   10 
 arch/x86/mm/tlb.c                          |   35 
 arch/x86/pci/Makefile                      |    1 
 arch/x86/pci/common.c                      |   18 
 arch/x86/pci/i386.c                        |    3 
 arch/x86/pci/init.c                        |    6 
 arch/x86/pci/xen.c                         |   51 
 arch/x86/xen/Kconfig                       |   33 
 arch/x86/xen/Makefile                      |    4 
 arch/x86/xen/apic.c                        |   60 +
 arch/x86/xen/enlighten.c                   |   63 +
 arch/x86/xen/mmu.c                         |  475 ++++++++-
 arch/x86/xen/pci-swiotlb.c                 |   53 +
 arch/x86/xen/pci.c                         |   86 +
 arch/x86/xen/setup.c                       |   62 +
 arch/x86/xen/smp.c                         |    2 
 arch/x86/xen/time.c                        |    2 
 arch/x86/xen/vga.c                         |   67 +
 arch/x86/xen/xen-ops.h                     |   19 
 block/blk-core.c                           |    2 
 drivers/acpi/acpica/hwsleep.c              |   17 
 drivers/acpi/sleep.c                       |   19 
 drivers/block/Kconfig                      |    1 
 drivers/char/agp/intel-agp.c               |   17 
 drivers/char/hvc_xen.c                     |   99 +
 drivers/net/Kconfig                        |    1 
 drivers/pci/Makefile                       |    2 
 drivers/pci/pci.h                          |    2 
 drivers/pci/xen-iommu.c                    |  332 ++++++
 drivers/xen/Kconfig                        |   32 
 drivers/xen/Makefile                       |   18 
 drivers/xen/acpi.c                         |   23 
 drivers/xen/balloon.c                      |  155 ++
 drivers/xen/biomerge.c                     |   14 
 drivers/xen/blkback/Makefile               |    3 
 drivers/xen/blkback/blkback.c              |  658 ++++++++++++
 drivers/xen/blkback/common.h               |  137 ++
 drivers/xen/blkback/interface.c            |  182 +++
 drivers/xen/blkback/vbd.c                  |  118 ++
 drivers/xen/blkback/xenbus.c               |  542 ++++++++++
 drivers/xen/events.c                       |  315 +++++-
 drivers/xen/grant-table.c                  |  103 +
 drivers/xen/netback/Makefile               |    3 
 drivers/xen/netback/common.h               |  221 ++++
 drivers/xen/netback/interface.c            |  401 +++++++
 drivers/xen/netback/netback.c              | 1513 +++++++++++++++++++++++++++++
 drivers/xen/netback/xenbus.c               |  454 ++++++++
 drivers/xen/pci.c                          |  116 ++
 drivers/xen/xenbus/Makefile                |    5 
 drivers/xen/xenbus/xenbus_comms.c          |    1 
 drivers/xen/xenbus/xenbus_probe.c          |  380 +------
 drivers/xen/xenbus/xenbus_probe.h          |   29 
 drivers/xen/xenbus/xenbus_probe_backend.c  |  298 +++++
 drivers/xen/xenbus/xenbus_probe_frontend.c |  292 +++++
 drivers/xen/xenfs/Makefile                 |    3 
 drivers/xen/xenfs/privcmd.c                |  403 +++++++
 drivers/xen/xenfs/super.c                  |   98 +
 drivers/xen/xenfs/xenfs.h                  |    3 
 drivers/xen/xenfs/xenstored.c              |   67 +
 include/asm-generic/pci.h                  |    2 
 include/linux/interrupt.h                  |    1 
 include/linux/page-flags.h                 |   18 
 include/linux/pci.h                        |    6 
 include/xen/Kbuild                         |    1 
 include/xen/acpi.h                         |   23 
 include/xen/balloon.h                      |    8 
 include/xen/blkif.h                        |  122 ++
 include/xen/events.h                       |   27 
 include/xen/grant_table.h                  |   43 
 include/xen/interface/grant_table.h        |   22 
 include/xen/interface/memory.h             |   92 +
 include/xen/interface/physdev.h            |   21 
 include/xen/interface/platform.h           |  222 ++++
 include/xen/interface/xen.h                |   43 
 include/xen/privcmd.h                      |   80 +
 include/xen/swiotlb.h                      |   18 
 include/xen/xen-ops.h                      |   11 
 include/xen/xenbus.h                       |    2 
 kernel/irq/manage.c                        |    3 
 lib/swiotlb.c                              |    5 
 mm/page_alloc.c                            |   14 
 128 files changed, 9923 insertions(+), 1309 deletions(-)

Index: xen.pvops.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/Attic/xen.pvops.patch,v
retrieving revision 1.1.2.29
retrieving revision 1.1.2.30
diff -u -p -r1.1.2.29 -r1.1.2.30
--- xen.pvops.patch	15 Jul 2009 22:20:03 -0000	1.1.2.29
+++ xen.pvops.patch	8 Aug 2009 18:06:04 -0000	1.1.2.30
@@ -50,6 +50,19 @@ index 9825cd6..d972b14 100644
 +	dma_free_coherent(NULL, PAGE_SIZE<<(order), (table), virt_to_bus(table))
  
  #endif /* _ASM_X86_AGP_H */
+diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
+index 7ecba4d..df06646 100644
+--- a/arch/x86/include/asm/e820.h
++++ b/arch/x86/include/asm/e820.h
+@@ -109,6 +109,8 @@ extern void reserve_early(u64 start, u64 end, char *name);
+ extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
+ extern void free_early(u64 start, u64 end);
+ extern void early_res_to_bootmem(u64 start, u64 end);
++extern u64 early_res_next_free(u64 start);
++extern u64 early_res_next_reserved(u64 addr, u64 max);
+ extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+ 
+ extern unsigned long e820_end_of_ram_pfn(void);
 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
 index 175adf5..2e75292 100644
 --- a/arch/x86/include/asm/i387.h
@@ -96,10 +109,10 @@ index 7373932..c75e9eb 100644
  
  #endif /* _ASM_X86_IO_H */
 diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
-index daf866e..5881493 100644
+index 330ee80..70f5ea9 100644
 --- a/arch/x86/include/asm/io_apic.h
 +++ b/arch/x86/include/asm/io_apic.h
-@@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+@@ -170,6 +170,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
  extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
  
  extern void probe_nr_irqs_gsi(void);
@@ -107,7 +120,7 @@ index daf866e..5881493 100644
  
  extern int setup_ioapic_entry(int apic, int irq,
  			      struct IO_APIC_route_entry *entry,
-@@ -184,4 +185,10 @@ static inline void ioapic_init_mappings(void)	{ }
+@@ -186,4 +187,10 @@ static inline void ioapic_insert_resources(void) { }
  static inline void probe_nr_irqs_gsi(void)	{ }
  #endif
  
@@ -118,6 +131,24 @@ index daf866e..5881493 100644
 +
 +
  #endif /* _ASM_X86_IO_APIC_H */
+diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
+index ef51b50..e15fca1 100644
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -55,4 +55,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
+ }
+ #endif
+ 
++#ifdef CONFIG_MICROCODE_XEN
++extern struct microcode_ops * __init init_xen_microcode(void);
++#else
++static inline struct microcode_ops * __init init_xen_microcode(void)
++{
++	return NULL;
++}
++#endif
++
+ #endif /* _ASM_X86_MICROCODE_H */
 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
 index 4fb37c8..e19ffe3 100644
 --- a/arch/x86/include/asm/paravirt.h
@@ -1889,7 +1920,7 @@ index 0000000..75df312
 +#endif
 +
 diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 018a0a4..f334014 100644
+index 018a0a4..ebc174c 100644
 --- a/arch/x86/include/asm/xen/page.h
 +++ b/arch/x86/include/asm/xen/page.h
 @@ -5,6 +5,7 @@
@@ -1900,7 +1931,7 @@ index 018a0a4..f334014 100644
  
  #include <asm/uaccess.h>
  #include <asm/page.h>
-@@ -35,6 +36,8 @@ typedef struct xpaddr {
+@@ -35,16 +36,25 @@ typedef struct xpaddr {
  #define MAX_DOMAIN_PAGES						\
      ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
  
@@ -1909,7 +1940,25 @@ index 018a0a4..f334014 100644
  
  extern unsigned long get_phys_to_machine(unsigned long pfn);
  extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-@@ -62,10 +65,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
+ 
+ static inline unsigned long pfn_to_mfn(unsigned long pfn)
+ {
++	unsigned long mfn;
++
+ 	if (xen_feature(XENFEAT_auto_translated_physmap))
+ 		return pfn;
+ 
+-	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
++	mfn = get_phys_to_machine(pfn);
++
++	if (unlikely(mfn == INVALID_P2M_ENTRY))
++		return mfn;
++
++	return mfn & ~FOREIGN_FRAME_BIT;
+ }
+ 
+ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
+@@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
  	if (xen_feature(XENFEAT_auto_translated_physmap))
  		return mfn;
  
@@ -1921,7 +1970,7 @@ index 018a0a4..f334014 100644
  
  	pfn = 0;
  	/*
-@@ -112,13 +113,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
+@@ -112,13 +120,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
   */
  static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
  {
@@ -1937,7 +1986,23 @@ index 018a0a4..f334014 100644
  	return pfn;
  }
  
-@@ -163,6 +160,7 @@ static inline pte_t __pte_ma(pteval_t x)
+@@ -143,6 +147,15 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ 	return pte;
+ }
+ 
++static inline unsigned long pte_invalid_mfn(void)
++{
++	pte_t pte;
++
++	pte.pte = ((phys_addr_t)INVALID_P2M_ENTRY << PAGE_SHIFT);
++
++	return pte_mfn(pte);
++}
++
+ static inline pteval_t pte_val_ma(pte_t pte)
+ {
+ 	return pte.pte;
+@@ -163,6 +176,7 @@ static inline pte_t __pte_ma(pteval_t x)
  
  #define pgd_val_ma(x)	((x).pgd)
  
@@ -1964,6 +2029,18 @@ index 0000000..0563fc6
 +#endif
 +
 +#endif	/* _ASM_X86_XEN_PCI_H */
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index 430d5b2..96f9ecb 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -108,6 +108,7 @@ obj-$(CONFIG_OLPC)		+= olpc.o
+ microcode-y				:= microcode_core.o
+ microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
+ microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
++microcode-$(CONFIG_MICROCODE_XEN)	+= microcode_xen.o
+ obj-$(CONFIG_MICROCODE)			+= microcode.o
+ 
+ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
 index 6b8ca3a..d47c54f 100644
 --- a/arch/x86/kernel/acpi/boot.c
@@ -2030,7 +2107,7 @@ index ca93638..9eff23c 100644
  #include "sleep.h"
  
 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
-index 90b5e6e..7011798 100644
+index d2ed6c5..205e277 100644
 --- a/arch/x86/kernel/apic/io_apic.c
 +++ b/arch/x86/kernel/apic/io_apic.c
 @@ -63,8 +63,10 @@
@@ -2089,7 +2166,7 @@ index 90b5e6e..7011798 100644
  
  	if (sis_apic_bug)
  		writel(reg, &io_apic->index);
-@@ -3851,6 +3872,11 @@ void __init probe_nr_irqs_gsi(void)
+@@ -3854,6 +3875,11 @@ void __init probe_nr_irqs_gsi(void)
  	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
  }
  
@@ -2101,7 +2178,7 @@ index 90b5e6e..7011798 100644
  #ifdef CONFIG_SPARSE_IRQ
  int __init arch_probe_nr_irqs(void)
  {
-@@ -4144,6 +4170,11 @@ void __init ioapic_init_mappings(void)
+@@ -4147,6 +4173,11 @@ void __init ioapic_init_mappings(void)
  	struct resource *ioapic_res;
  	int i;
  
@@ -2404,6 +2481,47 @@ index 0000000..c4e7484
 +	    cpu_has_centaur_mcr)
 +		mtrr_if = &xen_mtrr_ops;
 +}
+diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
+index 5cb5725..1e3bd20 100644
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -750,6 +750,36 @@ static int __init find_overlapped_early(u64 start, u64 end)
+ 	return i;
+ }
+ 
++u64 __init early_res_next_free(u64 addr)
++{
++	int i;
++	u64 end = addr;
++	struct early_res *r;
++
++	for (i = 0; i < MAX_EARLY_RES; i++) {
++		r = &early_res[i];
++		if (addr >= r->start && addr < r->end) {
++			end = r->end;
++			break;
++		}
++	}
++	return end;
++}
++
++u64 __init early_res_next_reserved(u64 addr, u64 max)
++{
++	int i;
++	struct early_res *r;
++	u64 next_res = max;
++
++	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
++		r = &early_res[i];
++		if ((r->start >= addr) && (r->start < next_res))
++			next_res = r->start;
++	}
++	return next_res;
++}
++
+ /*
+  * Drop the i-th range from the early reservation map,
+  * by copying any higher ranges down one over it, and
 diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
 index 99c4d30..0a421d3 100644
 --- a/arch/x86/kernel/ioport.c
@@ -2469,6 +2587,230 @@ index 99c4d30..0a421d3 100644
  
  	return 0;
  }
+diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
+index 9371448..4cb6bbd 100644
+--- a/arch/x86/kernel/microcode_core.c
++++ b/arch/x86/kernel/microcode_core.c
+@@ -81,6 +81,7 @@
+ #include <linux/fs.h>
+ #include <linux/mm.h>
+ 
++#include <asm/xen/hypervisor.h>
+ #include <asm/microcode.h>
+ #include <asm/processor.h>
+ 
+@@ -503,7 +504,9 @@ static int __init microcode_init(void)
+ 	struct cpuinfo_x86 *c = &cpu_data(0);
+ 	int error;
+ 
+-	if (c->x86_vendor == X86_VENDOR_INTEL)
++	if (xen_pv_domain())
++		microcode_ops = init_xen_microcode();
++	else if (c->x86_vendor == X86_VENDOR_INTEL)
+ 		microcode_ops = init_intel_microcode();
+ 	else if (c->x86_vendor == X86_VENDOR_AMD)
+ 		microcode_ops = init_amd_microcode();
+diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c
+new file mode 100644
+index 0000000..ab9d4e1
+--- /dev/null
++++ b/arch/x86/kernel/microcode_xen.c
+@@ -0,0 +1,195 @@
++/*
++ * Xen microcode update driver
++ *
++ * Xen does most of the work here.  We just pass the whole blob into
++ * Xen, and it will apply it to all CPUs as appropriate.  Xen will
++ * worry about how different CPU models are actually updated.
++ */
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/firmware.h>
++#include <linux/vmalloc.h>
++#include <linux/uaccess.h>
++
++#include <asm/microcode.h>
++
++#include <xen/interface/platform.h>
++#include <xen/interface/xen.h>
++
++#include <asm/xen/hypercall.h>
++#include <asm/xen/hypervisor.h>
++
++MODULE_DESCRIPTION("Xen microcode update driver");
++MODULE_LICENSE("GPL");
++
++struct xen_microcode {
++	size_t len;
++	char data[0];
++};
++
++static int xen_microcode_update(int cpu)
++{
++	int err;
++	struct xen_platform_op op;
++	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
++	struct xen_microcode *uc = uci->mc;
++
++	if (uc == NULL || uc->len == 0) {
++		/*
++		 * We do all cpus at once, so we don't need to do
++		 * other cpus explicitly (besides, these vcpu numbers
++		 * have no relationship to underlying physical cpus).
++		 */
++		return 0;
++	}
++
++	op.cmd = XENPF_microcode_update;
++	set_xen_guest_handle(op.u.microcode.data, uc->data);
++	op.u.microcode.length = uc->len;
++
++	err = HYPERVISOR_dom0_op(&op);
++
++	if (err != 0)
++		printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err);
++
++	return err;
++}
++
++static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device)
++{
++	char name[30];
++	struct cpuinfo_x86 *c = &cpu_data(cpu);
++	const struct firmware *firmware;
++	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
++	enum ucode_state ret;
++	struct xen_microcode *uc;
++	size_t size;
++	int err;
++
++	if (c->x86_vendor == X86_VENDOR_INTEL) {
++		BUG_ON(cpu != raw_smp_processor_id());
++		snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
++			 c->x86, c->x86_model, c->x86_mask);
++	} else if (c->x86_vendor == X86_VENDOR_AMD) {
++		snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin");
++	} else
++		return UCODE_NFOUND;
++
++	err = request_firmware(&firmware, name, device);
++	if (err) {
++		pr_debug("microcode: data file %s load failed\n", name);
++		return UCODE_NFOUND;
++	}
++
++	/*
++	 * Only bother getting real firmware for cpu 0; the others get
++	 * dummy placeholders.
++	 */
++	if (cpu == 0)
++		size = firmware->size;
++	else
++		size = 0;
++
++	if (uci->mc != NULL) {
++		vfree(uci->mc);
++		uci->mc = NULL;
++	}
++
++	ret = UCODE_ERROR;
++	uc = vmalloc(sizeof(*uc) + size);
++	if (uc == NULL)
++		goto out;
++
++	ret = UCODE_OK;
++	uc->len = size;
++	memcpy(uc->data, firmware->data, uc->len);
++
++	uci->mc = uc;
++
++out:
++	release_firmware(firmware);
++
++	return ret;
++}
++
++static enum ucode_state xen_request_microcode_user(int cpu,
++						   const void __user *buf, size_t size)
++{
++	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
++	struct xen_microcode *uc;
++	enum ucode_state ret;
++	size_t unread;
++
++	if (cpu != 0) {
++		/* No real firmware for non-zero cpus; just store a
++		   placeholder */
++		size = 0;
++	}
++
++	if (uci->mc != NULL) {
++		vfree(uci->mc);
++		uci->mc = NULL;
++	}
++
++	ret = UCODE_ERROR;
++	uc = vmalloc(sizeof(*uc) + size);
++	if (uc == NULL)
++		goto out;
++
++	uc->len = size;
++
++	ret = UCODE_NFOUND;
++
++	/* XXX This sporadically returns uncopied bytes, so we return
++	   EFAULT.  As far as I can see, the usermode code
++	   (microcode_ctl) isn't doing anything wrong... */
++	unread = copy_from_user(uc->data, buf, size);
++
++	if (unread != 0) {
++		printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n",
++		       unread, size, buf, uc->data);
++		goto out;
++	}
++
++	ret = UCODE_OK;
++
++out:
++	if (ret == 0)
++		uci->mc = uc;
++	else
++		vfree(uc);
++
++	return ret;
++}
++
++static void xen_microcode_fini_cpu(int cpu)
++{
++	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
++
++	vfree(uci->mc);
++	uci->mc = NULL;
++}
++
++static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig)
++{
++	sig->sig = 0;
++	sig->pf = 0;
++	sig->rev = 0;
++
++	return 0;
++}
++
++static struct microcode_ops microcode_xen_ops = {
++	.request_microcode_user		  = xen_request_microcode_user,
++	.request_microcode_fw             = xen_request_microcode_fw,
++	.collect_cpu_info                 = xen_collect_cpu_info,
++	.apply_microcode                  = xen_microcode_update,
++	.microcode_fini_cpu               = xen_microcode_fini_cpu,
++};
++
++struct microcode_ops * __init init_xen_microcode(void)
++{
++	if (!xen_initial_domain())
++		return NULL;
++	return &microcode_xen_ops;
++}
 diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
 index 70ec9b9..cef3d70 100644
 --- a/arch/x86/kernel/paravirt.c
@@ -2726,7 +3068,7 @@ index ebefb54..a28279d 100644
  }
  
 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index de2cab1..3539750 100644
+index 63f32d2..86bef0f 100644
 --- a/arch/x86/kernel/setup.c
 +++ b/arch/x86/kernel/setup.c
 @@ -87,6 +87,7 @@
@@ -2737,7 +3079,7 @@ index de2cab1..3539750 100644
  
  #include <asm/system.h>
  #include <asm/vsyscall.h>
-@@ -931,6 +932,9 @@ void __init setup_arch(char **cmdline_p)
+@@ -944,6 +945,9 @@ void __init setup_arch(char **cmdline_p)
  
  	initmem_init(0, max_pfn);
  
@@ -2799,6 +3141,73 @@ index 5204332..22a5a6d 100644
  }
  EXPORT_SYMBOL_GPL(math_state_restore);
  
+diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
+index 3cd7711..f16903d 100644
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -430,22 +430,45 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
+ {
+ 	int node_pfn;
+ 	struct page *page;
++	phys_addr_t chunk_end, chunk_max;
+ 	unsigned long final_start_pfn, final_end_pfn;
+-	struct add_highpages_data *data;
+-
+-	data = (struct add_highpages_data *)datax;
++	struct add_highpages_data *data = (struct add_highpages_data *)datax;
+ 
+ 	final_start_pfn = max(start_pfn, data->start_pfn);
+ 	final_end_pfn = min(end_pfn, data->end_pfn);
+ 	if (final_start_pfn >= final_end_pfn)
+ 		return 0;
+ 
+-	for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
+-	     node_pfn++) {
+-		if (!pfn_valid(node_pfn))
+-			continue;
+-		page = pfn_to_page(node_pfn);
+-		add_one_highpage_init(page, node_pfn);
++	chunk_end = PFN_PHYS(final_start_pfn);
++	chunk_max = PFN_PHYS(final_end_pfn);
++
++	/*
++	 * Check for reserved areas.
++	 */
++	for (;;) {
++		phys_addr_t chunk_start;
++		chunk_start = early_res_next_free(chunk_end);
++		
++		/*
++		 * Reserved area. Just count high mem pages.
++		 */
++		for (node_pfn = PFN_DOWN(chunk_end);
++		     node_pfn < PFN_DOWN(chunk_start); node_pfn++) {
++			if (pfn_valid(node_pfn))
++				totalhigh_pages++;
++		}
++
++		if (chunk_start >= chunk_max)
++			break;
++
++		chunk_end = early_res_next_reserved(chunk_start, chunk_max);
++		for (node_pfn = PFN_DOWN(chunk_start);
++		     node_pfn < PFN_DOWN(chunk_end); node_pfn++) {
++			if (!pfn_valid(node_pfn))
++				continue;
++			page = pfn_to_page(node_pfn);
++			add_one_highpage_init(page, node_pfn);
++		}
+ 	}
+ 
+ 	return 0;
+@@ -459,7 +482,6 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
+ 
+ 	data.start_pfn = start_pfn;
+ 	data.end_pfn = end_pfn;
+-
+ 	work_with_active_regions(nid, add_highpages_work_fn, &data);
+ }
+ 
 diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
 index e6718bb..f03f6dc 100644
 --- a/arch/x86/mm/pat.c
@@ -2813,7 +3222,7 @@ index e6718bb..f03f6dc 100644
  
  #ifdef CONFIG_STRICT_DEVMEM
 diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index 8e43bdd..e68aea6 100644
+index ed34f5e..9c58425 100644
 --- a/arch/x86/mm/pgtable.c
 +++ b/arch/x86/mm/pgtable.c
 @@ -6,6 +6,16 @@
@@ -2970,10 +3379,10 @@ index 2202b62..6a522c2 100644
  
  	if (pci_bf_sort >= pci_force_bf)
 diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
-index 0fb56db..181f9eb 100644
+index 52e62e5..373b18e 100644
 --- a/arch/x86/pci/i386.c
 +++ b/arch/x86/pci/i386.c
-@@ -275,6 +275,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+@@ -282,6 +282,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
  		return -EINVAL;
  
  	prot = pgprot_val(vma->vm_page_prot);
@@ -3062,7 +3471,7 @@ index 0000000..1b922aa
 +}
 +
 diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
-index b83e119..de662fa 100644
+index b83e119..951c924 100644
 --- a/arch/x86/xen/Kconfig
 +++ b/arch/x86/xen/Kconfig
 @@ -6,6 +6,7 @@ config XEN
@@ -3073,7 +3482,7 @@ index b83e119..de662fa 100644
  	depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
  	depends on X86_CMPXCHG && X86_TSC
  	help
-@@ -36,3 +37,31 @@ config XEN_DEBUG_FS
+@@ -36,3 +37,35 @@ config XEN_DEBUG_FS
  	help
  	  Enable statistics output and various tuning options in debugfs.
  	  Enabling this option may incur a significant performance overhead.
@@ -3105,6 +3514,11 @@ index b83e119..de662fa 100644
 +       help
 +         Enable support for passing PCI devices through to
 +	 unprivileged domains. (COMPLETELY UNTESTED)
++
++config MICROCODE_XEN
++       def_bool y
++       depends on XEN_DOM0 && MICROCODE
+\ No newline at end of file
 diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
 index 172438f..6d697e5 100644
 --- a/arch/x86/xen/Makefile
@@ -3185,7 +3599,7 @@ index 0000000..496f07d
 +#endif
 +}
 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 0a1700a..78970be 100644
+index 0a1700a..6ba04d4 100644
 --- a/arch/x86/xen/enlighten.c
 +++ b/arch/x86/xen/enlighten.c
 @@ -64,6 +64,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
@@ -3301,7 +3715,7 @@ index 0a1700a..78970be 100644
  
  	init_mm.pgd = pgd;
  
-@@ -1054,6 +1089,15 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1054,9 +1089,21 @@ asmlinkage void __init xen_start_kernel(void)
  	if (xen_feature(XENFEAT_supervisor_mode_kernel))
  		pv_info.kernel_rpl = 0;
  
@@ -3317,7 +3731,13 @@ index 0a1700a..78970be 100644
  	/* set the limit of our address space */
  	xen_reserve_top();
  
-@@ -1075,6 +1119,16 @@ asmlinkage void __init xen_start_kernel(void)
++	/* fixup p2m entries uninitialized by domain builder */
++	xen_fix_mfn_list();
++
+ #ifdef CONFIG_X86_32
+ 	/* set up basic CPUID stuff */
+ 	cpu_detect(&new_cpu_data);
+@@ -1075,6 +1122,16 @@ asmlinkage void __init xen_start_kernel(void)
  		add_preferred_console("xenboot", 0, NULL);
  		add_preferred_console("tty", 0, NULL);
  		add_preferred_console("hvc", 0, NULL);
@@ -3335,7 +3755,7 @@ index 0a1700a..78970be 100644
  
  	xen_raw_console_write("about to get started...\n");
 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 4ceb285..d4c1f78 100644
+index 4ceb285..e23f89f 100644
 --- a/arch/x86/xen/mmu.c
 +++ b/arch/x86/xen/mmu.c
 @@ -50,7 +50,9 @@
@@ -3370,7 +3790,27 @@ index 4ceb285..d4c1f78 100644
  #ifdef CONFIG_XEN_DEBUG_FS
  
  static struct {
-@@ -315,6 +325,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
+@@ -226,6 +236,19 @@ void __init xen_build_dynamic_phys_to_machine(void)
+ 	xen_build_mfn_list_list();
+ }
+ 
++/*
++ * P2M entries higher than xen_start_info->nr_pages allocated by the
++ * domain builder are uninitialized. Set them to INVALID_P2M_ENTRY.
++ */
++void __init xen_fix_mfn_list(void)
++{
++	unsigned idx;
++	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
++
++	for (idx = p2m_index(max_pfn); idx < P2M_ENTRIES_PER_PAGE; idx++)
++		p2m_top[p2m_top_index(max_pfn)][idx] = INVALID_P2M_ENTRY;
++}
++
+ unsigned long get_phys_to_machine(unsigned long pfn)
+ {
+ 	unsigned topidx, idx;
+@@ -315,6 +338,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
  
  	return PFN_DOWN(maddr.maddr);
  }
@@ -3378,7 +3818,7 @@ index 4ceb285..d4c1f78 100644
  
  xmaddr_t arbitrary_virt_to_machine(void *vaddr)
  {
-@@ -376,6 +387,34 @@ static bool xen_page_pinned(void *ptr)
+@@ -376,6 +400,34 @@ static bool xen_page_pinned(void *ptr)
  	return PagePinned(page);
  }
  
@@ -3413,7 +3853,7 @@ index 4ceb285..d4c1f78 100644
  static void xen_extend_mmu_update(const struct mmu_update *update)
  {
  	struct multicall_space mcs;
-@@ -452,6 +491,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
+@@ -452,6 +504,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
  void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
  		    pte_t *ptep, pte_t pteval)
  {
@@ -3425,7 +3865,7 @@ index 4ceb285..d4c1f78 100644
  	ADD_STATS(set_pte_at, 1);
  //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
  	ADD_STATS(set_pte_at_current, mm == current->mm);
-@@ -522,8 +566,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+@@ -522,8 +579,25 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
  	return val;
  }
  
@@ -3451,7 +3891,7 @@ index 4ceb285..d4c1f78 100644
  	return pte_mfn_to_pfn(pte.pte);
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
-@@ -536,7 +597,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
+@@ -536,7 +610,22 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
  
  pte_t xen_make_pte(pteval_t pte)
  {
@@ -3475,7 +3915,7 @@ index 4ceb285..d4c1f78 100644
  	return native_make_pte(pte);
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-@@ -592,6 +668,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
+@@ -592,6 +681,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
  
  void xen_set_pte(pte_t *ptep, pte_t pte)
  {
@@ -3487,7 +3927,7 @@ index 4ceb285..d4c1f78 100644
  	ADD_STATS(pte_update, 1);
  //	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
  	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-@@ -608,6 +689,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
+@@ -608,6 +702,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
  #ifdef CONFIG_X86_PAE
  void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
  {
@@ -3499,7 +3939,7 @@ index 4ceb285..d4c1f78 100644
  	set_64bit((u64 *)ptep, native_pte_val(pte));
  }
  
-@@ -1285,6 +1371,13 @@ static void xen_flush_tlb_single(unsigned long addr)
+@@ -1285,6 +1384,13 @@ static void xen_flush_tlb_single(unsigned long addr)
  	preempt_enable();
  }
  
@@ -3513,14 +3953,27 @@ index 4ceb285..d4c1f78 100644
  static void xen_flush_tlb_others(const struct cpumask *cpus,
  				 struct mm_struct *mm, unsigned long va)
  {
-@@ -1444,10 +1537,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
- #ifdef CONFIG_X86_32
- static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+@@ -1441,13 +1547,29 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
+ }
+ #endif
+ 
+-#ifdef CONFIG_X86_32
+-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
++static __init pte_t filter_pte(pte_t *ptep, pte_t pte)
  {
 -	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 -	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
++#ifdef CONFIG_X86_32
 +	pte_t oldpte = *ptep;
++#endif
 +
++	if ((pte_flags(pte) & _PAGE_PRESENT) &&
++	    (pte_mfn(pte) == pte_invalid_mfn())) {
++		/* Don't allow pre-balloned mappings to be set */
++		pte = __pte_ma(0);
++	}
++
++#ifdef CONFIG_X86_32
 +	if (pte_flags(oldpte) & _PAGE_PRESENT) {
 +		/* Don't allow existing IO mappings to be overridden */
 +		if (pte_flags(oldpte) & _PAGE_IOMAP)
@@ -3530,10 +3983,24 @@ index 4ceb285..d4c1f78 100644
  		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
  			       pte_val_ma(pte));
 +	}
++#endif
  
  	return pte;
  }
-@@ -1616,6 +1716,7 @@ static void *m2v(phys_addr_t maddr)
+@@ -1456,11 +1578,10 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+    doesn't allow RO pagetable pages to be remapped RW */
+ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+ {
+-	pte = mask_rw_pte(ptep, pte);
++	pte = filter_pte(ptep, pte);
+ 
+ 	xen_set_pte(ptep, pte);
+ }
+-#endif
+ 
+ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+ {
+@@ -1616,6 +1737,7 @@ static void *m2v(phys_addr_t maddr)
  	return __ka(m2p(maddr));
  }
  
@@ -3541,7 +4008,7 @@ index 4ceb285..d4c1f78 100644
  static void set_page_prot(void *addr, pgprot_t prot)
  {
  	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-@@ -1671,6 +1772,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+@@ -1671,6 +1793,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
  	set_page_prot(pmd, PAGE_KERNEL_RO);
  }
  
@@ -3562,7 +4029,7 @@ index 4ceb285..d4c1f78 100644
  #ifdef CONFIG_X86_64
  static void convert_pfn_mfn(void *v)
  {
-@@ -1762,6 +1877,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1762,6 +1898,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  					 unsigned long max_pfn)
  {
  	pmd_t *kernel_pmd;
@@ -3570,7 +4037,7 @@ index 4ceb285..d4c1f78 100644
  
  	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
  				  xen_start_info->nr_pt_frames * PAGE_SIZE +
-@@ -1773,6 +1889,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1773,6 +1910,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  	xen_map_identity_early(level2_kernel_pgt, max_pfn);
  
  	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
@@ -3591,7 +4058,7 @@ index 4ceb285..d4c1f78 100644
  	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
  			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
  
-@@ -1824,9 +1954,25 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1824,9 +1975,25 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  		pte = pfn_pte(phys, prot);
  		break;
  
@@ -3618,7 +4085,7 @@ index 4ceb285..d4c1f78 100644
  	}
  
  	__native_set_fixmap(idx, pte);
-@@ -1841,6 +1987,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1841,6 +2008,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  #endif
  }
  
@@ -3648,7 +4115,19 @@ index 4ceb285..d4c1f78 100644
  __init void xen_post_allocator_init(void)
  {
  	pv_mmu_ops.set_pte = xen_set_pte;
-@@ -1955,6 +2124,271 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
+@@ -1906,11 +2096,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
+ 	.kmap_atomic_pte = xen_kmap_atomic_pte,
+ #endif
+ 
+-#ifdef CONFIG_X86_64
+-	.set_pte = xen_set_pte,
+-#else
+ 	.set_pte = xen_set_pte_init,
+-#endif
+ 	.set_pte_at = xen_set_pte_at,
+ 	.set_pmd = xen_set_pmd_hyper,
+ 
+@@ -1955,6 +2141,271 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
  };
  
  
@@ -4072,7 +4551,7 @@ index 0000000..07b59fe
 +	}
 +}
 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index ad0047f..2439456 100644
+index ad0047f..c0304a1 100644
 --- a/arch/x86/xen/setup.c
 +++ b/arch/x86/xen/setup.c
 @@ -19,6 +19,7 @@
@@ -4083,15 +4562,19 @@ index ad0047f..2439456 100644
  #include <xen/interface/physdev.h>
  #include <xen/features.h>
  
-@@ -36,21 +37,62 @@ extern void xen_syscall32_target(void);
+@@ -36,21 +37,63 @@ extern void xen_syscall32_target(void);
  /**
   * machine_specific_memory_setup - Hook for machine specific memory setup.
   **/
 -
  char * __init xen_memory_setup(void)
  {
- 	unsigned long max_pfn = xen_start_info->nr_pages;
-+	unsigned long long mem_end;
+-	unsigned long max_pfn = xen_start_info->nr_pages;
+-
+-	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
++	unsigned long nrpages = min(xen_start_info->nr_pages, MAX_DOMAIN_PAGES);
++	unsigned long long mem_limit = PFN_PHYS((u64)MAX_DOMAIN_PAGES);
++	unsigned long long mem_max = 0;
 +	int rc;
 +	struct xen_memory_map memmap;
 +	/*
@@ -4103,9 +4586,6 @@ index ad0047f..2439456 100644
 +		XENMEM_machine_memory_map :
 +		XENMEM_memory_map;
 +	int i;
- 
- 	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
-+	mem_end = PFN_PHYS((u64)max_pfn);
 +
 +	memmap.nr_entries = E820MAX;
 +	set_xen_guest_handle(memmap.buffer, map);
@@ -4114,7 +4594,7 @@ index ad0047f..2439456 100644
 +	if (rc == -ENOSYS) {
 +		memmap.nr_entries = 1;
 +		map[0].addr = 0ULL;
-+		map[0].size = mem_end;
++		map[0].size = PFN_PHYS((u64)nrpages);
 +		/* 8MB slack (to balance backend allocations). */
 +		map[0].size += 8ULL << 20;
 +		map[0].type = E820_RAM;
@@ -4128,12 +4608,15 @@ index ad0047f..2439456 100644
 +	for (i = 0; i < memmap.nr_entries; i++) {
 +		unsigned long long end = map[i].addr + map[i].size;
 +		if (map[i].type == E820_RAM) {
-+			if (map[i].addr > mem_end)
++			if (map[i].addr > mem_limit)
 +				continue;
-+			if (end > mem_end) {
-+				/* Truncate region to max_mem. */
-+				map[i].size -= end - mem_end;
++			if (end > mem_limit) {
++				/* Truncate region to mem_limit. */
++				map[i].size -= end - mem_limit;
++				end = mem_limit;
 +			}
++			if (end > mem_max)
++				mem_max = end;
 +		}
 +		if (map[i].size > 0)
 +			e820_add_region(map[i].addr, map[i].size, map[i].type);
@@ -4149,7 +4632,19 @@ index ad0047f..2439456 100644
  	 */
  	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
  			E820_RESERVED);
-@@ -188,7 +230,5 @@ void __init xen_arch_setup(void)
+@@ -65,6 +108,11 @@ char * __init xen_memory_setup(void)
+ 		      __pa(xen_start_info->pt_base),
+ 			"XEN START INFO");
+ 
++	/*
++	 * Reserve extra memory for the balloon driver.
++	 */
++	reserve_early(PFN_PHYS((u64)nrpages), mem_max, "XEN BALLOON");
++
+ 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ 
+ 	return "Xen";
+@@ -188,7 +236,5 @@ void __init xen_arch_setup(void)
  
  	pm_idle = xen_idle;
  
@@ -4257,7 +4752,7 @@ index 0000000..1cd7f4d
 +	}
 +}
 diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
-index 22494fd..96f7ee0 100644
+index 22494fd..012f6fc 100644
 --- a/arch/x86/xen/xen-ops.h
 +++ b/arch/x86/xen/xen-ops.h
 @@ -29,6 +29,7 @@ void xen_setup_machphys_mapping(void);
@@ -4268,7 +4763,15 @@ index 22494fd..96f7ee0 100644
  
  void xen_post_allocator_init(void);
  
-@@ -82,6 +83,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
+@@ -40,6 +41,7 @@ void xen_enable_syscall(void);
+ void xen_vcpu_restore(void);
+ 
+ void __init xen_build_dynamic_phys_to_machine(void);
++void __init xen_fix_mfn_list(void);
+ 
+ void xen_init_irq_ops(void);
+ void xen_setup_timer(int cpu);
+@@ -82,6 +84,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
  }
  #endif
  
@@ -4293,7 +4796,7 @@ index 22494fd..96f7ee0 100644
     inlineable */
  #define DECL_ASM(ret, name, ...)		\
 diff --git a/block/blk-core.c b/block/blk-core.c
-index 4b45435..48804b0 100644
+index e3299a7..5410d13 100644
 --- a/block/blk-core.c
 +++ b/block/blk-core.c
 @@ -438,6 +438,7 @@ void blk_put_queue(struct request_queue *q)
@@ -4304,7 +4807,7 @@ index 4b45435..48804b0 100644
  
  void blk_cleanup_queue(struct request_queue *q)
  {
-@@ -617,6 +618,7 @@ int blk_get_queue(struct request_queue *q)
+@@ -610,6 +611,7 @@ int blk_get_queue(struct request_queue *q)
  
  	return 1;
  }
@@ -4350,7 +4853,7 @@ index db307a3..c6d845c 100644
  		return_ACPI_STATUS(status);
  	}
 diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
-index 01574a0..c57e7b2 100644
+index 42159a2..e00b83b 100644
 --- a/drivers/acpi/sleep.c
 +++ b/drivers/acpi/sleep.c
 @@ -19,6 +19,8 @@
@@ -4633,10 +5136,10 @@ index eba999f..11071ed 100644
  
  void xen_raw_printk(const char *fmt, ...)
 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
-index c155bd3..fb433cb 100644
+index 5f6509a..6cd6859 100644
 --- a/drivers/net/Kconfig
 +++ b/drivers/net/Kconfig
-@@ -2756,6 +2756,7 @@ source "drivers/s390/net/Kconfig"
+@@ -2763,6 +2763,7 @@ source "drivers/s390/net/Kconfig"
  config XEN_NETDEV_FRONTEND
  	tristate "Xen network device frontend driver"
  	depends on XEN
@@ -5060,10 +5563,10 @@ index cab100a..3b1c421 100644
 +       depends on XEN_DOM0 && ACPI
 \ No newline at end of file
 diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index ec2a39b..386c775 100644
+index ec2a39b..7ff6d27 100644
 --- a/drivers/xen/Makefile
 +++ b/drivers/xen/Makefile
-@@ -1,9 +1,12 @@
+@@ -1,9 +1,13 @@
 -obj-y	+= grant-table.o features.o events.o manage.o
 +obj-y	+= grant-table.o features.o events.o manage.o biomerge.o
  obj-y	+= xenbus/
@@ -5075,6 +5578,7 @@ index ec2a39b..386c775 100644
 -obj-$(CONFIG_XENFS)		+= xenfs/
 -obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 \ No newline at end of file
++obj-$(CONFIG_PCI)			+= pci.o
 +obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 +obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
 +obj-$(CONFIG_XEN_BALLOON)		+= balloon.o
@@ -5115,10 +5619,18 @@ index 0000000..e6d3d0e
 +	return HYPERVISOR_dom0_op(&op);
 +}
 diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
-index f5bbd9e..bfe1fa3 100644
+index f5bbd9e..168e61f 100644
 --- a/drivers/xen/balloon.c
 +++ b/drivers/xen/balloon.c
-@@ -66,8 +66,6 @@ struct balloon_stats {
+@@ -43,6 +43,7 @@
+ #include <linux/mutex.h>
+ #include <linux/list.h>
+ #include <linux/sysdev.h>
++#include <linux/swap.h>
+ 
+ #include <asm/page.h>
+ #include <asm/pgalloc.h>
+@@ -66,8 +67,6 @@ struct balloon_stats {
  	/* We aim for 'current allocation' == 'target allocation'. */
  	unsigned long current_pages;
  	unsigned long target_pages;
@@ -5127,7 +5639,7 @@ index f5bbd9e..bfe1fa3 100644
  	/*
  	 * Drivers may alter the memory reservation independently, but they
  	 * must inform the balloon driver so we avoid hitting the hard limit.
-@@ -84,13 +82,6 @@ static struct sys_device balloon_sysdev;
+@@ -84,21 +83,11 @@ static struct sys_device balloon_sysdev;
  
  static int register_balloon(struct sys_device *sysdev);
  
@@ -5141,7 +5653,33 @@ index f5bbd9e..bfe1fa3 100644
  static struct balloon_stats balloon_stats;
  
  /* We increase/decrease in batches which fit in a page */
-@@ -185,7 +176,7 @@ static void balloon_alarm(unsigned long unused)
+ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+ 
+-/* VM /proc information for memory */
+-extern unsigned long totalram_pages;
+-
+ #ifdef CONFIG_HIGHMEM
+ extern unsigned long totalhigh_pages;
+ #define inc_totalhigh_pages() (totalhigh_pages++)
+@@ -140,6 +129,8 @@ static void balloon_append(struct page *page)
+ 		list_add(&page->lru, &ballooned_pages);
+ 		balloon_stats.balloon_low++;
+ 	}
++
++	totalram_pages--;
+ }
+ 
+ /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+@@ -160,6 +151,8 @@ static struct page *balloon_retrieve(void)
+ 	else
+ 		balloon_stats.balloon_low--;
+ 
++	totalram_pages++;
++
+ 	return page;
+ }
+ 
+@@ -185,7 +178,7 @@ static void balloon_alarm(unsigned long unused)
  
  static unsigned long current_target(void)
  {
@@ -5150,7 +5688,7 @@ index f5bbd9e..bfe1fa3 100644
  
  	target = min(target,
  		     balloon_stats.current_pages +
-@@ -209,7 +200,7 @@ static int increase_reservation(unsigned long nr_pages)
+@@ -209,7 +202,7 @@ static int increase_reservation(unsigned long nr_pages)
  	if (nr_pages > ARRAY_SIZE(frame_list))
  		nr_pages = ARRAY_SIZE(frame_list);
  
@@ -5159,7 +5697,7 @@ index f5bbd9e..bfe1fa3 100644
  
  	page = balloon_first_page();
  	for (i = 0; i < nr_pages; i++) {
-@@ -221,23 +212,10 @@ static int increase_reservation(unsigned long nr_pages)
+@@ -221,23 +214,10 @@ static int increase_reservation(unsigned long nr_pages)
  	set_xen_guest_handle(reservation.extent_start, frame_list);
  	reservation.nr_extents = nr_pages;
  	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
@@ -5185,13 +5723,13 @@ index f5bbd9e..bfe1fa3 100644
  		page = balloon_retrieve();
  		BUG_ON(page == NULL);
  
-@@ -263,13 +241,13 @@ static int increase_reservation(unsigned long nr_pages)
+@@ -263,13 +243,12 @@ static int increase_reservation(unsigned long nr_pages)
  		__free_page(page);
  	}
  
 -	balloon_stats.current_pages += nr_pages;
+-	totalram_pages = balloon_stats.current_pages;
 +	balloon_stats.current_pages += rc;
- 	totalram_pages = balloon_stats.current_pages;
  
   out:
 -	spin_unlock_irqrestore(&balloon_lock, flags);
@@ -5202,7 +5740,7 @@ index f5bbd9e..bfe1fa3 100644
  }
  
  static int decrease_reservation(unsigned long nr_pages)
-@@ -312,7 +290,7 @@ static int decrease_reservation(unsigned long nr_pages)
+@@ -312,7 +291,7 @@ static int decrease_reservation(unsigned long nr_pages)
  	kmap_flush_unused();
  	flush_tlb_all();
  
@@ -5211,9 +5749,11 @@ index f5bbd9e..bfe1fa3 100644
  
  	/* No more mappings: invalidate P2M and add to balloon. */
  	for (i = 0; i < nr_pages; i++) {
-@@ -329,7 +307,7 @@ static int decrease_reservation(unsigned long nr_pages)
+@@ -327,9 +306,8 @@ static int decrease_reservation(unsigned long nr_pages)
+ 	BUG_ON(ret != nr_pages);
+ 
  	balloon_stats.current_pages -= nr_pages;
- 	totalram_pages = balloon_stats.current_pages;
+-	totalram_pages = balloon_stats.current_pages;
  
 -	spin_unlock_irqrestore(&balloon_lock, flags);
 +	spin_unlock_irqrestore(&xen_reservation_lock, flags);
@@ -5228,7 +5768,12 @@ index f5bbd9e..bfe1fa3 100644
  	balloon_stats.target_pages = target;
  	schedule_work(&balloon_worker);
  }
-@@ -431,7 +408,6 @@ static int __init balloon_init(void)
+@@ -426,12 +403,10 @@ static int __init balloon_init(void)
+ 	pr_info("xen_balloon: Initialising balloon driver.\n");
+ 
+ 	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+-	totalram_pages   = balloon_stats.current_pages;
+ 	balloon_stats.target_pages  = balloon_stats.current_pages;
  	balloon_stats.balloon_low   = 0;
  	balloon_stats.balloon_high  = 0;
  	balloon_stats.driver_pages  = 0UL;
@@ -5236,7 +5781,17 @@ index f5bbd9e..bfe1fa3 100644
  
  	init_timer(&balloon_timer);
  	balloon_timer.data = 0;
-@@ -464,6 +440,101 @@ static void balloon_exit(void)
+@@ -442,8 +417,7 @@ static int __init balloon_init(void)
+ 	/* Initialise the balloon with excess memory space. */
+ 	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ 		page = pfn_to_page(pfn);
+-		if (!PageReserved(page))
+-			balloon_append(page);
++		balloon_append(page);
+ 	}
+ 
+ 	target_watch.callback = watch_target;
+@@ -464,6 +438,101 @@ static void balloon_exit(void)
  
  module_exit(balloon_exit);
  
@@ -5338,7 +5893,7 @@ index f5bbd9e..bfe1fa3 100644
  #define BALLOON_SHOW(name, format, args...)				\
  	static ssize_t show_##name(struct sys_device *dev,		\
  				   struct sysdev_attribute *attr,	\
-@@ -476,9 +547,6 @@ module_exit(balloon_exit);
+@@ -476,9 +545,6 @@ module_exit(balloon_exit);
  BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
  BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
  BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
@@ -5348,7 +5903,7 @@ index f5bbd9e..bfe1fa3 100644
  BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
  
  static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
-@@ -548,7 +616,6 @@ static struct attribute *balloon_info_attrs[] = {
+@@ -548,7 +614,6 @@ static struct attribute *balloon_info_attrs[] = {
  	&attr_current_kb.attr,
  	&attr_low_kb.attr,
  	&attr_high_kb.attr,
@@ -10210,6 +10765,128 @@ index 0000000..a492288
 +	printk(KERN_CRIT "registering netback\n");
 +	(void)xenbus_register_backend(&netback);
 +}
+diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
+new file mode 100644
+index 0000000..b13e054
+--- /dev/null
++++ b/drivers/xen/pci.c
+@@ -0,0 +1,116 @@
++/*
++ * Copyright (c) 2009, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++ * Place - Suite 330, Boston, MA 02111-1307 USA.
++ *
++ * Author: Weidong Han <weidong.han at intel.com>
++ */
++
++#include <linux/pci.h>
++#include <xen/interface/physdev.h>
++#include <asm/xen/hypercall.h>
++#include "../pci/pci.h"
++
++
++#ifdef CONFIG_PCI_IOV
++#define HANDLE_PCI_IOV	1
++#else
++#define HANDLE_PCI_IOV	0
++#endif
++
++static int xen_add_device(struct device *dev)
++{
++	int r;
++	struct pci_dev *pci_dev = to_pci_dev(dev);
++
++	if (HANDLE_PCI_IOV && pci_dev->is_virtfn) {
++		struct physdev_manage_pci_ext manage_pci_ext = {
++			.bus		= pci_dev->bus->number,
++			.devfn		= pci_dev->devfn,
++			.is_virtfn 	= 1,
++#ifdef CONFIG_PCI_IOV
++			.physfn.bus	= pci_dev->physfn->bus->number,
++			.physfn.devfn	= pci_dev->physfn->devfn,
++#endif
++		};
++
++		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
++			&manage_pci_ext);
++	} else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
++		struct physdev_manage_pci_ext manage_pci_ext = {
++			.bus		= pci_dev->bus->number,
++			.devfn		= pci_dev->devfn,
++			.is_extfn	= 1,
++		};
++
++		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
++			&manage_pci_ext);
++	} else {
++		struct physdev_manage_pci manage_pci = {
++			.bus 	= pci_dev->bus->number,
++			.devfn	= pci_dev->devfn,
++		};
++
++		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add,
++			&manage_pci);
++	}
++
++	return r;
++}
++
++static int xen_remove_device(struct device *dev)
++{
++	int r;
++	struct pci_dev *pci_dev = to_pci_dev(dev);
++	struct physdev_manage_pci manage_pci;
++
++	manage_pci.bus = pci_dev->bus->number;
++	manage_pci.devfn = pci_dev->devfn;
++
++	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
++		&manage_pci);
++
++	return r;
++}
++
++static int xen_pci_notifier(struct notifier_block *nb,
++			    unsigned long action, void *data)
++{
++	struct device *dev = data;
++	int r = 0;
++
++	switch (action) {
++	case BUS_NOTIFY_ADD_DEVICE:
++		r = xen_add_device(dev);
++		break;
++	case BUS_NOTIFY_DEL_DEVICE:
++		r = xen_remove_device(dev);
++		break;
++	default:
++		break;
++	}
++
++	return r;
++}
++
++static struct notifier_block device_nb = {
++	.notifier_call = xen_pci_notifier,
++};
++
++static int __init register_xen_pci_notifier(void)
++{
++	return bus_register_notifier(&pci_bus_type, &device_nb);
++}
++
++fs_initcall(register_xen_pci_notifier);
 diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
 index 5571f5b..8dca685 100644
 --- a/drivers/xen/xenbus/Makefile
@@ -12121,10 +12798,10 @@ index b4326b5..bad75d1 100644
  #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
  static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
-index 2721f07..99264c3 100644
+index 35e7df1..c7277ce 100644
 --- a/include/linux/interrupt.h
 +++ b/include/linux/interrupt.h
-@@ -58,6 +58,7 @@
+@@ -59,6 +59,7 @@
  #define IRQF_PERCPU		0x00000400
  #define IRQF_NOBALANCING	0x00000800
  #define IRQF_IRQPOLL		0x00001000
@@ -12645,6 +13322,38 @@ index af36ead..eac3ce1 100644
 + */
 +extern spinlock_t xen_reservation_lock;
  #endif /* __XEN_PUBLIC_MEMORY_H__ */
+diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
+index cd69391..7a7d007 100644
+--- a/include/xen/interface/physdev.h
++++ b/include/xen/interface/physdev.h
+@@ -106,6 +106,27 @@ struct physdev_irq {
+ 	uint32_t vector;
+ };
+ 
++#define PHYSDEVOP_manage_pci_add	15
++#define PHYSDEVOP_manage_pci_remove	16
++struct physdev_manage_pci {
++	/* IN */
++	uint8_t bus;
++	uint8_t devfn;
++};
++
++#define PHYSDEVOP_manage_pci_add_ext	20
++struct physdev_manage_pci_ext {
++	/* IN */
++	uint8_t bus;
++	uint8_t devfn;
++	unsigned is_extfn;
++	unsigned is_virtfn;
++	struct {
++		uint8_t bus;
++		uint8_t devfn;
++	} physfn;
++};
++
+ /*
+  * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
+  * hypercall since 0x00030202.
 diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
 new file mode 100644
 index 0000000..83e4714
@@ -13085,10 +13794,10 @@ index b9763ba..542ca7c 100644
  	int (*read_otherend_details)(struct xenbus_device *dev);
  	int (*is_ready)(struct xenbus_device *dev);
 diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
-index 50da676..3dc4e74 100644
+index 61c679d..c077438 100644
 --- a/kernel/irq/manage.c
 +++ b/kernel/irq/manage.c
-@@ -192,7 +192,8 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
+@@ -200,7 +200,8 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
  void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
  {
  	if (suspend) {
@@ -13127,7 +13836,7 @@ index bffe6d7..cec5f62 100644
  		panic("Cannot allocate SWIOTLB overflow buffer!\n");
  
 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index caa9268..1f29611 100644
+index d052abb..087cbf5 100644
 --- a/mm/page_alloc.c
 +++ b/mm/page_alloc.c
 @@ -566,6 +566,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
@@ -13144,7 +13853,7 @@ index caa9268..1f29611 100644
  	if (!PageHighMem(page)) {
  		debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
  		debug_check_no_obj_freed(page_address(page),
-@@ -1025,6 +1032,13 @@ static void free_hot_cold_page(struct page *page, int cold)
+@@ -1028,6 +1035,13 @@ static void free_hot_cold_page(struct page *page, int cold)
  
  	kmemcheck_free_shadow(page, 0);
  

xen.pvops.post.patch:
 arch/x86/kernel/cpu/Makefile                 |    2 ++
 b/arch/x86/include/asm/paravirt_types.h      |    3 +++
 b/arch/x86/kernel/process_32.c               |    2 ++
 b/arch/x86/pci/common.c                      |   16 ++++++++++++++++
 drivers/pci/pci.h                            |    2 ++
 linux-2.6.29.x86_64/arch/x86/xen/Makefile    |    4 ++++
 linux-2.6.29.x86_64/arch/x86/xen/enlighten.c |    2 +-
 linux-2.6.29.x86_64/drivers/xen/Makefile     |    3 +++
 8 files changed, 33 insertions(+), 1 deletion(-)

Index: xen.pvops.post.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/Attic/xen.pvops.post.patch,v
retrieving revision 1.1.2.18
retrieving revision 1.1.2.19
diff -u -p -r1.1.2.18 -r1.1.2.19
--- xen.pvops.post.patch	27 Jun 2009 11:05:24 -0000	1.1.2.18
+++ xen.pvops.post.patch	8 Aug 2009 18:06:06 -0000	1.1.2.19
@@ -49,7 +49,6 @@ index 2202b62..f371fe8 100644
  		pci_cache_line_size = 64 >> 2;  /* K7 & K8 */
  	else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
  		pci_cache_line_size = 128 >> 2;	/* P4 */
-+
 +	if (c->x86_clflush_size != (pci_cache_line_size <<2))
 +		printk(KERN_DEBUG "PCI: old code would have set cacheline "
 +			"size to %d bytes, but clflush_size = %d\n",
@@ -60,11 +59,12 @@ index 2202b62..f371fe8 100644
 +	if (c->x86_clflush_size > 0) {
 +		pci_cache_line_size = c->x86_clflush_size >> 2;
 +		printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n",
-+			pci_cache_line_size >> 2);
++			pci_cache_line_size << 2);
 +	} else {
 +		pci_cache_line_size = 32 >> 2;
 +		printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");
 +	}
++
  }
  
  int __init pcibios_init(void)

xen.pvops.pre.patch:
 b/arch/x86/include/asm/paravirt.h |    3 ---
 b/arch/x86/kernel/process_32.c    |    3 +--
 b/arch/x86/pci/common.c           |   16 ----------------
 drivers/pci/pci.h                 |    2 --
 4 files changed, 1 insertion(+), 23 deletions(-)

Index: xen.pvops.pre.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/Attic/xen.pvops.pre.patch,v
retrieving revision 1.1.2.14
retrieving revision 1.1.2.15
diff -u -p -r1.1.2.14 -r1.1.2.15
--- xen.pvops.pre.patch	27 Jun 2009 11:05:24 -0000	1.1.2.14
+++ xen.pvops.pre.patch	8 Aug 2009 18:06:06 -0000	1.1.2.15
@@ -2,7 +2,6 @@ temporarily revert various Fedora change
 Affected patches;
 linux-2.6-defaults-pci_no_msi.patch - drivers/pci/pci.h
 linux-2.6-execshield.patch - arch/x86/include/asm/paravirt.h arch/x86/kernel/process_32.c
-linux-2.6-pci-cacheline-sizing.patch - arch/x86/pci/common.c
 
 --- a/drivers/pci/pci.h	2009-04-24 20:46:50.000000000 +0100
 +++ b/drivers/pci/pci.h	2009-04-23 20:13:43.000000000 +0100
@@ -64,7 +63,7 @@ index 2202b62..f371fe8 100644
 -	if (c->x86_clflush_size > 0) {
 -		pci_cache_line_size = c->x86_clflush_size >> 2;
 -		printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n",
--			pci_cache_line_size >> 2);
+-			pci_cache_line_size << 2);
 -	} else {
 -		pci_cache_line_size = 32 >> 2;
 -		printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");




More information about the fedora-extras-commits mailing list