rpms/kernel/devel linux-2.6-ksm-fix-munlock.patch, NONE, 1.1 kernel.spec, 1.1741, 1.1742 linux-2.6-ksm-kvm.patch, 1.2, 1.3 linux-2.6-ksm.patch, 1.2, 1.3

Justin M. Forbes jforbes at fedoraproject.org
Wed Aug 26 19:25:58 UTC 2009


Author: jforbes

Update of /cvs/pkgs/rpms/kernel/devel
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv24574

Modified Files:
	kernel.spec linux-2.6-ksm-kvm.patch linux-2.6-ksm.patch 
Added Files:
	linux-2.6-ksm-fix-munlock.patch 
Log Message:
Fix munlock with KSM (#516909) and re-enable KSM

linux-2.6-ksm-fix-munlock.patch:
 include/linux/ksm.h |   11 ++++-------
 kernel/fork.c       |    1 +
 mm/ksm.c            |    5 +----
 mm/memory.c         |    4 ++--
 mm/mmap.c           |    7 -------
 5 files changed, 8 insertions(+), 20 deletions(-)

--- NEW FILE linux-2.6-ksm-fix-munlock.patch ---
From: Andrea Arcangeli <aarcange at redhat.com>

Allowing page faults triggered by drivers tracking the mm during
exit_mmap with mm_users already zero is asking for trouble. And we
can't stop page faults from happening during exit_mmap, or munlock
fails (munlock had also better stop triggering page faults once
mm_users is zero).

ksm_exit, if there are rmap_items still chained on this mm slot, will
take the mmap_sem write side, preventing ksm from continuing to work on
an mm while exit_mmap runs. And ksm will bail out as soon as it notices
that mm_users is already zero, thanks to the ksm_test_exit checks. So
when a task is killed by the OOM killer or by the user, ksm will not
indefinitely prevent it from running exit_mmap and releasing its memory.

Signed-off-by: Andrea Arcangeli <aarcange at redhat.com>
---

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 2d64ff3..0e26de6 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -18,8 +18,7 @@ struct mmu_gather;
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
 int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm,
-		struct mmu_gather **tlbp, unsigned long end);
+void __ksm_exit(struct mm_struct *mm);
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
@@ -41,11 +40,10 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm,
-			    struct mmu_gather **tlbp, unsigned long end)
+static inline void ksm_exit(struct mm_struct *mm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm, tlbp, end);
+		__ksm_exit(mm);
 }
 
 /*
@@ -86,8 +84,7 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 	return 0;
 }
 
-static inline void ksm_exit(struct mm_struct *mm,
-			    struct mmu_gather **tlbp, unsigned long end)
+static inline void ksm_exit(struct mm_struct *mm)
 {
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 9a16c21..6f93809 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -515,6 +515,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
+		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
diff --git a/mm/ksm.c b/mm/ksm.c
index d03627f..329ebe9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1428,8 +1428,7 @@ int __ksm_enter(struct mm_struct *mm)
 	return 0;
 }
 
-void __ksm_exit(struct mm_struct *mm,
-		struct mmu_gather **tlbp, unsigned long end)
+void __ksm_exit(struct mm_struct *mm)
 {
 	struct mm_slot *mm_slot;
 	int easy_to_free = 0;
@@ -1462,10 +1461,8 @@ void __ksm_exit(struct mm_struct *mm,
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
-		tlb_finish_mmu(*tlbp, 0, end);
 		down_write(&mm->mmap_sem);
 		up_write(&mm->mmap_sem);
-		*tlbp = tlb_gather_mmu(mm, 1);
 	}
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 4a2c60d..025431e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2603,7 +2603,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (!pte_none(*page_table) || ksm_test_exit(mm))
+	if (!pte_none(*page_table))
 		goto release;
 
 	inc_mm_counter(mm, anon_rss);
@@ -2753,7 +2753,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * handle that later.
 	 */
 	/* Only go through if we didn't race with anybody else... */
-	if (likely(pte_same(*page_table, orig_pte) && !ksm_test_exit(mm))) {
+	if (likely(pte_same(*page_table, orig_pte))) {
 		flush_icache_page(vma, page);
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
diff --git a/mm/mmap.c b/mm/mmap.c
index 1b0a709..f3f2a22 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2111,13 +2111,6 @@ void exit_mmap(struct mm_struct *mm)
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 
-	/*
-	 * For KSM to handle OOM without deadlock when it's breaking COW in a
-	 * likely victim of the OOM killer, we must serialize with ksm_exit()
-	 * after freeing mm's pages but before freeing its page tables.
-	 */
-	ksm_exit(mm, &tlb, end);
-
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/kernel.spec,v
retrieving revision 1.1741
retrieving revision 1.1742
diff -u -p -r1.1741 -r1.1742
--- kernel.spec	26 Aug 2009 15:50:19 -0000	1.1741
+++ kernel.spec	26 Aug 2009 19:25:56 -0000	1.1742
@@ -678,6 +678,7 @@ Patch1518: hid-ignore-all-recent-imon-de
 Patch1550: linux-2.6-ksm.patch
 Patch1551: linux-2.6-ksm-kvm.patch
 Patch1552: linux-2.6-ksm-updates.patch
+Patch1553: linux-2.6-ksm-fix-munlock.patch
 
 # nouveau + drm fixes
 Patch1812: drm-next.patch
@@ -1298,10 +1299,11 @@ ApplyPatch hdpvr-ir-enable.patch
 ApplyPatch hid-ignore-all-recent-imon-devices.patch
 
 # Add kernel KSM support
-#ApplyPatch linux-2.6-ksm.patch
-#ApplyPatch linux-2.6-ksm-updates.patch
+ApplyPatch linux-2.6-ksm.patch
+ApplyPatch linux-2.6-ksm-updates.patch
+ApplyPatch linux-2.6-ksm-fix-munlock.patch
 # Optimize KVM for KSM support
-#ApplyPatch linux-2.6-ksm-kvm.patch
+ApplyPatch linux-2.6-ksm-kvm.patch
 
 # Fix block I/O errors in KVM
 ApplyPatch linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch
@@ -2011,6 +2013,10 @@ fi
 # and build.
 
 %changelog
+* Wed Aug 26 2009 Justin M. Forbes <jforbes at redhat.com>
+- Fix munlock with KSM (#516909)
+- Re-enable KSM
+
 * Wed Aug 26 2009 Chuck Ebbert <cebbert at redhat.com>
 - 2.6.31-rc7-git4
 - Drop patches merged upstream:

linux-2.6-ksm-kvm.patch:
 arch/x86/include/asm/kvm_host.h |    1 
 arch/x86/kvm/mmu.c              |   91 ++++++++++++++++++++++++++++++++--------
 arch/x86/kvm/paging_tmpl.h      |   15 +++++-
 virt/kvm/kvm_main.c             |   14 ++++++
 4 files changed, 101 insertions(+), 20 deletions(-)

Index: linux-2.6-ksm-kvm.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-ksm-kvm.patch,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -p -r1.2 -r1.3
--- linux-2.6-ksm-kvm.patch	23 Jul 2009 20:42:08 -0000	1.2
+++ linux-2.6-ksm-kvm.patch	26 Aug 2009 19:25:56 -0000	1.3
@@ -22,8 +22,8 @@ directly map pages into its shadow page 
 Signed-off-by: Izik Eidus <ieidus at redhat.com>
 Signed-off-by: Justin M. Forbes <jforbes at redhat.com>
 ---
---- linux-2.6.30.x86_64/arch/x86/include/asm/kvm_host.h	2009-07-23 14:58:56.000000000 -0500
-+++ linux-2.6.30.x86_64-ksm/arch/x86/include/asm/kvm_host.h	2009-07-23 15:00:04.000000000 -0500
+--- linux-2.6.30.x86_64/arch/x86/include/asm/kvm_host.h	2009-08-20 10:37:37.784886414 -0500
++++ linux-2.6.30.x86_64.kvm/arch/x86/include/asm/kvm_host.h	2009-08-20 10:39:33.742641558 -0500
 @@ -796,5 +796,6 @@ asmlinkage void kvm_handle_fault_on_rebo
  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
@@ -31,8 +31,8 @@ Signed-off-by: Justin M. Forbes <jforbes
 +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
  
  #endif /* _ASM_X86_KVM_HOST_H */
---- linux-2.6.30.x86_64/arch/x86/kvm/mmu.c	2009-07-23 14:58:56.000000000 -0500
-+++ linux-2.6.30.x86_64-ksm/arch/x86/kvm/mmu.c	2009-07-23 15:00:04.000000000 -0500
+--- linux-2.6.30.x86_64/arch/x86/kvm/mmu.c	2009-08-20 10:37:37.964887039 -0500
++++ linux-2.6.30.x86_64.kvm/arch/x86/kvm/mmu.c	2009-08-20 10:41:15.231638028 -0500
 @@ -139,6 +139,8 @@ module_param(oos_shadow, bool, 0644);
  #define ACC_USER_MASK    PT_USER_MASK
  #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
@@ -54,7 +54,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  static gfn_t pse36_gfn_delta(u32 gpte)
  {
  	int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
-@@ -566,9 +573,7 @@ static void rmap_remove(struct kvm *kvm,
+@@ -573,9 +580,7 @@ static void rmap_remove(struct kvm *kvm,
  	if (*spte & shadow_accessed_mask)
  		kvm_set_pfn_accessed(pfn);
  	if (is_writeble_pte(*spte))
@@ -65,7 +65,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
  	if (!*rmapp) {
  		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
-@@ -677,7 +682,8 @@ static int rmap_write_protect(struct kvm
+@@ -684,7 +689,8 @@ static int rmap_write_protect(struct kvm
  	return write_protected;
  }
  
@@ -75,7 +75,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  {
  	u64 *spte;
  	int need_tlb_flush = 0;
-@@ -692,8 +698,48 @@ static int kvm_unmap_rmapp(struct kvm *k
+@@ -699,8 +705,48 @@ static int kvm_unmap_rmapp(struct kvm *k
  	return need_tlb_flush;
  }
  
@@ -125,7 +125,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  {
  	int i;
  	int retval = 0;
-@@ -714,11 +760,13 @@ static int kvm_handle_hva(struct kvm *kv
+@@ -721,11 +767,13 @@ static int kvm_handle_hva(struct kvm *kv
  		end = start + (memslot->npages << PAGE_SHIFT);
  		if (hva >= start && hva < end) {
  			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
@@ -141,7 +141,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  		}
  	}
  
-@@ -727,10 +775,16 @@ static int kvm_handle_hva(struct kvm *kv
+@@ -734,10 +782,16 @@ static int kvm_handle_hva(struct kvm *kv
  
  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
  {
@@ -160,7 +160,14 @@ Signed-off-by: Justin M. Forbes <jforbes
  {
  	u64 *spte;
  	int young = 0;
-@@ -756,7 +810,7 @@ static int kvm_age_rmapp(struct kvm *kvm
+@@ -770,13 +824,13 @@ static void rmap_recycle(struct kvm_vcpu
+ 	gfn = unalias_gfn(vcpu->kvm, gfn);
+ 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+ 
+-	kvm_unmap_rmapp(vcpu->kvm, rmapp);
++	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+ 	kvm_flush_remote_tlbs(vcpu->kvm);
+ }
  
  int kvm_age_hva(struct kvm *kvm, unsigned long hva)
  {
@@ -169,7 +176,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  }
  
  #ifdef MMU_DEBUG
-@@ -1665,7 +1719,7 @@ static int set_spte(struct kvm_vcpu *vcp
+@@ -1686,7 +1740,7 @@ static int set_spte(struct kvm_vcpu *vcp
  		    unsigned pte_access, int user_fault,
  		    int write_fault, int dirty, int largepage,
  		    gfn_t gfn, pfn_t pfn, bool speculative,
@@ -178,7 +185,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  {
  	u64 spte;
  	int ret = 0;
-@@ -1723,6 +1777,8 @@ static int set_spte(struct kvm_vcpu *vcp
+@@ -1744,6 +1798,8 @@ static int set_spte(struct kvm_vcpu *vcp
  				spte &= ~PT_WRITABLE_MASK;
  		}
  	}
@@ -187,7 +194,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  
  	if (pte_access & ACC_WRITE_MASK)
  		mark_page_dirty(vcpu->kvm, gfn);
-@@ -1736,7 +1792,8 @@ static void mmu_set_spte(struct kvm_vcpu
+@@ -1757,7 +1813,8 @@ static void mmu_set_spte(struct kvm_vcpu
  			 unsigned pt_access, unsigned pte_access,
  			 int user_fault, int write_fault, int dirty,
  			 int *ptwrite, int largepage, gfn_t gfn,
@@ -197,7 +204,7 @@ Signed-off-by: Justin M. Forbes <jforbes
  {
  	int was_rmapped = 0;
  	int was_writeble = is_writeble_pte(*shadow_pte);
-@@ -1765,7 +1822,8 @@ static void mmu_set_spte(struct kvm_vcpu
+@@ -1787,7 +1844,8 @@ static void mmu_set_spte(struct kvm_vcpu
  			was_rmapped = 1;
  	}
  	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
@@ -207,17 +214,17 @@ Signed-off-by: Justin M. Forbes <jforbes
  		if (write_fault)
  			*ptwrite = 1;
  		kvm_x86_ops->tlb_flush(vcpu);
-@@ -1782,8 +1840,7 @@ static void mmu_set_spte(struct kvm_vcpu
+@@ -1804,8 +1862,7 @@ static void mmu_set_spte(struct kvm_vcpu
  	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
  	if (!was_rmapped) {
- 		rmap_add(vcpu, shadow_pte, gfn, largepage);
+ 		rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
 -		if (!is_rmap_pte(*shadow_pte))
 -			kvm_release_pfn_clean(pfn);
 +		kvm_release_pfn_clean(pfn);
+ 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+ 			rmap_recycle(vcpu, gfn, largepage);
  	} else {
- 		if (was_writeble)
- 			kvm_release_pfn_dirty(pfn);
-@@ -1813,7 +1870,7 @@ static int __direct_map(struct kvm_vcpu 
+@@ -1837,7 +1894,7 @@ static int __direct_map(struct kvm_vcpu 
  		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
  			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
  				     0, write, 1, &pt_write,
@@ -226,8 +233,8 @@ Signed-off-by: Justin M. Forbes <jforbes
  			++vcpu->stat.pf_fixed;
  			break;
  		}
---- linux-2.6.30.x86_64/arch/x86/kvm/paging_tmpl.h	2009-07-23 14:58:56.000000000 -0500
-+++ linux-2.6.30.x86_64-ksm/arch/x86/kvm/paging_tmpl.h	2009-07-23 15:01:49.000000000 -0500
+--- linux-2.6.30.x86_64/arch/x86/kvm/paging_tmpl.h	2009-08-20 10:37:37.966889166 -0500
++++ linux-2.6.30.x86_64.kvm/arch/x86/kvm/paging_tmpl.h	2009-08-20 10:39:33.747636180 -0500
 @@ -266,9 +266,13 @@ static void FNAME(update_pte)(struct kvm
  	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
  		return;
@@ -275,8 +282,8 @@ Signed-off-by: Justin M. Forbes <jforbes
  	}
  
  	return !nr_present;
---- linux-2.6.30.x86_64/virt/kvm/kvm_main.c	2009-07-23 14:58:58.000000000 -0500
-+++ linux-2.6.30.x86_64-ksm/virt/kvm/kvm_main.c	2009-07-23 15:00:04.000000000 -0500
+--- linux-2.6.30.x86_64/virt/kvm/kvm_main.c	2009-08-20 10:37:45.448886340 -0500
++++ linux-2.6.30.x86_64.kvm/virt/kvm/kvm_main.c	2009-08-20 10:39:33.749636212 -0500
 @@ -859,6 +859,19 @@ static void kvm_mmu_notifier_invalidate_
  
  }

linux-2.6-ksm.patch:
 b/arch/alpha/include/asm/mman.h     |    3 
 b/arch/mips/include/asm/mman.h      |    3 
 b/arch/parisc/include/asm/mman.h    |    3 
 b/arch/xtensa/include/asm/mman.h    |    3 
 b/fs/proc/page.c                    |    5 
 b/include/asm-generic/mman-common.h |    5 
 b/include/linux/ksm.h               |   50 +
 b/include/linux/mm.h                |    1 
 b/include/linux/mmu_notifier.h      |   34 
 b/include/linux/rmap.h              |    6 
 b/include/linux/sched.h             |    7 
 b/kernel/fork.c                     |    8 
 b/mm/Kconfig                        |   11 
 b/mm/Makefile                       |    1 
 b/mm/ksm.c                          |   56 +
 b/mm/madvise.c                      |   41 
 b/mm/memory.c                       |    9 
 b/mm/mmu_notifier.c                 |   22 
 b/mm/mremap.c                       |   14 
 b/mm/rmap.c                         |   23 
 include/linux/ksm.h                 |   29 
 mm/ksm.c                            | 1506 +++++++++++++++++++++++++++++++++++-
 mm/madvise.c                        |   16 
 mm/memory.c                         |    7 
 24 files changed, 1780 insertions(+), 83 deletions(-)

Index: linux-2.6-ksm.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-ksm.patch,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -p -r1.2 -r1.3
--- linux-2.6-ksm.patch	31 Jul 2009 01:00:04 -0000	1.2
+++ linux-2.6-ksm.patch	26 Aug 2009 19:25:57 -0000	1.3
@@ -541,14 +541,14 @@ index bd29592..ac312a4 100644
  		struct file *file;
 @@ -425,7 +429,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
  	atomic_set(&mm->mm_count, 1);
- 	init_rwsem(&mm->mmap_sem);
+	init_rwsem(&mm->mmap_sem);
  	INIT_LIST_HEAD(&mm->mmlist);
 -	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
 +	mm->flags = (current->mm) ?
 +		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
- 	mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0;
  	mm->core_state = NULL;
  	mm->nr_ptes = 0;
+	set_mm_counter(mm, file_rss, 0);
 @@ -486,6 +491,7 @@ void mmput(struct mm_struct *mm)
  
  	if (atomic_dec_and_test(&mm->mm_users)) {




More information about the fedora-extras-commits mailing list