rpms/kernel/devel linux-2.6-kvm-pvmmu-do-not-batch-pte-updates-from-interrupt-context.patch, NONE, 1.1 linux-2.6-kvm-vmx-check-cpl-before-emulating-debug-register-access.patch, NONE, 1.1 linux-2.6-use-__pa_symbol-to-calculate-address-of-C-symbol.patch, NONE, 1.1 linux-2.6-xen-stack-protector-fix.patch, NONE, 1.1 kernel.spec, 1.1762, 1.1763

Thu Sep 3 14:14:22 UTC 2009

Author: jforbes

Update of /cvs/pkgs/rpms/kernel/devel
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv30161

Modified Files:
	kernel.spec 
Added Files:
	linux-2.6-kvm-pvmmu-do-not-batch-pte-updates-from-interrupt-context.patch 
	linux-2.6-kvm-vmx-check-cpl-before-emulating-debug-register-access.patch 
	linux-2.6-use-__pa_symbol-to-calculate-address-of-C-symbol.patch 
	linux-2.6-xen-stack-protector-fix.patch 
Log Message:
Fix xen guest boot with stack protector and small kvm fixes

linux-2.6-kvm-pvmmu-do-not-batch-pte-updates-from-interrupt-context.patch:
 kvm.c |    7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

--- NEW FILE linux-2.6-kvm-pvmmu-do-not-batch-pte-updates-from-interrupt-context.patch ---
Date:  Tue, 25 Aug 2009 01:13:10 -0300
From: Marcelo Tosatti <mtosatti at redhat.com>
To: Avi Kivity <avi at redhat.com>
Cc: kvm <kvm at vger.kernel.org>, Jeremy Fitzhardinge <jeremy at goop.org>
Subject: KVM pvmmu: do not batch pte updates from interrupt context


Commit b8bcfe997e4 made paravirt pte updates synchronous in interrupt
context.

Unfortunately the KVM pv mmu code caches the lazy/nonlazy mode
internally, so a pte update from interrupt context during a lazy mmu
operation can be batched while it should be performed synchronously.

https://bugzilla.redhat.com/show_bug.cgi?id=518022

Drop the internal mode variable and use paravirt_get_lazy_mode(), which
returns the correct state.

Signed-off-by: Marcelo Tosatti <mtosatti at redhat.com>

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c664d51..63b0ec8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,7 +34,6 @@
 struct kvm_para_state {
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	int mmu_queue_len;
-	enum paravirt_lazy_mode mode;
 };
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 {
 	struct kvm_para_state *state = kvm_para_state();
 
-	if (state->mode != PARAVIRT_LAZY_MMU) {
+	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
 		kvm_mmu_op(buffer, len);
 		return;
 	}
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
 
 static void kvm_enter_lazy_mmu(void)
 {
-	struct kvm_para_state *state = kvm_para_state();
-
 	paravirt_enter_lazy_mmu();
-	state->mode = paravirt_get_lazy_mode();
 }
 
 static void kvm_leave_lazy_mmu(void)
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
 
 	mmu_queue_flush(state);
 	paravirt_leave_lazy_mmu();
-	state->mode = paravirt_get_lazy_mode();
 }
 
 static void __init paravirt_ops_setup(void)


linux-2.6-kvm-vmx-check-cpl-before-emulating-debug-register-access.patch:
 include/asm/kvm_host.h |    1 +
 kvm/vmx.c              |    2 ++
 kvm/x86.c              |   14 +++++++++++++-
 3 files changed, 16 insertions(+), 1 deletion(-)

--- NEW FILE linux-2.6-kvm-vmx-check-cpl-before-emulating-debug-register-access.patch ---
Date: Tue,  1 Sep 2009 12:03:25 +0300
From: Avi Kivity <avi at redhat.com>
To: Marcelo Tosatti <mtosatti at redhat.com>
Cc: kvm at vger.kernel.org
Subject: [PATCH] KVM: VMX: Check cpl before emulating debug register access

Debug registers may only be accessed from cpl 0.  Unfortunately, vmx will
code to emulate the instruction even though it was issued from guest
userspace, possibly leading to an unexpected trap later.

Cc: stable at kernel.org
Signed-off-by: Avi Kivity <avi at redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/vmx.c              |    2 ++
 arch/x86/kvm/x86.c              |   13 +++++++++++++
 3 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a3f637f..6046e6f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -621,6 +621,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
 
 int kvm_pic_set_irq(void *opaque, int irq, int level);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78101dd..05cd554 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2931,6 +2931,8 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 	unsigned long val;
 	int dr, reg;
 
+	if (!kvm_require_cpl(vcpu, 0))
+		return 1;
 	dr = vmcs_readl(GUEST_DR7);
 	if (dr & DR7_GD) {
 		/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b3a169..891234b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -223,6 +223,19 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
 /*
+ * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
+ * a #GP and return false.
+ */
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
+{
+	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
+		return true;
+	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_cpl);
+
+/*
  * Load the pae pdptrs.  Return true is they are all valid.
  */
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
-- 
1.6.4.1


linux-2.6-use-__pa_symbol-to-calculate-address-of-C-symbol.patch:
 kvmclock.c |    5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

--- NEW FILE linux-2.6-use-__pa_symbol-to-calculate-address-of-C-symbol.patch ---
Date:  Mon, 31 Aug 2009 03:04:31 -0400
From: Glauber Costa <glommer at redhat.com>
To: kvm at vger.kernel.org
Cc: linux-kernel at vger.kernel.org, avi at redhat.com
Subject: [PATCH] use __pa_symbol to calculate the address of a C symbol.

The use of __pa() to calculate the address of a C-visible symbol
is wrong, and can lead to unpredictable results. See arch/x86/include/asm/page.h
for details.

It should be replaced with __pa_symbol(), that does the correct math here,
by taking relocations into account.

Signed-off-by: Glauber Costa <glommer at redhat.com>
---
 arch/x86/kernel/kvmclock.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 223af43..e5efcdc 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
 	struct timespec ts;
 	int low, high;
 
-	low = (int)__pa(&wall_clock);
-	high = ((u64)__pa(&wall_clock) >> 32);
+	low = (int)__pa_symbol(&wall_clock);
+	high = ((u64)__pa_symbol(&wall_clock) >> 32);
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
 	vcpu_time = &get_cpu_var(hv_clock);
-- 
1.6.2.2

linux-2.6-xen-stack-protector-fix.patch:
 arch/x86/mm/Makefile     |    4 +
 arch/x86/xen/Makefile    |    2 
 arch/x86/xen/enlighten.c |  133 +++++++++++++++++++++++++++++++++++++++--------
 arch/x86/xen/smp.c       |    1 
 drivers/xen/Makefile     |    3 +
 5 files changed, 123 insertions(+), 20 deletions(-)

--- NEW FILE linux-2.6-xen-stack-protector-fix.patch ---
>From 27512accbc440c1460b2df6024ed03fc40feff7d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
Date: Thu, 27 Aug 2009 12:46:35 -0700
Subject: [PATCH] xen: make -fstack-protector work under Xen

-fstack-protector uses a special per-cpu "stack canary" value.
gcc generates special code in each function to test the canary to make
sure that the function's stack hasn't been overrun.

On x86-64, this is simply an offset of %gs, which is the usual per-cpu
base segment register, so setting it up simply requires loading %gs's
base as normal.

On i386, the stack protector segment is %gs (rather than the usual kernel
percpu %fs segment register).  This requires setting up the full kernel
GDT and then loading %gs accordingly.  (We also need to make sure %fs is
initialized on secondary cpus too.)

To keep things consistent, we do the full GDT/segment register setup on
both architectures.

Because we need to avoid -fstack-protected code before setting up the GDT
and because there's no way to disable it on a per-function basis, several
files need to have stack-protector inhibited.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index eefdeee..72bb3a2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,10 @@
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 	    pat.o pgtable.o gup.o
 
+# Make sure __phys_addr has no stackprotector
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_ioremap.o		:= $(nostackp)
+
 obj-$(CONFIG_SMP)		+= tlb.o
 
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 7410640..3bb4fc2 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -8,6 +8,7 @@ endif
 # Make sure early boot has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_enlighten.o		:= $(nostackp)
+CFLAGS_mmu.o			:= $(nostackp)
 
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
@@ -16,3 +17,4 @@ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
+
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e5b903b..596a7d7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -51,6 +51,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/stackprotector.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -339,18 +340,28 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	unsigned long frames[pages];
 	int f;
 
-	/* A GDT can be up to 64k in size, which corresponds to 8192
-	   8-byte entries, or 16 4k pages.. */
+	/* 
+	 * A GDT can be up to 64k in size, which corresponds to 8192
+	 * 8-byte entries, or 16 4k pages..
+	 */
 
 	BUG_ON(size > 65536);
 	BUG_ON(va & ~PAGE_MASK);
 
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
 		int level;
-		pte_t *ptep = lookup_address(va, &level);
+		pte_t *ptep;
 		unsigned long pfn, mfn;
 		void *virt;
 
+		/*
+		 * The GDT is per-cpu and is in the percpu data area.
+		 * That can be virtually mapped, so we need to do a
+		 * page-walk to get the underlying MFN for the
+		 * hypercall.  The page can also be in the kernel's
+		 * linear range, so we need to RO that mapping too.
+		 */
+		ptep = lookup_address(va, &level);
 		BUG_ON(ptep == NULL);
 
 		pfn = pte_pfn(*ptep);
@@ -367,6 +378,44 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 		BUG();
 }
 
+/* 
+ * load_gdt for early boot, when the gdt is only mapped once
+ */
+static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+{
+	unsigned long va = dtr->address;
+	unsigned int size = dtr->size + 1;
+	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	unsigned long frames[pages];
+	int f;
+
+	/* 
+	 * A GDT can be up to 64k in size, which corresponds to 8192
+	 * 8-byte entries, or 16 4k pages..
+	 */
+
+	BUG_ON(size > 65536);
+	BUG_ON(va & ~PAGE_MASK);
+
+	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
+		pte_t pte;
+		unsigned long pfn, mfn;
+
+		pfn = virt_to_pfn(va);
+		mfn = pfn_to_mfn(pfn);
+
+		pte = pfn_pte(pfn, PAGE_KERNEL_RO);
+
+		if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+			BUG();
+
+		frames[f] = mfn;
+	}
+
+	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+		BUG();
+}
+
 static void load_TLS_descriptor(struct thread_struct *t,
 				unsigned int cpu, unsigned int i)
 {
@@ -590,6 +639,29 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 	preempt_enable();
 }
 
+/* 
+ * Version of write_gdt_entry for use at early boot-time needed to
+ * update an entry as simply as possible.
+ */
+static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
+					    const void *desc, int type)
+{
+	switch (type) {
+	case DESC_LDT:
+	case DESC_TSS:
+		/* ignore */
+		break;
+
+	default: {
+		xmaddr_t maddr = virt_to_machine(&dt[entry]);
+
+		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
+			dt[entry] = *(struct desc_struct *)desc;
+	}
+
+	}	
+}
+
 static void xen_load_sp0(struct tss_struct *tss,
 			 struct thread_struct *thread)
 {
@@ -974,6 +1046,23 @@ static const struct machine_ops __initdata xen_machine_ops = {
 	.emergency_restart = xen_emergency_restart,
 };
 
+/* 
+ * Set up the GDT and segment registers for -fstack-protector.  Until
+ * we do this, we have to be careful not to call any stack-protected
+ * function, which is most of the kernel.
+ */
+static void __init xen_setup_stackprotector(void)
+{
+	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
+	pv_cpu_ops.load_gdt = xen_load_gdt_boot;
+
+	setup_stack_canary_segment(0);
+	switch_to_new_gdt(0);
+
+	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;       
+	pv_cpu_ops.load_gdt = xen_load_gdt;
+}
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -992,13 +1081,28 @@ asmlinkage void __init xen_start_kernel(void)
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
 
-#ifdef CONFIG_X86_64
-	/*
-	 * Setup percpu state.  We only need to do this for 64-bit
-	 * because 32-bit already has %fs set properly.
+	/* 
+	 * Set up some pagetable state before starting to set any ptes.
 	 */
-	load_percpu_segment(0);
-#endif
+
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!xen_initial_domain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
+	__supported_pte_mask |= _PAGE_IOMAP;
+
+	xen_setup_features();
+
+	/* Get mfn list */
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		xen_build_dynamic_phys_to_machine();
+
+	/* 
+	 * Set up kernel GDT and segment registers, mainly so that
+	 * -fstack-protector code can be executed.
+	 */
+	xen_setup_stackprotector();
 
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
@@ -1010,8 +1114,6 @@ asmlinkage void __init xen_start_kernel(void)
 	set_xen_basic_apic_ops();
 #endif
 
-	xen_setup_features();
-
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
 		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
 		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@@ -1028,17 +1130,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_smp_init();
 
-	/* Get mfn list */
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		xen_build_dynamic_phys_to_machine();
-
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-	/* Prevent unwanted bits from being set in PTEs. */
-	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!xen_initial_domain())
-		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
 #ifdef CONFIG_X86_64
 	/* Work out if we support NX */
 	check_efer();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 429834e..fe03eee 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -236,6 +236,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->user_regs.ss = __KERNEL_DS;
 #ifdef CONFIG_X86_32
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
+	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 #else
 	ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ec2a39b..7c28434 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,6 +1,9 @@
 obj-y	+= grant-table.o features.o events.o manage.o
 obj-y	+= xenbus/
 
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_features.o			:= $(nostackp)
+
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/kernel.spec,v
retrieving revision 1.1762
retrieving revision 1.1763
diff -u -p -r1.1762 -r1.1763
--- kernel.spec	3 Sep 2009 00:44:23 -0000	1.1762
+++ kernel.spec	3 Sep 2009 14:14:21 -0000	1.1763
@@ -676,10 +676,15 @@ Patch1515: lirc-2.6.31.patch
 Patch1517: hdpvr-ir-enable.patch
 Patch1518: hid-ignore-all-recent-imon-devices.patch
 
+# virt + ksm patches
 Patch1550: linux-2.6-ksm.patch
 Patch1551: linux-2.6-ksm-kvm.patch
 Patch1552: linux-2.6-ksm-updates.patch
 Patch1553: linux-2.6-ksm-fix-munlock.patch
+Patch1575: linux-2.6-kvm-vmx-check-cpl-before-emulating-debug-register-access.patch
+Patch1576: linux-2.6-use-__pa_symbol-to-calculate-address-of-C-symbol.patch
+Patch1577: linux-2.6-kvm-pvmmu-do-not-batch-pte-updates-from-interrupt-context.patch
+Patch1578: linux-2.6-xen-stack-protector-fix.patch
 
 # nouveau + drm fixes
 Patch1812: drm-next.patch
@@ -1303,6 +1308,12 @@ ApplyPatch linux-2.6-ksm-fix-munlock.pat
 # Optimize KVM for KSM support
 ApplyPatch linux-2.6-ksm-kvm.patch
 
+# Assorted Virt Fixes
+ApplyPatch linux-2.6-kvm-vmx-check-cpl-before-emulating-debug-register-access.patch
+ApplyPatch linux-2.6-use-__pa_symbol-to-calculate-address-of-C-symbol.patch
+ApplyPatch linux-2.6-kvm-pvmmu-do-not-batch-pte-updates-from-interrupt-context.patch
+ApplyPatch linux-2.6-xen-stack-protector-fix.patch
+
 # Fix block I/O errors in KVM
 ApplyPatch linux-2.6-block-silently-error-unsupported-empty-barriers-too.patch
 
@@ -2009,6 +2020,10 @@ fi
 # and build.
 
 %changelog
+* Thu Sep 03 2009 Justin M.Forbes <jforbes at redhat.com>
+- Fix xen guest with stack protector. (#508120)
+- Small kvm fixes.
+
 * Wed Sep 02 2009 Adam Jackson <ajax at redhat.com> 2.6.31-0.199.rc8.git2
 - drm-intel-pm.patch: Disable by default, too flickery on too many machines.
   Enable with i915.powersave=1.