aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile13
-rw-r--r--arch/x86/kernel/acpi/cstate.c23
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/entry_32.S8
-rw-r--r--arch/x86/kernel/entry_64.S1
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/kvm.c72
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/module.c37
-rw-r--r--arch/x86/kernel/paravirt.c13
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/probe_roms.c2
-rw-r--r--arch/x86/kernel/process.c23
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/rtc.c23
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S41
-rw-r--r--arch/x86/kernel/vsyscall_64.c90
-rw-r--r--arch/x86/kernel/vsyscall_emu_64.S36
-rw-r--r--arch/x86/kernel/vsyscall_trace.h29
32 files changed, 258 insertions, 198 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 04105574c8e9..82f2912155a5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-#
-# vsyscalls (which work on the user stack) should have
-# no stack-protector checks:
-#
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
-CFLAGS_hpet.o := $(nostackp)
-CFLAGS_paravirt.o := $(nostackp)
-GCOV_PROFILE_vsyscall_64.o := n
-GCOV_PROFILE_hpet.o := n
-GCOV_PROFILE_tsc.o := n
-GCOV_PROFILE_paravirt.o := n
-
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5812404a0d4c..f50e7fb2a201 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+ if (!need_resched()) {
+ if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+ clflush((void *)&current_thread_info()->flags);
+
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
+}
+
void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b117efd24f71..8a439d364b94 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -30,7 +30,7 @@
#include <linux/syscore_ops.h>
#include <linux/io.h>
#include <linux/gfp.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b24be38c8cf8..52fa56399a50 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -38,7 +38,7 @@
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9536b3fe43f8..5d513bc47b6b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -48,7 +48,7 @@
#include <linux/io.h>
#include <asm/apicdef.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index adc66c3a1fef..34b18594e724 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
- mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22a073d7fbff..62184390a601 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,7 +21,7 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <asm/pgtable.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/apic.h>
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 08119a37e53c..6b96110bb0c3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
*/
static int mtrr_rendezvous_handler(void *info)
{
-#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
/*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
mtrr_if->set_all();
}
-#endif
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4ee3abf20ed6..cfa62ec090ec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
perf_callchain_store(entry, regs->ip);
+ if (!current->mm)
+ return;
+
if (perf_callchain_user32(regs, entry))
return;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 45fbb8f7f549..f88af2c2a561 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1590,6 +1590,7 @@ static __init int intel_pmu_init(void)
break;
case 42: /* SandyBridge */
+ case 45: /* SandyBridge, "Romely-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a91974918..f3f6f5344001 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
661: pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
- .balign 4
- .long 661b
- .long 663f
- .word X86_FEATURE_XMM
- .byte 662b-661b
- .byte 664f-663f
+ altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
.previous
.section .altinstr_replacement,"ax"
663: pushl $do_simd_coprocessor_error
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e13329d800c8..6419bb05ecd5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
-zeroentry emulate_vsyscall do_emulate_vsyscall
/* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 12aff2537682..739d8598f789 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -321,7 +321,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
return tmp;
}
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
#define FP_EXP_TAG_VALID 0
#define FP_EXP_TAG_ZERO 1
#define FP_EXP_TAG_SPECIAL 2
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 65b8f5c2eebf..610485223bdb 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -14,7 +14,7 @@
#include <linux/io.h>
#include <linux/delay.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/system.h>
#include <asm/timer.h>
#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f09d4bbe2d2d..b3300e6bacef 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -15,7 +15,7 @@
#include <linux/io.h>
#include <linux/delay.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/system.h>
#include <asm/timer.h>
#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33c07b0b122e..a9c2116001d6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -51,6 +51,15 @@ static int parse_no_kvmapf(char *arg)
early_param("no-kvmapf", parse_no_kvmapf);
+static int steal_acc = 1;
+static int parse_no_stealacc(char *arg)
+{
+ steal_acc = 0;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
struct kvm_para_state {
u8 mmu_queue[MMU_QUEUE_SIZE];
int mmu_queue_len;
@@ -58,6 +67,8 @@ struct kvm_para_state {
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock = 0;
static struct kvm_para_state *kvm_para_state(void)
{
@@ -441,6 +452,21 @@ static void __init paravirt_ops_setup(void)
#endif
}
+static void kvm_register_steal_time(void)
+{
+ int cpu = smp_processor_id();
+ struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
+
+ if (!has_steal_clock)
+ return;
+
+ memset(st, 0, sizeof(*st));
+
+ wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
+ printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
+ cpu, __pa(st));
+}
+
void __cpuinit kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
@@ -457,6 +483,9 @@ void __cpuinit kvm_guest_cpu_init(void)
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
}
+
+ if (has_steal_clock)
+ kvm_register_steal_time();
}
static void kvm_pv_disable_apf(void *unused)
@@ -483,6 +512,31 @@ static struct notifier_block kvm_pv_reboot_nb = {
.notifier_call = kvm_pv_reboot_notify,
};
+static u64 kvm_steal_clock(int cpu)
+{
+ u64 steal;
+ struct kvm_steal_time *src;
+ int version;
+
+ src = &per_cpu(steal_time, cpu);
+ do {
+ version = src->version;
+ rmb();
+ steal = src->steal;
+ rmb();
+ } while ((version & 1) || (version != src->version));
+
+ return steal;
+}
+
+void kvm_disable_steal_time(void)
+{
+ if (!has_steal_clock)
+ return;
+
+ wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
@@ -500,6 +554,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
static void kvm_guest_cpu_offline(void *dummy)
{
+ kvm_disable_steal_time();
kvm_pv_disable_apf(NULL);
apf_task_wake_all();
}
@@ -548,6 +603,11 @@ void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
x86_init.irqs.trap_init = kvm_apf_trap_init;
+ if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ has_steal_clock = 1;
+ pv_time_ops.steal_clock = kvm_steal_clock;
+ }
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);
@@ -555,3 +615,15 @@ void __init kvm_guest_init(void)
kvm_guest_cpu_init();
#endif
}
+
+static __init int activate_jump_labels(void)
+{
+ if (has_steal_clock) {
+ jump_label_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ jump_label_inc(&paravirt_steal_rq_enabled);
+ }
+
+ return 0;
+}
+arch_initcall(activate_jump_labels);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 6389a6bca11b..c1a0188e29ae 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -160,6 +160,7 @@ static void __cpuinit kvm_setup_secondary_clock(void)
static void kvm_crash_shutdown(struct pt_regs *regs)
{
native_write_msr(msr_kvm_system_time, 0, 0);
+ kvm_disable_steal_time();
native_machine_crash_shutdown(regs);
}
#endif
@@ -167,6 +168,7 @@ static void kvm_crash_shutdown(struct pt_regs *regs)
static void kvm_shutdown(void)
{
native_write_msr(msr_kvm_system_time, 0, 0);
+ kvm_disable_steal_time();
native_machine_shutdown();
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 52f256f2cc81..925179f871de 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -45,21 +45,6 @@ void *module_alloc(unsigned long size)
-1, __builtin_return_address(0));
}
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
- vfree(module_region);
-}
-
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- char *secstrings,
- struct module *mod)
-{
- return 0;
-}
-
#ifdef CONFIG_X86_32
int apply_relocate(Elf32_Shdr *sechdrs,
const char *strtab,
@@ -100,17 +85,6 @@ int apply_relocate(Elf32_Shdr *sechdrs,
}
return 0;
}
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
- me->name);
- return -ENOEXEC;
-}
#else /*X86_64*/
int apply_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
@@ -181,17 +155,6 @@ overflow:
me->name);
return -ENOEXEC;
}
-
-int apply_relocate(Elf_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- printk(KERN_ERR "non add relocation not supported\n");
- return -ENOSYS;
-}
-
#endif
int module_finalize(const Elf_Ehdr *hdr,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1aeeb71b..d90272e6bc40 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -202,6 +202,14 @@ static void native_flush_tlb_single(unsigned long addr)
__native_flush_tlb_single(addr);
}
+struct jump_label_key paravirt_steal_enabled;
+struct jump_label_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
/* These are in entry.S */
extern void native_iret(void);
extern void native_irq_enable_sysexit(void);
@@ -299,6 +307,10 @@ struct pv_info pv_info = {
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = __USER_CS,
+#endif
};
struct pv_init_ops pv_init_ops = {
@@ -307,6 +319,7 @@ struct pv_init_ops pv_init_ops = {
struct pv_time_ops pv_time_ops = {
.sched_clock = native_sched_clock,
+ .steal_clock = native_steal_clock,
};
struct pv_irq_ops pv_irq_ops = {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e8c33a302006..726494b58345 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1553,7 +1553,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
continue;
/* cover the whole region */
- npages = (r->end - r->start) >> PAGE_SHIFT;
+ npages = resource_size(r) >> PAGE_SHIFT;
npages++;
iommu_range_reserve(tbl, r->start, npages);
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index ba0a4cce53be..63228035f9d7 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -234,7 +234,7 @@ void __init probe_roms(void)
/* check for extension rom (ignore length byte!) */
rom = isa_bus_to_virt(extension_rom_resource.start);
if (romsignature(rom)) {
- length = extension_rom_resource.end - extension_rom_resource.start + 1;
+ length = resource_size(&extension_rom_resource);
if (romchecksum(rom, length)) {
request_resource(&iomem_resource, &extension_rom_resource);
upper = extension_rom_resource.start;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e1ba8cb24e4e..e7e3b019c439 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -438,29 +438,6 @@ void cpu_idle_wait(void)
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
- clflush((void *)&current_thread_info()->flags);
-
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(ax, cx);
- }
-}
-
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc59067b..7a3b65107a27 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -109,7 +110,8 @@ void cpu_idle(void)
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_idle_call())
+ pm_idle();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca6f7ab8df33..f693e44e1bf6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -136,7 +137,8 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_idle_call())
+ pm_idle();
start_critical_timings();
/* In many cases the interrupt that ended idle
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 3f2ad2640d85..ccdbc16b8941 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -42,8 +42,11 @@ int mach_set_rtc_mmss(unsigned long nowtime)
{
int real_seconds, real_minutes, cmos_minutes;
unsigned char save_control, save_freq_select;
+ unsigned long flags;
int retval = 0;
+ spin_lock_irqsave(&rtc_lock, flags);
+
/* tell the clock it's being set */
save_control = CMOS_READ(RTC_CONTROL);
CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
@@ -93,12 +96,17 @@ int mach_set_rtc_mmss(unsigned long nowtime)
CMOS_WRITE(save_control, RTC_CONTROL);
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
return retval;
}
unsigned long mach_get_cmos_time(void)
{
unsigned int status, year, mon, day, hour, min, sec, century = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
/*
* If UIP is clear, then we have >= 244 microseconds before
@@ -125,6 +133,8 @@ unsigned long mach_get_cmos_time(void)
status = CMOS_READ(RTC_CONTROL);
WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
sec = bcd2bin(sec);
min = bcd2bin(min);
@@ -169,24 +179,15 @@ EXPORT_SYMBOL(rtc_cmos_write);
int update_persistent_clock(struct timespec now)
{
- unsigned long flags;
- int retval;
-
- spin_lock_irqsave(&rtc_lock, flags);
- retval = x86_platform.set_wallclock(now.tv_sec);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return retval;
+ return x86_platform.set_wallclock(now.tv_sec);
}
/* not static: needed by APM */
void read_persistent_clock(struct timespec *ts)
{
- unsigned long retval, flags;
+ unsigned long retval;
- spin_lock_irqsave(&rtc_lock, flags);
retval = x86_platform.get_wallclock();
- spin_unlock_irqrestore(&rtc_lock, flags);
ts->tv_sec = retval;
ts->tv_nsec = 0;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0cfe339..c346d1161488 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
#ifdef CONFIG_X86_64
case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
+ if (!user_64bit_mode(regs))
/* 32-bit mode: register increment */
return 0;
/* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a045a1a2..bc19be332bc9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
.long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */
.long sys_poll
- .long sys_nfsservctl
+ .long sys_ni_syscall /* Old nfsservctl */
.long sys_setresgid16 /* 170 */
.long sys_getresgid16
.long sys_prctl
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fbc097a085ca..6913369c234c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -49,7 +49,7 @@
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/system.h>
#include <asm/traps.h>
#include <asm/desc.h>
@@ -872,12 +872,6 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors);
#endif
-#ifdef CONFIG_X86_64
- BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
- set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
- set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
-#endif
-
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4aa9c54a9b76..0f703f10901a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(6); /* RW_ */
#ifdef CONFIG_X86_64
- user PT_LOAD FLAGS(5); /* R_E */
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(6); /* RW_ */
#endif
@@ -154,44 +153,16 @@ SECTIONS
#ifdef CONFIG_X86_64
-#define VSYSCALL_ADDR (-10*1024*1024)
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
- . = ALIGN(4096);
- __vsyscall_0 = .;
-
- . = VSYSCALL_ADDR;
- .vsyscall : AT(VLOAD(.vsyscall)) {
- *(.vsyscall_0)
-
- . = 1024;
- *(.vsyscall_1)
-
- . = 2048;
- *(.vsyscall_2)
-
- . = 4096; /* Pad the whole page. */
- } :user =0xcc
- . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
-
-#undef VSYSCALL_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
+ . = ALIGN(PAGE_SIZE);
__vvar_page = .;
.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+ /* work around gold bug 13023 */
+ __vvar_beginning_hack = .;
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
- . = offset; \
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) \
+ . = __vvar_beginning_hack + offset; \
*(.vvar_ ## name)
#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff9cef7..18ae83dd1cd7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
* use the vDSO.
*/
-/* Disable profiling for userspace code: */
-#define DISABLE_BRANCH_PROFILING
-
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -50,12 +47,36 @@
#include <asm/vgtod.h>
#include <asm/traps.h>
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+
+static int __init vsyscall_setup(char *str)
+{
+ if (str) {
+ if (!strcmp("emulate", str))
+ vsyscall_mode = EMULATE;
+ else if (!strcmp("native", str))
+ vsyscall_mode = NATIVE;
+ else if (!strcmp("none", str))
+ vsyscall_mode = NONE;
+ else
+ return -EINVAL;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("vsyscall", vsyscall_setup);
+
void update_vsyscall_tz(void)
{
unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, tsk->comm, task_pid_nr(tsk),
- message, regs->ip - 2, regs->cs,
+ message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
}
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
-void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
+bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
long ret;
- local_irq_enable();
-
/*
- * Real 64-bit user mode code has cs == __USER_CS. Anything else
- * is bogus.
+ * No point in checking CS -- the only way to get here is a user mode
+ * trap to a high address, which means that we're in 64-bit user code.
*/
- if (regs->cs != __USER_CS) {
- /*
- * If we trapped from kernel mode, we might as well OOPS now
- * instead of returning to some random address and OOPSing
- * then.
- */
- BUG_ON(!user_mode(regs));
- /* Compat mode and non-compat 32-bit CS should both segfault. */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc from 32-bit mode");
- goto sigsegv;
+ WARN_ON_ONCE(address != regs->ip);
+
+ if (vsyscall_mode == NONE) {
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall attempted with vsyscall=none");
+ return false;
}
- /*
- * x86-ism here: regs->ip points to the instruction after the int 0xcc,
- * and int 0xcc is two bytes long.
- */
- vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+ vsyscall_nr = addr_to_vsyscall_nr(address);
+
+ trace_emulate_vsyscall(vsyscall_nr);
+
if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc (exploit attempt?)");
+ "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
goto sigsegv;
}
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
- warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
+ warn_bad_vsyscall(KERN_WARNING, regs,
+ "vsyscall with bad stack (exploit attempt?)");
goto sigsegv;
}
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
regs->ip = caller;
regs->sp += 8;
- local_irq_disable();
- return;
+ return true;
sigsegv:
- regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
force_sig(SIGSEGV, current);
- local_irq_disable();
+ return true;
}
/*
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
void __init map_vsyscall(void)
{
- extern char __vsyscall_0;
- unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+ extern char __vsyscall_page;
+ unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
extern char __vvar_page;
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
- /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
- __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
+ vsyscall_mode == NATIVE
+ ? PAGE_KERNEL_VSYSCALL
+ : PAGE_KERNEL_VVAR);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
+ (unsigned long)VSYSCALL_START);
+
__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
- BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
+ (unsigned long)VVAR_ADDRESS);
}
static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845eae5ca..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
*/
#include <linux/linkage.h>
+
#include <asm/irq_vectors.h>
+#include <asm/page_types.h>
+#include <asm/unistd_64.h>
+
+__PAGE_ALIGNED_DATA
+ .globl __vsyscall_page
+ .balign PAGE_SIZE, 0xcc
+ .type __vsyscall_page, @object
+__vsyscall_page:
+
+ mov $__NR_gettimeofday, %rax
+ syscall
+ ret
-/* The unused parts of the page are filled with 0xcc by the linker script. */
+ .balign 1024, 0xcc
+ mov $__NR_time, %rax
+ syscall
+ ret
-.section .vsyscall_0, "a"
-ENTRY(vsyscall_0)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_0)
+ .balign 1024, 0xcc
+ mov $__NR_getcpu, %rax
+ syscall
+ ret
-.section .vsyscall_1, "a"
-ENTRY(vsyscall_1)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_1)
+ .balign 4096, 0xcc
-.section .vsyscall_2, "a"
-ENTRY(vsyscall_2)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_2)
+ .size __vsyscall_page, 4096
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
new file mode 100644
index 000000000000..a8b2edec54fe
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+ TP_PROTO(int nr),
+
+ TP_ARGS(nr),
+
+ TP_STRUCT__entry(__field(int, nr)),
+
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+
+ TP_printk("nr = %d", __entry->nr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>