Diffstat (limited to 'trunk/2.6.22/20046_xen3-patch-2.6.21.patch1')
-rw-r--r-- | trunk/2.6.22/20046_xen3-patch-2.6.21.patch1 | 5107
1 file changed, 5107 insertions, 0 deletions
diff --git a/trunk/2.6.22/20046_xen3-patch-2.6.21.patch1 b/trunk/2.6.22/20046_xen3-patch-2.6.21.patch1 new file mode 100644 index 0000000..7211fb9 --- /dev/null +++ b/trunk/2.6.22/20046_xen3-patch-2.6.21.patch1 @@ -0,0 +1,5107 @@ +From: www.kernel.org +Subject: Linux 2.6.21 +Patch-mainline: 2.6.21 + +Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py + +Acked-by: jbeulich@novell.com + +Index: 10.3-2007-11-26/arch/i386/Kconfig +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/Kconfig 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/Kconfig 2007-10-22 13:58:46.000000000 +0200 +@@ -21,15 +21,17 @@ config GENERIC_TIME + config CLOCKSOURCE_WATCHDOG + bool + default y ++ depends on !X86_XEN + + config GENERIC_CLOCKEVENTS + bool + default y ++ depends on !X86_XEN + + config GENERIC_CLOCKEVENTS_BROADCAST + bool + default y +- depends on X86_LOCAL_APIC ++ depends on X86_LOCAL_APIC && !X86_XEN + + config LOCKDEP_SUPPORT + bool +Index: 10.3-2007-11-26/arch/i386/kernel/Makefile +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/Makefile 2007-09-03 09:52:56.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/Makefile 2007-10-22 13:58:46.000000000 +0200 +@@ -98,7 +98,7 @@ include $(srctree)/scripts/Makefile.xen + + obj-y += fixup.o + microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o +-n-obj-xen := i8253.o i8259.o reboot.o smpboot.o trampoline.o tsc.o ++n-obj-xen := i8253.o i8259.o reboot.o smpboot.o trampoline.o tsc.o tsc_sync.o + + obj-y := $(call filterxen, $(obj-y), $(n-obj-xen)) + obj-y := $(call cherrypickxen, $(obj-y)) +Index: 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/acpi/boot-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -25,6 +25,7 @@ + + #include <linux/init.h> + #include <linux/acpi.h> ++#include <linux/acpi_pmtmr.h> + #include <linux/efi.h> + #include <linux/cpumask.h> + #include <linux/module.h> +@@ -66,7 +67,7 @@ static inline int acpi_madt_oem_check(ch + + #define BAD_MADT_ENTRY(entry, end) ( \ + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ +- ((acpi_table_entry_header *)entry)->length < sizeof(*entry)) ++ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) + + #define PREFIX "ACPI: " + +@@ -79,7 +80,7 @@ int acpi_ioapic; + int acpi_strict; + EXPORT_SYMBOL(acpi_strict); + +-acpi_interrupt_flags acpi_sci_flags __initdata; ++u8 acpi_sci_flags __initdata; + int acpi_sci_override_gsi __initdata; + int acpi_skip_timer_override __initdata; + int acpi_use_timer_override __initdata; +@@ -92,11 +93,6 @@ static u64 acpi_lapic_addr __initdata = + #warning ACPI uses CMPXCHG, i486 and later hardware + #endif + +-#define MAX_MADT_ENTRIES 256 +-u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] = +- {[0 ... MAX_MADT_ENTRIES - 1] = 0xff }; +-EXPORT_SYMBOL(x86_acpiid_to_apicid); +- + /* -------------------------------------------------------------------------- + Boot-time Configuration + -------------------------------------------------------------------------- */ +@@ -168,30 +164,26 @@ char *__acpi_map_table(unsigned long phy + + #ifdef CONFIG_PCI_MMCONFIG + /* The physical address of the MMCONFIG aperture. Set from ACPI tables. 
*/ +-struct acpi_table_mcfg_config *pci_mmcfg_config; ++struct acpi_mcfg_allocation *pci_mmcfg_config; + int pci_mmcfg_config_num; + +-int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) ++int __init acpi_parse_mcfg(struct acpi_table_header *header) + { + struct acpi_table_mcfg *mcfg; + unsigned long i; + int config_size; + +- if (!phys_addr || !size) ++ if (!header) + return -EINVAL; + +- mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size); +- if (!mcfg) { +- printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); +- return -ENODEV; +- } ++ mcfg = (struct acpi_table_mcfg *)header; + + /* how many config structures do we have */ + pci_mmcfg_config_num = 0; +- i = size - sizeof(struct acpi_table_mcfg); +- while (i >= sizeof(struct acpi_table_mcfg_config)) { ++ i = header->length - sizeof(struct acpi_table_mcfg); ++ while (i >= sizeof(struct acpi_mcfg_allocation)) { + ++pci_mmcfg_config_num; +- i -= sizeof(struct acpi_table_mcfg_config); ++ i -= sizeof(struct acpi_mcfg_allocation); + }; + if (pci_mmcfg_config_num == 0) { + printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); +@@ -206,9 +198,9 @@ int __init acpi_parse_mcfg(unsigned long + return -ENOMEM; + } + +- memcpy(pci_mmcfg_config, &mcfg->config, config_size); ++ memcpy(pci_mmcfg_config, &mcfg[1], config_size); + for (i = 0; i < pci_mmcfg_config_num; ++i) { +- if (mcfg->config[i].base_reserved) { ++ if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + printk(KERN_ERR PREFIX + "MMCONFIG not in low 4GB of memory\n"); + kfree(pci_mmcfg_config); +@@ -222,24 +214,24 @@ int __init acpi_parse_mcfg(unsigned long + #endif /* CONFIG_PCI_MMCONFIG */ + + #ifdef CONFIG_X86_LOCAL_APIC +-static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size) ++static int __init acpi_parse_madt(struct acpi_table_header *table) + { + struct acpi_table_madt *madt = NULL; + +- if (!phys_addr || !size || !cpu_has_apic) ++ if (!cpu_has_apic) + return -EINVAL; + +- madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size); ++ madt = (struct acpi_table_madt *)table; + if (!madt) { + printk(KERN_WARNING PREFIX "Unable to map MADT\n"); + return -ENODEV; + } + +- if (madt->lapic_address) { +- acpi_lapic_addr = (u64) madt->lapic_address; ++ if (madt->address) { ++ acpi_lapic_addr = (u64) madt->address; + + printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", +- madt->lapic_address); ++ madt->address); + } + + acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); +@@ -248,21 +240,17 @@ static int __init acpi_parse_madt(unsign + } + + static int __init +-acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) ++acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) + { +- struct acpi_table_lapic *processor = NULL; ++ struct acpi_madt_local_apic *processor = NULL; + +- processor = (struct acpi_table_lapic *)header; ++ processor = (struct acpi_madt_local_apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + +- /* Record local apic id only when enabled */ +- if (processor->flags.enabled) +- x86_acpiid_to_apicid[processor->acpi_id] = processor->id; +- + /* + * We need to register disabled CPU as well to permit + * counting disabled CPUs. This allows us to size +@@ -271,18 +259,18 @@ acpi_parse_lapic(acpi_table_entry_header + * when we use CPU hotplug. + */ + mp_register_lapic(processor->id, /* APIC ID */ +- processor->flags.enabled); /* Enabled? 
*/ ++ processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? */ + + return 0; + } + + static int __init +-acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header, ++acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, + const unsigned long end) + { +- struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; ++ struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; + +- lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header; ++ lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header; + + if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) + return -EINVAL; +@@ -293,11 +281,11 @@ acpi_parse_lapic_addr_ovr(acpi_table_ent + } + + static int __init +-acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end) ++acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) + { +- struct acpi_table_lapic_nmi *lapic_nmi = NULL; ++ struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; + +- lapic_nmi = (struct acpi_table_lapic_nmi *)header; ++ lapic_nmi = (struct acpi_madt_local_apic_nmi *)header; + + if (BAD_MADT_ENTRY(lapic_nmi, end)) + return -EINVAL; +@@ -315,11 +303,11 @@ acpi_parse_lapic_nmi(acpi_table_entry_he + #ifdef CONFIG_X86_IO_APIC + + static int __init +-acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) ++acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) + { +- struct acpi_table_ioapic *ioapic = NULL; ++ struct acpi_madt_io_apic *ioapic = NULL; + +- ioapic = (struct acpi_table_ioapic *)header; ++ ioapic = (struct acpi_madt_io_apic *)header; + + if (BAD_MADT_ENTRY(ioapic, end)) + return -EINVAL; +@@ -344,11 +332,11 @@ static void __init acpi_sci_ioapic_setup + polarity = 3; + + /* Command-line over-ride via acpi_sci= */ +- if (acpi_sci_flags.trigger) +- trigger = acpi_sci_flags.trigger; ++ if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) ++ trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2; + +- if (acpi_sci_flags.polarity) +- polarity = acpi_sci_flags.polarity; ++ if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) ++ polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; + + /* + * mp_config_acpi_legacy_irqs() already setup IRQs < 16 +@@ -359,51 +347,52 @@ static void __init acpi_sci_ioapic_setup + + /* + * stash over-ride to indicate we've been here +- * and for later update of acpi_fadt ++ * and for later update of acpi_gbl_FADT + */ + acpi_sci_override_gsi = gsi; + return; + } + + static int __init +-acpi_parse_int_src_ovr(acpi_table_entry_header * header, ++acpi_parse_int_src_ovr(struct acpi_subtable_header * header, + const unsigned long end) + { +- struct acpi_table_int_src_ovr *intsrc = NULL; ++ struct acpi_madt_interrupt_override *intsrc = NULL; + +- intsrc = (struct acpi_table_int_src_ovr *)header; ++ intsrc = (struct acpi_madt_interrupt_override *)header; + + if (BAD_MADT_ENTRY(intsrc, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + +- if (intsrc->bus_irq == acpi_fadt.sci_int) { ++ if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { + acpi_sci_ioapic_setup(intsrc->global_irq, +- intsrc->flags.polarity, +- intsrc->flags.trigger); ++ intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, ++ (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); + return 0; + } + + if (acpi_skip_timer_override && +- intsrc->bus_irq == 0 && intsrc->global_irq == 2) { ++ intsrc->source_irq == 0 && intsrc->global_irq == 2) { + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + return 0; + } + +- mp_override_legacy_irq(intsrc->bus_irq, +- intsrc->flags.polarity, +- 
intsrc->flags.trigger, intsrc->global_irq); ++ mp_override_legacy_irq(intsrc->source_irq, ++ intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, ++ (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, ++ intsrc->global_irq); + + return 0; + } + + static int __init +-acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end) ++acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) + { +- struct acpi_table_nmi_src *nmi_src = NULL; ++ struct acpi_madt_nmi_source *nmi_src = NULL; + +- nmi_src = (struct acpi_table_nmi_src *)header; ++ nmi_src = (struct acpi_madt_nmi_source *)header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; +@@ -419,7 +408,7 @@ acpi_parse_nmi_src(acpi_table_entry_head + + /* + * acpi_pic_sci_set_trigger() +- * ++ * + * use ELCR to set PIC-mode trigger type for SCI + * + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's +@@ -513,7 +502,7 @@ int acpi_map_lsapic(acpi_handle handle, + { + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; +- struct acpi_table_lapic *lapic; ++ struct acpi_madt_local_apic *lapic; + cpumask_t tmp_map, new_map; + u8 physid; + int cpu; +@@ -531,10 +520,10 @@ int acpi_map_lsapic(acpi_handle handle, + return -EINVAL; + } + +- lapic = (struct acpi_table_lapic *)obj->buffer.pointer; ++ lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer; + +- if ((lapic->header.type != ACPI_MADT_LAPIC) || +- (!lapic->flags.enabled)) { ++ if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC || ++ !(lapic->lapic_flags & ACPI_MADT_ENABLED)) { + kfree(buffer.pointer); + return -EINVAL; + } +@@ -546,7 +535,7 @@ int acpi_map_lsapic(acpi_handle handle, + buffer.pointer = NULL; + + tmp_map = cpu_present_map; +- mp_register_lapic(physid, lapic->flags.enabled); ++ mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); + + /* + * If mp_register_lapic successfully generates a new logical cpu +@@ -568,14 +557,6 @@ EXPORT_SYMBOL(acpi_map_lsapic); + + int acpi_unmap_lsapic(int cpu) + { +- int i; +- +- for_each_possible_cpu(i) { +- if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) { +- x86_acpiid_to_apicid[i] = -1; +- break; +- } +- } + x86_cpu_to_apicid[cpu] = -1; + cpu_clear(cpu, cpu_present_map); + num_processors--; +@@ -622,42 +603,37 @@ acpi_scan_rsdp(unsigned long start, unsi + return 0; + } + +-static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) ++static int __init acpi_parse_sbf(struct acpi_table_header *table) + { +- struct acpi_table_sbf *sb; +- +- if (!phys_addr || !size) +- return -EINVAL; ++ struct acpi_table_boot *sb; + +- sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size); ++ sb = (struct acpi_table_boot *)table; + if (!sb) { + printk(KERN_WARNING PREFIX "Unable to map SBF\n"); + return -ENODEV; + } + +- sbf_port = sb->sbf_cmos; /* Save CMOS port */ ++ sbf_port = sb->cmos_index; /* Save CMOS port */ + + return 0; + } + + #ifdef CONFIG_HPET_TIMER ++#include <asm/hpet.h> + +-static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) ++static int __init acpi_parse_hpet(struct acpi_table_header *table) + { + struct acpi_table_hpet *hpet_tbl; + struct resource *hpet_res; + resource_size_t res_start; + +- if (!phys || !size) +- return -EINVAL; +- +- hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size); ++ hpet_tbl = (struct acpi_table_hpet *)table; + if (!hpet_tbl) { + printk(KERN_WARNING PREFIX "Unable to map HPET\n"); + return -ENODEV; + } + +- if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) 
{ ++ if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { + printk(KERN_WARNING PREFIX "HPET timers must be located in " + "memory.\n"); + return -1; +@@ -670,29 +646,15 @@ static int __init acpi_parse_hpet(unsign + hpet_res->name = (void *)&hpet_res[1]; + hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, +- "HPET %u", hpet_tbl->number); ++ "HPET %u", hpet_tbl->sequence); + hpet_res->end = (1 * 1024) - 1; + } + +-#ifdef CONFIG_X86_64 +- vxtime.hpet_address = hpet_tbl->addr.addrl | +- ((long)hpet_tbl->addr.addrh << 32); +- ++ hpet_address = hpet_tbl->address.address; + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", +- hpet_tbl->id, vxtime.hpet_address); +- +- res_start = vxtime.hpet_address; +-#else /* X86 */ +- { +- extern unsigned long hpet_address; +- +- hpet_address = hpet_tbl->addr.addrl; +- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", +- hpet_tbl->id, hpet_address); ++ hpet_tbl->id, hpet_address); + +- res_start = hpet_address; +- } +-#endif /* X86 */ ++ res_start = hpet_address; + + if (hpet_res) { + hpet_res->start = res_start; +@@ -706,46 +668,28 @@ static int __init acpi_parse_hpet(unsign + #define acpi_parse_hpet NULL + #endif + +-#ifdef CONFIG_X86_PM_TIMER +-extern u32 pmtmr_ioport; +-#endif +- +-static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) ++static int __init acpi_parse_fadt(struct acpi_table_header *table) + { +- struct fadt_descriptor *fadt = NULL; +- +- fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size); +- if (!fadt) { +- printk(KERN_WARNING PREFIX "Unable to map FADT\n"); +- return 0; +- } +- /* initialize sci_int early for INT_SRC_OVR MADT parsing */ +- acpi_fadt.sci_int = fadt->sci_int; + +- /* initialize rev and apic_phys_dest_mode for x86_64 genapic */ +- acpi_fadt.revision = fadt->revision; +- acpi_fadt.force_apic_physical_destination_mode = +- fadt->force_apic_physical_destination_mode; +- +-#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN) ++#ifdef CONFIG_X86_PM_TIMER + /* detect the location of the ACPI PM Timer */ +- if (fadt->revision >= FADT2_REVISION_ID) { ++ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ +- if (fadt->xpm_tmr_blk.address_space_id != ++ if (acpi_gbl_FADT.xpm_timer_block.space_id != + ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + +- pmtmr_ioport = fadt->xpm_tmr_blk.address; ++ pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address; + /* + * "X" fields are optional extensions to the original V1.0 + * fields, so we must selectively expand V1.0 fields if the + * corresponding X field is zero. + */ + if (!pmtmr_ioport) +- pmtmr_ioport = fadt->V1_pm_tmr_blk; ++ pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; + } else { + /* FADT rev. 1 */ +- pmtmr_ioport = fadt->V1_pm_tmr_blk; ++ pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", +@@ -787,13 +731,13 @@ static int __init acpi_parse_madt_lapic_ + if (!cpu_has_apic) + return -ENODEV; + +- /* ++ /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 
+ */ + + count = +- acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, ++ acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX +@@ -803,7 +747,7 @@ static int __init acpi_parse_madt_lapic_ + + mp_register_lapic_address(acpi_lapic_addr); + +- count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic, ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, + MAX_APICS); + if (!count) { + printk(KERN_ERR PREFIX "No LAPIC entries present\n"); +@@ -816,7 +760,7 @@ static int __init acpi_parse_madt_lapic_ + } + + count = +- acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0); ++ acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); + if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ +@@ -845,7 +789,7 @@ static int __init acpi_parse_madt_ioapic + return -ENODEV; + } + +- if (!cpu_has_apic) ++ if (!cpu_has_apic) + return -ENODEV; + + /* +@@ -858,7 +802,7 @@ static int __init acpi_parse_madt_ioapic + } + + count = +- acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, ++ acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, + MAX_IO_APICS); + if (!count) { + printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); +@@ -869,7 +813,7 @@ static int __init acpi_parse_madt_ioapic + } + + count = +- acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, ++ acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, + NR_IRQ_VECTORS); + if (count < 0) { + printk(KERN_ERR PREFIX +@@ -883,13 +827,13 @@ static int __init acpi_parse_madt_ioapic + * pretend we got one so we can set the SCI flags. + */ + if (!acpi_sci_override_gsi) +- acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); ++ acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); + + /* Fill in identity legacy mapings where no override */ + mp_config_acpi_legacy_irqs(); + + count = +- acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, ++ acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, + NR_IRQ_VECTORS); + if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); +@@ -909,10 +853,9 @@ static inline int acpi_parse_madt_ioapic + static void __init acpi_process_madt(void) + { + #ifdef CONFIG_X86_LOCAL_APIC +- int count, error; ++ int error; + +- count = acpi_table_parse(ACPI_APIC, acpi_parse_madt); +- if (count >= 1) { ++ if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries +@@ -1132,7 +1075,28 @@ static struct dmi_system_id __initdata a + "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, +- ++ { ++ /* ++ * Latest BIOS for IBM 600E (1.16) has bad pcinum ++ * for LPC bridge, which is needed for the PCI ++ * interrupt links to work. DSDT fix is in bug 5966. 
++ * 2645, 2646 model numbers are shared with 600/600E/600X ++ */ ++ .callback = disable_acpi_irq, ++ .ident = "IBM Thinkpad 600 Series 2645", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_BOARD_NAME, "2645"), ++ }, ++ }, ++ { ++ .callback = disable_acpi_irq, ++ .ident = "IBM Thinkpad 600 Series 2646", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), ++ DMI_MATCH(DMI_BOARD_NAME, "2646"), ++ }, ++ }, + /* + * Boxes that need ACPI PCI IRQ routing and PCI scan disabled + */ +@@ -1198,7 +1162,7 @@ int __init acpi_boot_table_init(void) + if (acpi_disabled && !acpi_ht) + return 1; + +- /* ++ /* + * Initialize the ACPI boot-time table parser. + */ + error = acpi_table_init(); +@@ -1207,7 +1171,7 @@ int __init acpi_boot_table_init(void) + return error; + } + +- acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); ++ acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); + + /* + * blacklist may disable ACPI entirely +@@ -1235,19 +1199,19 @@ int __init acpi_boot_init(void) + if (acpi_disabled && !acpi_ht) + return 1; + +- acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); ++ acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); + + /* + * set sci_int and PM timer address + */ +- acpi_table_parse(ACPI_FADT, acpi_parse_fadt); ++ acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt); + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + acpi_process_madt(); + +- acpi_table_parse(ACPI_HPET, acpi_parse_hpet); ++ acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); + + return 0; + } +@@ -1318,13 +1282,17 @@ static int __init setup_acpi_sci(char *s + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) +- acpi_sci_flags.trigger = 1; ++ acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE | ++ (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); + else if (!strcmp(s, "level")) +- acpi_sci_flags.trigger = 3; ++ acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL | ++ (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); + else if (!strcmp(s, "high")) +- acpi_sci_flags.polarity = 1; ++ acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH | ++ (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); + else if (!strcmp(s, "low")) +- acpi_sci_flags.polarity = 3; ++ acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW | ++ (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); + else + return -EINVAL; + return 0; +Index: 10.3-2007-11-26/arch/i386/kernel/apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/apic-xen.c 2007-10-22 13:53:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/apic-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -25,6 +25,8 @@ + #include <linux/kernel_stat.h> + #include <linux/sysdev.h> + #include <linux/cpu.h> ++#include <linux/clockchips.h> ++#include <linux/acpi_pmtmr.h> + #include <linux/module.h> + + #include <asm/atomic.h> +@@ -56,83 +58,26 @@ static cpumask_t timer_bcast_ipi; + */ + + /* +- * Debug level ++ * Debug level, exported for io_apic.c + */ + int apic_verbosity; + + #ifndef CONFIG_XEN + static int modern_apic(void) + { +- unsigned int lvr, version; + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && +- boot_cpu_data.x86 >= 0xf) ++ boot_cpu_data.x86 >= 0xf) + return 1; +- lvr = apic_read(APIC_LVR); +- version = GET_APIC_VERSION(lvr); +- return version >= 0x14; ++ return lapic_get_version() >= 0x14; + } + #endif /* !CONFIG_XEN */ + +-/* +- * 'what should we do if we get a hw irq event on an illegal vector'. +- * each architecture has to answer this themselves. 
+- */ +-void ack_bad_irq(unsigned int irq) +-{ +- printk("unexpected IRQ trap at vector %02x\n", irq); +- /* +- * Currently unexpected vectors happen only on SMP and APIC. +- * We _must_ ack these because every local APIC has only N +- * irq slots per priority level, and a 'hanging, unacked' IRQ +- * holds up an irq slot - in excessive cases (when multiple +- * unexpected vectors occur) that might lock up the APIC +- * completely. +- * But only ack when the APIC is enabled -AK +- */ +- if (cpu_has_apic) +- ack_APIC_irq(); +-} +- + int get_physical_broadcast(void) + { + return 0xff; + } + +-#ifndef CONFIG_XEN +-#ifndef CONFIG_SMP +-static void up_apic_timer_interrupt_call(void) +-{ +- int cpu = smp_processor_id(); +- +- /* +- * the NMI deadlock-detector uses this. +- */ +- per_cpu(irq_stat, cpu).apic_timer_irqs++; +- +- smp_local_timer_interrupt(); +-} +-#endif +- +-void smp_send_timer_broadcast_ipi(void) +-{ +- cpumask_t mask; +- +- cpus_and(mask, cpu_online_map, timer_bcast_ipi); +- if (!cpus_empty(mask)) { +-#ifdef CONFIG_SMP +- send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +-#else +- /* +- * We can directly call the apic timer interrupt handler +- * in UP case. Minus all irq related functions +- */ +- up_apic_timer_interrupt_call(); +-#endif +- } +-} +-#endif +- + int setup_profiling_timer(unsigned int multiplier) + { + return -EINVAL; +Index: 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/cpu/common-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -610,7 +610,7 @@ void __init early_cpu_init(void) + struct pt_regs * __devinit idle_regs(struct pt_regs *regs) + { + memset(regs, 0, sizeof(struct pt_regs)); +- regs->xgs = __KERNEL_PDA; ++ regs->xfs = __KERNEL_PDA; + return regs; + } + +@@ -667,12 +667,12 @@ struct i386_pda boot_pda = { + .pcurrent = &init_task, + }; + +-static inline void set_kernel_gs(void) ++static inline void set_kernel_fs(void) + { +- /* Set %gs for this CPU's PDA. Memory clobber is to create a ++ /* Set %fs for this CPU's PDA. Memory clobber is to create a + barrier with respect to any PDA operations, so the compiler + doesn't move any before here. */ +- asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); ++ asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); + } + + /* Initialize the CPU's GDT and PDA. The boot CPU does this for +@@ -730,7 +730,7 @@ void __cpuinit cpu_set_gdt(int cpu) + } + BUG_ON(HYPERVISOR_set_gdt(frames, cpu_gdt_descr->size / 8)); + +- set_kernel_gs(); ++ set_kernel_fs(); + } + + /* Common CPU init for both boot and secondary CPUs */ +@@ -775,8 +775,8 @@ static void __cpuinit _cpu_init(int cpu, + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + #endif + +- /* Clear %fs. */ +- asm volatile ("mov %0, %%fs" : : "r" (0)); ++ /* Clear %gs. 
*/ ++ asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); +Index: 10.3-2007-11-26/arch/i386/kernel/e820-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/e820-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/e820-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -14,6 +14,7 @@ + #include <asm/pgtable.h> + #include <asm/page.h> + #include <asm/e820.h> ++#include <asm/setup.h> + #include <xen/interface/memory.h> + + #ifdef CONFIG_EFI +@@ -157,21 +158,22 @@ static struct resource standard_io_resou + .flags = IORESOURCE_BUSY | IORESOURCE_IO + } }; + +-static int romsignature(const unsigned char *x) ++#define ROMSIGNATURE 0xaa55 ++ ++static int __init romsignature(const unsigned char *rom) + { + unsigned short sig; +- int ret = 0; +- if (probe_kernel_address((const unsigned short *)x, sig) == 0) +- ret = (sig == 0xaa55); +- return ret; ++ ++ return probe_kernel_address((const unsigned short *)rom, sig) == 0 && ++ sig == ROMSIGNATURE; + } + + static int __init romchecksum(unsigned char *rom, unsigned long length) + { +- unsigned char *p, sum = 0; ++ unsigned char sum; + +- for (p = rom; p < rom + length; p++) +- sum += *p; ++ for (sum = 0; length; length--) ++ sum += *rom++; + return sum == 0; + } + +Index: 10.3-2007-11-26/arch/i386/kernel/entry-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/entry-xen.S 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/entry-xen.S 2007-10-22 13:58:46.000000000 +0200 +@@ -30,7 +30,7 @@ + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es +- * 24(%esp) - %gs ++ * 24(%esp) - %fs + * 28(%esp) - orig_eax + * 2C(%esp) - %eip + * 30(%esp) - %cs +@@ -102,9 +102,9 @@ NMI_MASK = 0x80000000 + + #define SAVE_ALL \ + cld; \ +- pushl %gs; \ ++ pushl %fs; \ + CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET gs, 0;*/\ ++ /*CFI_REL_OFFSET fs, 0;*/\ + pushl %es; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET es, 0;*/\ +@@ -136,7 +136,7 @@ NMI_MASK = 0x80000000 + movl %edx, %ds; \ + movl %edx, %es; \ + movl $(__KERNEL_PDA), %edx; \ +- movl %edx, %gs ++ movl %edx, %fs + + #define RESTORE_INT_REGS \ + popl %ebx; \ +@@ -169,9 +169,9 @@ NMI_MASK = 0x80000000 + 2: popl %es; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE es;*/\ +-3: popl %gs; \ ++3: popl %fs; \ + CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE gs;*/\ ++ /*CFI_RESTORE fs;*/\ + .pushsection .fixup,"ax"; \ + 4: movl $0,(%esp); \ + jmp 1b; \ +@@ -230,6 +230,7 @@ ENTRY(ret_from_fork) + CFI_ADJUST_CFA_OFFSET -4 + jmp syscall_exit + CFI_ENDPROC ++END(ret_from_fork) + + /* + * Return to user mode is not as complex as all this looks, +@@ -261,6 +262,7 @@ ENTRY(resume_userspace) + # int/exception return? 
+ jne work_pending + jmp restore_all ++END(ret_from_exception) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +@@ -275,6 +277,7 @@ need_resched: + jz restore_all + call preempt_schedule_irq + jmp need_resched ++END(resume_kernel) + #endif + CFI_ENDPROC + +@@ -352,16 +355,17 @@ sysenter_past_esp: + movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp + TRACE_IRQS_ON +-1: mov PT_GS(%esp), %gs ++1: mov PT_FS(%esp), %fs + ENABLE_INTERRUPTS_SYSEXIT + CFI_ENDPROC + .pushsection .fixup,"ax" +-2: movl $0,PT_GS(%esp) ++2: movl $0,PT_FS(%esp) + jmp 1b + .section __ex_table,"a" + .align 4 + .long 1b,2b + .popsection ++ENDPROC(sysenter_entry) + + # system call handler stub + ENTRY(system_call) +@@ -507,6 +511,7 @@ hypervisor_iret: + jmp hypercall_page + (__HYPERVISOR_iret * 32) + #endif + CFI_ENDPROC ++ENDPROC(system_call) + + # perform work that needs to be done immediately before resumption + ALIGN +@@ -552,6 +557,7 @@ work_notifysig_v86: + xorl %edx, %edx + call do_notify_resume + jmp resume_userspace_sig ++END(work_pending) + + # perform syscall exit tracing + ALIGN +@@ -567,6 +573,7 @@ syscall_trace_entry: + cmpl $(nr_syscalls), %eax + jnae syscall_call + jmp syscall_exit ++END(syscall_trace_entry) + + # perform syscall exit tracing + ALIGN +@@ -580,6 +587,7 @@ syscall_exit_work: + movl $1, %edx + call do_syscall_trace + jmp resume_userspace ++END(syscall_exit_work) + CFI_ENDPROC + + RING0_INT_FRAME # can't unwind into user space anyway +@@ -590,16 +598,18 @@ syscall_fault: + GET_THREAD_INFO(%ebp) + movl $-EFAULT,PT_EAX(%esp) + jmp resume_userspace ++END(syscall_fault) + + syscall_badsys: + movl $-ENOSYS,PT_EAX(%esp) + jmp resume_userspace ++END(syscall_badsys) + CFI_ENDPROC + + #ifndef CONFIG_XEN + #define FIXUP_ESPFIX_STACK \ + /* since we are on a wrong stack, we cant make it a C code :( */ \ +- movl %gs:PDA_cpu, %ebx; \ ++ movl %fs:PDA_cpu, %ebx; \ + PER_CPU(cpu_gdt_descr, %ebx); \ + movl GDS_address(%ebx), %ebx; \ + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ +@@ -630,9 +640,9 @@ syscall_badsys: + ENTRY(interrupt) + .text + +-vector=0 + ENTRY(irq_entries_start) + RING0_INT_FRAME ++vector=0 + .rept NR_IRQS + ALIGN + .if vector +@@ -641,11 +651,16 @@ ENTRY(irq_entries_start) + 1: pushl $~(vector) + CFI_ADJUST_CFA_OFFSET 4 + jmp common_interrupt +-.data ++ .previous + .long 1b +-.text ++ .text + vector=vector+1 + .endr ++END(irq_entries_start) ++ ++.previous ++END(interrupt) ++.previous + + /* + * the CPU automatically disables interrupts when executing an IRQ vector, +@@ -658,6 +673,7 @@ common_interrupt: + movl %esp,%eax + call do_IRQ + jmp ret_from_intr ++ENDPROC(common_interrupt) + CFI_ENDPROC + + #define BUILD_INTERRUPT(name, nr) \ +@@ -670,10 +686,16 @@ ENTRY(name) \ + movl %esp,%eax; \ + call smp_/**/name; \ + jmp ret_from_intr; \ +- CFI_ENDPROC ++ CFI_ENDPROC; \ ++ENDPROC(name) + + /* The include is where all of the SMP etc. 
interrupts come from */ + #include "entry_arch.h" ++ ++/* This alternate entry is needed because we hijack the apic LVTT */ ++#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) ++BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) ++#endif + #else + #define UNWIND_ESPFIX_STACK + #endif +@@ -684,7 +706,7 @@ KPROBE_ENTRY(page_fault) + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: +- /* the function address is in %gs's slot on the stack */ ++ /* the function address is in %fs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ +@@ -713,20 +735,20 @@ error_code: + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld +- pushl %gs ++ pushl %fs + CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET gs, 0*/ ++ /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PDA), %ecx +- movl %ecx, %gs ++ movl %ecx, %fs + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ +- movl PT_GS(%esp), %edi # get the function address ++ movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart +- mov %ecx, PT_GS(%esp) +- /*CFI_REL_OFFSET gs, ES*/ ++ mov %ecx, PT_FS(%esp) ++ /*CFI_REL_OFFSET fs, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es +@@ -813,7 +835,7 @@ critical_fixup_table: + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es +- .byte 0x24,0x24 # pop %gs ++ .byte 0x24,0x24 # pop %fs + .byte 0x28,0x28,0x28 # add $4,%esp + .byte 0x2c # iret + .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) +@@ -879,6 +901,7 @@ ENTRY(coprocessor_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(coprocessor_error) + + ENTRY(simd_coprocessor_error) + RING0_INT_FRAME +@@ -888,6 +911,7 @@ ENTRY(simd_coprocessor_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(simd_coprocessor_error) + + ENTRY(device_not_available) + RING0_INT_FRAME +@@ -910,6 +934,7 @@ device_available_emulate: + call math_state_restore + jmp ret_from_exception + CFI_ENDPROC ++END(device_not_available) + + #ifndef CONFIG_XEN + /* +@@ -1071,10 +1096,12 @@ ENTRY(native_iret) + .align 4 + .long 1b,iret_exc + .previous ++END(native_iret) + + ENTRY(native_irq_enable_sysexit) + sti + sysexit ++END(native_irq_enable_sysexit) + #endif + + KPROBE_ENTRY(int3) +@@ -1097,6 +1124,7 @@ ENTRY(overflow) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(overflow) + + ENTRY(bounds) + RING0_INT_FRAME +@@ -1106,6 +1134,7 @@ ENTRY(bounds) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(bounds) + + ENTRY(invalid_op) + RING0_INT_FRAME +@@ -1115,6 +1144,7 @@ ENTRY(invalid_op) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(invalid_op) + + ENTRY(coprocessor_segment_overrun) + RING0_INT_FRAME +@@ -1124,6 +1154,7 @@ ENTRY(coprocessor_segment_overrun) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(coprocessor_segment_overrun) + + ENTRY(invalid_TSS) + RING0_EC_FRAME +@@ -1131,6 +1162,7 @@ ENTRY(invalid_TSS) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(invalid_TSS) + + ENTRY(segment_not_present) + RING0_EC_FRAME +@@ -1138,6 +1170,7 @@ ENTRY(segment_not_present) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(segment_not_present) + + ENTRY(stack_segment) + RING0_EC_FRAME +@@ -1145,6 +1178,7 @@ ENTRY(stack_segment) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(stack_segment) + + KPROBE_ENTRY(general_protection) + RING0_EC_FRAME +@@ -1160,6 +1194,7 @@ 
ENTRY(alignment_check) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(alignment_check) + + ENTRY(divide_error) + RING0_INT_FRAME +@@ -1169,6 +1204,7 @@ ENTRY(divide_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(divide_error) + + #ifdef CONFIG_X86_MCE + ENTRY(machine_check) +@@ -1179,6 +1215,7 @@ ENTRY(machine_check) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(machine_check) + #endif + + #ifndef CONFIG_XEN +@@ -1198,6 +1235,7 @@ ENTRY(fixup_4gb_segment) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC ++END(spurious_interrupt_bug) + + ENTRY(kernel_thread_helper) + pushl $0 # fake return address for unwinder +Index: 10.3-2007-11-26/arch/i386/kernel/head-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/head-xen.S 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/head-xen.S 2007-10-22 13:58:46.000000000 +0200 +@@ -27,6 +27,7 @@ + #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability + #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id + ++.section .text.head,"ax",@progbits + #define VIRT_ENTRY_OFFSET 0x0 + .org VIRT_ENTRY_OFFSET + ENTRY(startup_32) +@@ -60,11 +61,11 @@ ENTRY(startup_32) + + movb $1,X86_HARD_MATH + +- xorl %eax,%eax # Clear FS +- movl %eax,%fs ++ xorl %eax,%eax # Clear GS ++ movl %eax,%gs + + movl $(__KERNEL_PDA),%eax +- mov %eax,%gs ++ mov %eax,%fs + + cld # gcc2 wants the direction flag cleared at all times + +@@ -75,7 +76,7 @@ ENTRY(startup_32) + * Point the GDT at this CPU's PDA. This will be + * cpu_gdt_table and boot_pda. + */ +-setup_pda: ++ENTRY(setup_pda) + /* get the PDA pointer */ + movl $boot_pda, %eax + +Index: 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/io_apic-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -164,7 +164,7 @@ static inline void io_apic_write(unsigne + */ + static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) + { +- volatile struct io_apic *io_apic = io_apic_base(apic); ++ volatile struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +@@ -387,7 +387,7 @@ static void set_ioapic_affinity_irq(unsi + break; + entry = irq_2_pin + entry->next; + } +- set_native_irq_info(irq, cpumask); ++ irq_desc[irq].affinity = cpumask; + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -526,8 +526,8 @@ static void do_irq_balance(void) + package_index = CPU_TO_PACKAGEINDEX(i); + for (j = 0; j < NR_IRQS; j++) { + unsigned long value_now, delta; +- /* Is this an active IRQ? */ +- if (!irq_desc[j].action) ++ /* Is this an active IRQ or balancing disabled ? 
*/ ++ if (!irq_desc[j].action || irq_balancing_disabled(j)) + continue; + if ( package_index == i ) + IRQ_DELTA(package_index,j) = 0; +@@ -780,7 +780,7 @@ failed: + return 0; + } + +-int __init irqbalance_disable(char *str) ++int __devinit irqbalance_disable(char *str) + { + irqbalance_disabled = 1; + return 1; +@@ -1319,11 +1319,9 @@ static void ioapic_register_intr(int irq + trigger == IOAPIC_LEVEL) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, "fasteoi"); +- else { +- irq_desc[irq].status |= IRQ_DELAYED_DISABLE; ++ else + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); +- } + set_intr_gate(vector, interrupt[irq]); + } + #else +@@ -1397,7 +1395,6 @@ static void __init setup_IO_APIC_irqs(vo + } + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, entry); +- set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + } +@@ -1630,7 +1627,7 @@ void /*__init*/ print_local_APIC(void * + v = apic_read(APIC_LVR); + printk(KERN_INFO "... APIC VERSION: %08x\n", v); + ver = GET_APIC_VERSION(v); +- maxlvt = get_maxlvt(); ++ maxlvt = lapic_get_maxlvt(); + + v = apic_read(APIC_TASKPRI); + printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); +@@ -1969,7 +1966,7 @@ static void __init setup_ioapic_ids_from + #endif + + #ifndef CONFIG_XEN +-static int no_timer_check __initdata; ++int no_timer_check __initdata; + + static int __init notimercheck(char *s) + { +@@ -2362,7 +2359,7 @@ static inline void __init check_timer(vo + + disable_8259A_irq(0); + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, +- "fasteio"); ++ "fasteoi"); + apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + enable_8259A_irq(0); + +@@ -2655,7 +2652,7 @@ static void set_msi_irq_affinity(unsigne + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); +- set_native_irq_info(irq, mask); ++ irq_desc[irq].affinity = mask; + } + #endif /* CONFIG_SMP */ + +@@ -2674,25 +2671,32 @@ static struct irq_chip msi_chip = { + .retrigger = ioapic_retrigger_irq, + }; + +-int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) ++int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) + { + struct msi_msg msg; +- int ret; ++ int irq, ret; ++ irq = create_irq(); ++ if (irq < 0) ++ return irq; ++ ++ set_irq_msi(irq, desc); + ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) ++ if (ret < 0) { ++ destroy_irq(irq); + return ret; ++ } + + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, + "edge"); + +- return 0; ++ return irq; + } + + void arch_teardown_msi_irq(unsigned int irq) + { +- return; ++ destroy_irq(irq); + } + + #endif /* CONFIG_PCI_MSI */ +@@ -2732,7 +2736,7 @@ static void set_ht_irq_affinity(unsigned + dest = cpu_mask_to_apicid(mask); + + target_ht_irq(irq, dest); +- set_native_irq_info(irq, mask); ++ irq_desc[irq].affinity = mask; + } + #endif + +@@ -2940,7 +2944,6 @@ int io_apic_set_pci_routing (int ioapic, + + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(ioapic, pin, entry); +- set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +Index: 10.3-2007-11-26/arch/i386/kernel/irq-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/irq-xen.c 2007-10-22 13:53:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/irq-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -10,7 
+10,6 @@ + * io_apic.c.) + */ + +-#include <asm/uaccess.h> + #include <linux/module.h> + #include <linux/seq_file.h> + #include <linux/interrupt.h> +@@ -19,19 +18,34 @@ + #include <linux/cpu.h> + #include <linux/delay.h> + ++#include <asm/apic.h> ++#include <asm/uaccess.h> ++ + DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; + EXPORT_PER_CPU_SYMBOL(irq_stat); + +-#ifndef CONFIG_X86_LOCAL_APIC + /* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ + void ack_bad_irq(unsigned int irq) + { +- printk("unexpected IRQ trap at vector %02x\n", irq); +-} ++ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); ++ ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) ++ /* ++ * Currently unexpected vectors happen only on SMP and APIC. ++ * We _must_ ack these because every local APIC has only N ++ * irq slots per priority level, and a 'hanging, unacked' IRQ ++ * holds up an irq slot - in excessive cases (when multiple ++ * unexpected vectors occur) that might lock up the APIC ++ * completely. ++ * But only ack when the APIC is enabled -AK ++ */ ++ if (cpu_has_apic) ++ ack_APIC_irq(); + #endif ++} + + #ifdef CONFIG_4KSTACKS + /* +Index: 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/microcode-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -108,7 +108,7 @@ static ssize_t microcode_write (struct f + return ret; + } + +-static struct file_operations microcode_fops = { ++static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +Index: 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/mpparse-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -1079,7 +1079,7 @@ int mp_register_gsi(u32 gsi, int trigger + static int gsi_to_irq[MAX_GSI_NUM]; + + /* Don't set up the ACPI SCI because it's already set up */ +- if (acpi_fadt.sci_int == gsi) ++ if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); +@@ -1136,7 +1136,7 @@ int mp_register_gsi(u32 gsi, int trigger + /* + * Don't assign IRQ used by ACPI SCI + */ +- if (gsi == acpi_fadt.sci_int) ++ if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { +Index: 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/pci-dma-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -284,7 +284,7 @@ int dma_declare_coherent_memory(struct d + return DMA_MEMORY_IO; + + free1_out: +- kfree(dev->dma_mem->bitmap); ++ kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); +Index: 10.3-2007-11-26/arch/i386/kernel/pcspeaker.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/pcspeaker.c 2007-12-06 17:27:31.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/pcspeaker.c 2007-10-22 13:58:46.000000000 +0200 +@@ -7,6 +7,11 @@ static __init int add_pcspkr(void) + struct 
platform_device *pd; + int ret; + ++#ifdef CONFIG_XEN ++ if (!is_initial_xendomain()) ++ return 0; ++#endif ++ + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; +Index: 10.3-2007-11-26/arch/i386/kernel/process-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/process-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/process-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -38,6 +38,7 @@ + #include <linux/ptrace.h> + #include <linux/random.h> + #include <linux/personality.h> ++#include <linux/tick.h> + + #include <asm/uaccess.h> + #include <asm/pgtable.h> +@@ -160,6 +161,7 @@ void cpu_idle(void) + + /* endless idle loop with no priority at all */ + while (1) { ++ tick_nohz_stop_sched_tick(); + while (!need_resched()) { + void (*idle)(void); + +@@ -175,6 +177,7 @@ void cpu_idle(void) + __get_cpu_var(irq_stat).idle_timestamp = jiffies; + idle(); + } ++ tick_nohz_restart_sched_tick(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); +@@ -247,8 +250,8 @@ void show_regs(struct pt_regs * regs) + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); +- printk(" DS: %04x ES: %04x GS: %04x\n", +- 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); ++ printk(" DS: %04x ES: %04x FS: %04x\n", ++ 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + + cr0 = read_cr0(); + cr2 = read_cr2(); +@@ -279,7 +282,7 @@ int kernel_thread(int (*fn)(void *), voi + + regs.xds = __USER_DS; + regs.xes = __USER_DS; +- regs.xgs = __KERNEL_PDA; ++ regs.xfs = __KERNEL_PDA; + regs.orig_eax = -1; + regs.eip = (unsigned long) kernel_thread_helper; + regs.xcs = __KERNEL_CS | get_kernel_rpl(); +@@ -355,7 +358,7 @@ int copy_thread(int nr, unsigned long cl + + p->thread.eip = (unsigned long) ret_from_fork; + +- savesegment(fs,p->thread.fs); ++ savesegment(gs,p->thread.gs); + + tsk = current; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { +@@ -433,8 +436,8 @@ void dump_thread(struct pt_regs * regs, + dump->regs.eax = regs->eax; + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; +- savesegment(fs,dump->regs.fs); +- dump->regs.gs = regs->xgs; ++ dump->regs.fs = regs->xfs; ++ savesegment(gs,dump->regs.gs); + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; +@@ -613,16 +616,6 @@ struct task_struct fastcall * __switch_t + prefetch(&next->i387.fxsave); + + /* +- * Restore %fs if needed. +- * +- * Glibc normally makes %fs be zero. +- */ +- if (unlikely(next->fs)) +- loadsegment(fs, next->fs); +- +- write_pda(pcurrent, next_p); +- +- /* + * Now maybe handle debug registers + */ + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) +@@ -630,6 +623,15 @@ struct task_struct fastcall * __switch_t + + disable_tsc(prev_p, next_p); + ++ /* ++ * Leave lazy mode, flushing any hypercalls made here. ++ * This must be done before restoring TLS segments so ++ * the GDT and LDT are properly updated, and must be ++ * done before math_state_restore, so the TS bit is up ++ * to date. 
++ */ ++ arch_leave_lazy_cpu_mode(); ++ + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now +@@ -637,6 +639,14 @@ struct task_struct fastcall * __switch_t + if (next_p->fpu_counter > 5) + math_state_restore(); + ++ /* ++ * Restore %gs if needed (which is common) ++ */ ++ if (prev->gs | next->gs) ++ loadsegment(gs, next->gs); ++ ++ write_pda(pcurrent, next_p); ++ + return prev_p; + } + +Index: 10.3-2007-11-26/arch/i386/kernel/setup-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/setup-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/setup-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -33,7 +33,6 @@ + #include <linux/initrd.h> + #include <linux/bootmem.h> + #include <linux/seq_file.h> +-#include <linux/platform_device.h> + #include <linux/console.h> + #include <linux/mca.h> + #include <linux/root_dev.h> +@@ -151,7 +150,7 @@ unsigned long saved_videomode; + #define RAMDISK_PROMPT_FLAG 0x8000 + #define RAMDISK_LOAD_FLAG 0x4000 + +-static char command_line[COMMAND_LINE_SIZE]; ++static char __initdata command_line[COMMAND_LINE_SIZE]; + + unsigned char __initdata boot_params[PARAM_SIZE]; + +@@ -671,8 +670,8 @@ void __init setup_arch(char **cmdline_p) + + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) + i = COMMAND_LINE_SIZE; +- memcpy(saved_command_line, xen_start_info->cmd_line, i); +- saved_command_line[i - 1] = '\0'; ++ memcpy(boot_command_line, xen_start_info->cmd_line, i); ++ boot_command_line[i - 1] = '\0'; + parse_early_param(); + + if (user_defined_memmap) { +@@ -680,11 +679,19 @@ void __init setup_arch(char **cmdline_p) + print_memory_map("user"); + } + +- strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); ++ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + max_low_pfn = setup_memory(); + ++#ifdef CONFIG_VMI ++ /* ++ * Must be after max_low_pfn is determined, and before kernel ++ * pagetables are setup. ++ */ ++ vmi_init(); ++#endif ++ + /* + * NOTE: before this point _nobody_ is allowed to allocate + * any memory using the bootmem allocator. Although the +@@ -816,7 +823,6 @@ void __init setup_arch(char **cmdline_p) + conswitchp = &dummy_con; + #endif + } +- tsc_init(); + + xencons_early_setup(); + } +@@ -828,31 +834,3 @@ xen_panic_event(struct notifier_block *t + /* we're never actually going to get here... */ + return NOTIFY_DONE; + } +- +-static __init int add_pcspkr(void) +-{ +- struct platform_device *pd; +- int ret; +- +- if (!is_initial_xendomain()) +- return 0; +- +- pd = platform_device_alloc("pcspkr", -1); +- if (!pd) +- return -ENOMEM; +- +- ret = platform_device_add(pd); +- if (ret) +- platform_device_put(pd); +- +- return ret; +-} +-device_initcall(add_pcspkr); +- +-/* +- * Local Variables: +- * mode:c +- * c-file-style:"k&r" +- * c-basic-offset:8 +- * End: +- */ +Index: 10.3-2007-11-26/arch/i386/kernel/smp-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/smp-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/smp-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -335,8 +335,7 @@ static void flush_tlb_others(cpumask_t c + /* + * i'm not happy about this global shared spinlock in the + * MM hot path, but we'll see how contended it is. 
+- * Temporarily this turns IRQs off, so that lockups are +- * detected by the NMI watchdog. ++ * AK: x86-64 has a faster method that could be ported. + */ + spin_lock(&tlbstate_lock); + +@@ -361,7 +360,7 @@ static void flush_tlb_others(cpumask_t c + + while (!cpus_empty(flush_cpumask)) + /* nothing. lockup detection does not belong here */ +- mb(); ++ cpu_relax(); + + flush_mm = NULL; + flush_va = 0; +Index: 10.3-2007-11-26/arch/i386/kernel/swiotlb.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/swiotlb.c 2007-12-06 17:27:31.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/swiotlb.c 2007-10-22 13:58:46.000000000 +0200 +@@ -138,8 +138,8 @@ __setup("swiotlb=", setup_io_tlb_npages) + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the PCI DMA API. + */ +-void +-swiotlb_init_with_default_size (size_t default_size) ++void __init ++swiotlb_init_with_default_size(size_t default_size) + { + unsigned long i, bytes; + int rc; +@@ -227,7 +227,7 @@ swiotlb_init_with_default_size (size_t d + dma_bits); + } + +-void ++void __init + swiotlb_init(void) + { + long ram_end; +@@ -463,7 +463,7 @@ swiotlb_full(struct device *dev, size_t + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ +- printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " ++ printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at " + "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); + + if (size > io_tlb_overflow && do_panic) { +@@ -608,7 +608,7 @@ swiotlb_map_sg(struct device *hwdev, str + sg[0].dma_length = 0; + return 0; + } +- sg->dma_address = (dma_addr_t)virt_to_bus(map); ++ sg->dma_address = virt_to_bus(map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; +@@ -630,8 +630,7 @@ swiotlb_unmap_sg(struct device *hwdev, s + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) +- unmap_single(hwdev, +- (void *)bus_to_virt(sg->dma_address), ++ unmap_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + else + gnttab_dma_unmap_page(sg->dma_address); +@@ -654,8 +653,7 @@ swiotlb_sync_sg_for_cpu(struct device *h + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) +- sync_single(hwdev, +- (void *)bus_to_virt(sg->dma_address), ++ sync_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } + +@@ -669,8 +667,7 @@ swiotlb_sync_sg_for_device(struct device + + for (i = 0; i < nelems; i++, sg++) + if (in_swiotlb_aperture(sg->dma_address)) +- sync_single(hwdev, +- (void *)bus_to_virt(sg->dma_address), ++ sync_single(hwdev, bus_to_virt(sg->dma_address), + sg->dma_length, dir); + } + +Index: 10.3-2007-11-26/arch/i386/kernel/time-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/time-xen.c 2007-12-06 17:32:10.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/time-xen.c 2007-12-06 17:32:21.000000000 +0100 +@@ -66,6 +66,7 @@ + #include "mach_time.h" + + #include <linux/timex.h> ++#include <linux/clocksource.h> + + #include <asm/hpet.h> + +@@ -74,25 +75,17 @@ + #include <xen/evtchn.h> + #include <xen/interface/vcpu.h> + +-#if defined (__i386__) +-#include <asm/i8259.h> ++#ifdef CONFIG_X86_32 + #include <asm/i8253.h> + DEFINE_SPINLOCK(i8253_lock); + EXPORT_SYMBOL(i8253_lock); +-#endif +- +-#define XEN_SHIFT 22 +- 
+ int pit_latch_buggy; /* extern */ +- +-#if defined(__x86_64__) +-unsigned long vxtime_hz = PIT_TICK_RATE; +-struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ ++#else + volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +-struct timespec __xtime __section_xtime; +-struct timezone __sys_tz __section_sys_tz; + #endif + ++#define XEN_SHIFT 22 ++ + unsigned int cpu_khz; /* Detected as we calibrate the TSC */ + EXPORT_SYMBOL(cpu_khz); + +@@ -224,7 +217,7 @@ int read_current_timer(unsigned long *ti + } + #endif + +-void init_cpu_khz(void) ++static void init_cpu_khz(void) + { + u64 __cpu_khz = 1000000ULL << 32; + struct vcpu_time_info *info = &vcpu_info(0)->time; +@@ -243,16 +236,6 @@ static u64 get_nsec_offset(struct shadow + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); + } + +-#ifdef CONFIG_X86_64 +-static unsigned long get_usec_offset(struct shadow_time_info *shadow) +-{ +- u64 now, delta; +- rdtscll(now); +- delta = now - shadow->tsc_timestamp; +- return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift); +-} +-#endif +- + static void __update_wallclock(time_t sec, long nsec) + { + long wtm_nsec, xtime_nsec; +@@ -360,130 +343,6 @@ void rtc_cmos_write(unsigned char val, u + } + EXPORT_SYMBOL(rtc_cmos_write); + +-#ifdef CONFIG_X86_64 +- +-/* +- * This version of gettimeofday has microsecond resolution +- * and better than microsecond precision on fast x86 machines with TSC. +- */ +-void do_gettimeofday(struct timeval *tv) +-{ +- unsigned long seq; +- unsigned long usec, sec; +- unsigned long max_ntp_tick; +- s64 nsec; +- unsigned int cpu; +- struct shadow_time_info *shadow; +- u32 local_time_version; +- +- cpu = get_cpu(); +- shadow = &per_cpu(shadow_time, cpu); +- +- do { +- local_time_version = shadow->version; +- seq = read_seqbegin(&xtime_lock); +- +- usec = get_usec_offset(shadow); +- +- /* +- * If time_adjust is negative then NTP is slowing the clock +- * so make sure not to go into next possible interval. +- * Better to lose some accuracy than have time go backwards.. +- */ +- if (unlikely(time_adjust < 0)) { +- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; +- usec = min(usec, max_ntp_tick); +- } +- +- sec = xtime.tv_sec; +- usec += (xtime.tv_nsec / NSEC_PER_USEC); +- +- nsec = shadow->system_timestamp - processed_system_time; +- __normalize_time(&sec, &nsec); +- usec += (long)nsec / NSEC_PER_USEC; +- +- if (unlikely(!time_values_up_to_date(cpu))) { +- /* +- * We may have blocked for a long time, +- * rendering our calculations invalid +- * (e.g. the time delta may have +- * overflowed). Detect that and recalculate +- * with fresh values. +- */ +- get_time_values_from_xen(cpu); +- continue; +- } +- } while (read_seqretry(&xtime_lock, seq) || +- (local_time_version != shadow->version)); +- +- put_cpu(); +- +- while (usec >= USEC_PER_SEC) { +- usec -= USEC_PER_SEC; +- sec++; +- } +- +- tv->tv_sec = sec; +- tv->tv_usec = usec; +-} +- +-EXPORT_SYMBOL(do_gettimeofday); +- +-int do_settimeofday(struct timespec *tv) +-{ +- time_t sec; +- s64 nsec; +- unsigned int cpu; +- struct shadow_time_info *shadow; +- struct xen_platform_op op; +- +- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) +- return -EINVAL; +- +- cpu = get_cpu(); +- shadow = &per_cpu(shadow_time, cpu); +- +- write_seqlock_irq(&xtime_lock); +- +- /* +- * Ensure we don't get blocked for a long time so that our time delta +- * overflows. If that were to happen then our shadow time values would +- * be stale, so we can retry with fresh ones. 
+- */ +- for (;;) { +- nsec = tv->tv_nsec - get_nsec_offset(shadow); +- if (time_values_up_to_date(cpu)) +- break; +- get_time_values_from_xen(cpu); +- } +- sec = tv->tv_sec; +- __normalize_time(&sec, &nsec); +- +- if (is_initial_xendomain() && !independent_wallclock) { +- op.cmd = XENPF_settime; +- op.u.settime.secs = sec; +- op.u.settime.nsecs = nsec; +- op.u.settime.system_time = shadow->system_timestamp; +- HYPERVISOR_platform_op(&op); +- update_wallclock(); +- } else if (independent_wallclock) { +- nsec -= shadow->system_timestamp; +- __normalize_time(&sec, &nsec); +- __update_wallclock(sec, nsec); +- } +- +- write_sequnlock_irq(&xtime_lock); +- +- put_cpu(); +- +- clock_was_set(); +- return 0; +-} +- +-EXPORT_SYMBOL(do_settimeofday); +- +-#endif +- + static void sync_xen_wallclock(unsigned long dummy); + static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); + static void sync_xen_wallclock(unsigned long dummy) +@@ -532,15 +391,7 @@ static int set_rtc_mmss(unsigned long no + return retval; + } + +-#ifdef CONFIG_X86_64 +-/* monotonic_clock(): returns # of nanoseconds passed since time_init() +- * Note: This function is required to return accurate +- * time even in the absence of multiple timer ticks. +- */ +-unsigned long long monotonic_clock(void) +-#else + unsigned long long sched_clock(void) +-#endif + { + int cpu = get_cpu(); + struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); +@@ -560,14 +411,6 @@ unsigned long long sched_clock(void) + + return time; + } +-#ifdef CONFIG_X86_64 +-EXPORT_SYMBOL(monotonic_clock); +- +-unsigned long long sched_clock(void) +-{ +- return monotonic_clock(); +-} +-#endif + + unsigned long profile_pc(struct pt_regs *regs) + { +@@ -590,15 +433,13 @@ unsigned long profile_pc(struct pt_regs + } + #else + #ifdef CONFIG_SMP +- if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++ if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) && ++ in_lock_functions(pc)) { + #ifdef CONFIG_FRAME_POINTER + return *(unsigned long *)(regs->ebp + 4); + #else +- unsigned long *sp; +- if ((regs->xcs & 2) == 0) +- sp = (unsigned long *)®s->esp; +- else +- sp = (unsigned long *)regs->esp; ++ unsigned long *sp = (unsigned long *)®s->esp; ++ + /* Return address is either directly at stack pointer + or above a saved eflags. Eflags has bits 22-31 zero, + kernel addresses don't. */ +@@ -752,19 +593,6 @@ irqreturn_t timer_interrupt(int irq, voi + return IRQ_HANDLED; + } + +-#ifndef CONFIG_X86_64 +- +-void tsc_init(void) +-{ +- init_cpu_khz(); +- printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", +- cpu_khz / 1000, cpu_khz % 1000); +- +- use_tsc_delay(); +-} +- +-#include <linux/clocksource.h> +- + void mark_tsc_unstable(void) + { + #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. 
*/ +@@ -818,21 +646,9 @@ static struct clocksource clocksource_xe + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ + .shift = XEN_SHIFT, +- .is_continuous = 1, ++ .flags = CLOCK_SOURCE_IS_CONTINUOUS, + }; + +-static int __init init_xen_clocksource(void) +-{ +- clocksource_xen.mult = clocksource_khz2mult(cpu_khz, +- clocksource_xen.shift); +- +- return clocksource_register(&clocksource_xen); +-} +- +-module_init(init_xen_clocksource); +- +-#endif +- + static void init_missing_ticks_accounting(int cpu) + { + struct vcpu_register_runstate_memory_area area; +@@ -851,7 +667,7 @@ static void init_missing_ticks_accountin + } + + /* not static: needed by APM */ +-unsigned long get_cmos_time(void) ++unsigned long read_persistent_clock(void) + { + unsigned long retval; + unsigned long flags; +@@ -864,11 +680,11 @@ unsigned long get_cmos_time(void) + + return retval; + } +-EXPORT_SYMBOL(get_cmos_time); + + static void sync_cmos_clock(unsigned long dummy); + + static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); ++int no_sync_cmos_clock; + + static void sync_cmos_clock(unsigned long dummy) + { +@@ -912,7 +728,8 @@ static void sync_cmos_clock(unsigned lon + + void notify_arch_cmos_timer(void) + { +- mod_timer(&sync_cmos_timer, jiffies + 1); ++ if (!no_sync_cmos_clock) ++ mod_timer(&sync_cmos_timer, jiffies + 1); + mod_timer(&sync_xen_wallclock_timer, jiffies + 1); + } + +@@ -924,7 +741,7 @@ static int timer_suspend(struct sys_devi + /* + * Estimate time zone so that set_time can update the clock + */ +- unsigned long ctime = get_cmos_time(); ++ unsigned long ctime = read_persistent_clock(); + + clock_cmos_diff = -ctime; + clock_cmos_diff += get_seconds(); +@@ -936,7 +753,7 @@ static int timer_resume(struct sys_devic + { + unsigned long flags; + unsigned long sec; +- unsigned long ctime = get_cmos_time(); ++ unsigned long ctime = read_persistent_clock(); + long sleep_length = (ctime - sleep_start) * HZ; + struct timespec ts; + +@@ -948,10 +765,6 @@ static int timer_resume(struct sys_devic + sleep_length = 0; + ctime = sleep_start; + } +-#ifdef CONFIG_HPET_TIMER +- if (is_hpet_enabled()) +- hpet_reenable(); +-#endif + + sec = ctime + clock_cmos_diff; + ts.tv_sec = sec; +@@ -987,29 +800,11 @@ static int time_init_device(void) + + device_initcall(time_init_device); + +-#ifdef CONFIG_HPET_TIMER + extern void (*late_time_init)(void); +-/* Duplicate of time_init() below, with hpet_enable part added */ +-static void __init hpet_time_init(void) +-{ +- struct timespec ts; +- ts.tv_sec = get_cmos_time(); +- ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); +- +- do_settimeofday(&ts); +- +- if ((hpet_enable() >= 0) && hpet_use_timer) { +- printk("Using HPET for base-timer\n"); +- } +- +- do_time_init(); +-} +-#endif + + /* Dynamically-mapped IRQ. */ + DEFINE_PER_CPU(int, timer_irq); + +-extern void (*late_time_init)(void); + static void setup_cpu0_timer_irq(void) + { + per_cpu(timer_irq, 0) = +@@ -1029,16 +824,9 @@ static struct vcpu_set_periodic_timer xe + + void __init time_init(void) + { +-#ifdef CONFIG_HPET_TIMER +- if (is_hpet_capable()) { +- /* +- * HPET initialization needs to do memory-mapped io. So, let +- * us do a late initialization after mem_init(). 
+- */ +- late_time_init = hpet_time_init; +- return; +- } +-#endif ++ init_cpu_khz(); ++ printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", ++ cpu_khz / 1000, cpu_khz % 1000); + + HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, + &xen_set_periodic_tick); +@@ -1049,18 +837,12 @@ void __init time_init(void) + per_cpu(processed_system_time, 0) = processed_system_time; + init_missing_ticks_accounting(0); + +- update_wallclock(); ++ clocksource_register(&clocksource_xen); + +-#ifdef CONFIG_X86_64 +- init_cpu_khz(); +- printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", +- cpu_khz / 1000, cpu_khz % 1000); ++ update_wallclock(); + +- vxtime.mode = VXTIME_TSC; +- vxtime.quot = (1000000L << 32) / vxtime_hz; +- vxtime.tsc_quot = (1000L << 32) / cpu_khz; +- sync_core(); +- rdtscll(vxtime.last_tsc); ++#ifndef CONFIG_X86_64 ++ use_tsc_delay(); + #endif + + /* Cannot request_irq() until kmem is initialised. */ +@@ -1251,7 +1033,7 @@ static ctl_table xen_table[] = { + }; + static int __init xen_sysctl_init(void) + { +- (void)register_sysctl_table(xen_table, 0); ++ (void)register_sysctl_table(xen_table); + return 0; + } + __initcall(xen_sysctl_init); +Index: 10.3-2007-11-26/arch/i386/kernel/traps-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/traps-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/kernel/traps-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -100,6 +100,7 @@ asmlinkage void fixup_4gb_segment(void); + asmlinkage void machine_check(void); + + int kstack_depth_to_print = 24; ++static unsigned int code_bytes = 64; + ATOMIC_NOTIFIER_HEAD(i386die_chain); + + int register_die_notifier(struct notifier_block *nb) +@@ -297,10 +298,11 @@ void show_registers(struct pt_regs *regs + int i; + int in_kernel = 1; + unsigned long esp; +- unsigned short ss; ++ unsigned short ss, gs; + + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); ++ savesegment(gs, gs); + if (user_mode_vm(regs)) { + in_kernel = 0; + esp = regs->esp; +@@ -319,8 +321,8 @@ void show_registers(struct pt_regs *regs + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); +- printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", +- regs->xds & 0xffff, regs->xes & 0xffff, ss); ++ printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", ++ regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", + TASK_COMM_LEN, current->comm, current->pid, + current_thread_info(), current, current->thread_info); +@@ -330,7 +332,8 @@ void show_registers(struct pt_regs *regs + */ + if (in_kernel) { + u8 *eip; +- int code_bytes = 64; ++ unsigned int code_prologue = code_bytes * 43 / 64; ++ unsigned int code_len = code_bytes; + unsigned char c; + + printk("\n" KERN_EMERG "Stack: "); +@@ -338,14 +341,14 @@ void show_registers(struct pt_regs *regs + + printk(KERN_EMERG "Code: "); + +- eip = (u8 *)regs->eip - 43; ++ eip = (u8 *)regs->eip - code_prologue; + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { + /* try starting at EIP */ + eip = (u8 *)regs->eip; +- code_bytes = 32; ++ code_len = code_len - code_prologue + 1; + } +- for (i = 0; i < code_bytes; i++, eip++) { ++ for (i = 0; i < code_len; i++, eip++) { + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { + printk(" Bad EIP value."); +@@ -1130,3 +1133,13 @@ static int __init 
kstack_setup(char *s) + return 1; + } + __setup("kstack=", kstack_setup); ++ ++static int __init code_bytes_setup(char *s) ++{ ++ code_bytes = simple_strtoul(s, NULL, 0); ++ if (code_bytes > 8192) ++ code_bytes = 8192; ++ ++ return 1; ++} ++__setup("code_bytes=", code_bytes_setup); +Index: 10.3-2007-11-26/arch/i386/mm/fault-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/fault-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/mm/fault-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struc + } + EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + +-static inline int notify_page_fault(enum die_val val, const char *str, +- struct pt_regs *regs, long err, int trap, int sig) ++static inline int notify_page_fault(struct pt_regs *regs, long err) + { + struct die_args args = { + .regs = regs, +- .str = str, ++ .str = "page fault", + .err = err, +- .trapnr = trap, +- .signr = sig ++ .trapnr = 14, ++ .signr = SIGSEGV + }; +- return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); +-} +- +-/* +- * Unlock any spinlocks which will prevent us from getting the +- * message out +- */ +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- +- if (yes) { +- oops_in_progress = 1; +- return; +- } +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; ++ return atomic_notifier_call_chain(¬ify_page_fault_chain, ++ DIE_PAGE_FAULT, &args); + } + + /* +@@ -476,8 +450,7 @@ fastcall void __kprobes do_page_fault(st + /* Can take a spurious fault if mapping changes R/O -> R/W. */ + if (spurious_fault(regs, address, error_code)) + return; +- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, +- SIGSEGV) == NOTIFY_STOP) ++ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + /* + * Don't take the mm semaphore here. 
If we fixup a prefetch +@@ -486,8 +459,7 @@ fastcall void __kprobes do_page_fault(st + goto bad_area_nosemaphore; + } + +- if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, +- SIGSEGV) == NOTIFY_STOP) ++ if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + + /* It's safe to allow irq's after cr2 has been saved and the vmalloc +Index: 10.3-2007-11-26/arch/i386/mm/highmem-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/highmem-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/mm/highmem-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -33,14 +33,16 @@ static void *__kmap_atomic(struct page * + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ BUG_ON(!pte_none(*(kmap_pte-idx))); ++ + if (!PageHighMem(page)) + return page_address(page); + +- idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +- if (!pte_none(*(kmap_pte-idx))) +- BUG(); + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); ++ arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; + } +@@ -94,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn, + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); ++ arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; + } +Index: 10.3-2007-11-26/arch/i386/mm/init-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/init-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/mm/init-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -68,6 +68,7 @@ static pmd_t * __init one_md_table_init( + + #ifdef CONFIG_X86_PAE + pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); + make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); +@@ -89,6 +90,7 @@ static pte_t * __init one_page_table_ini + { + if (pmd_none(*pmd)) { + pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); + make_lowmem_page_readonly(page_table, + XENFEAT_writable_page_tables); + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +Index: 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/pgtable-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -229,6 +229,8 @@ void __set_fixmap (enum fixed_addresses + void __init reserve_top_address(unsigned long reserve) + { + BUG_ON(fixmaps > 0); ++ printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", ++ (int)-reserve); + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; + } +@@ -332,6 +334,12 @@ void pgd_ctor(void *pgd, struct kmem_cac + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); ++ ++ /* must happen under lock */ ++ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, ++ __pa(swapper_pg_dir) >> PAGE_SHIFT, ++ USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); ++ + pgd_list_add(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); + } +@@ -342,6 +350,7 @@ void pgd_dtor(void *pgd, struct 
kmem_cac + { + unsigned long flags; /* can be called from interrupt context */ + ++ paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +@@ -366,6 +375,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + if (!pmd) + goto out_oom; ++ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } + return pgd; +@@ -388,6 +398,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + if (!pmd[i]) + goto out_oom; ++ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); + } + + spin_lock_irqsave(&pgd_lock, flags); +@@ -428,12 +439,17 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + + out_oom: + if (HAVE_SHARED_KERNEL_PMD) { +- for (i--; i >= 0; i--) +- kmem_cache_free(pmd_cache, +- (void *)__va(pgd_val(pgd[i])-1)); ++ for (i--; i >= 0; i--) { ++ pgd_t pgdent = pgd[i]; ++ void* pmd = (void *)__va(pgd_val(pgdent)-1); ++ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); ++ kmem_cache_free(pmd_cache, pmd); ++ } + } else { +- for (i--; i >= 0; i--) ++ for (i--; i >= 0; i--) { ++ paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); + kmem_cache_free(pmd_cache, pmd[i]); ++ } + kfree(pmd); + } + kmem_cache_free(pgd_cache, pgd); +@@ -457,7 +473,9 @@ void pgd_free(pgd_t *pgd) + /* in the PAE case user pgd entries are overwritten before usage */ + if (PTRS_PER_PMD > 1) { + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { +- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ pgd_t pgdent = pgd[i]; ++ void* pmd = (void *)__va(pgd_val(pgdent)-1); ++ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); + kmem_cache_free(pmd_cache, pmd); + } + +Index: 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/ia32/ia32entry-xen.S 2007-10-22 13:53:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S 2007-10-22 13:58:46.000000000 +0200 +@@ -542,7 +542,7 @@ ia32_sys_call_table: + .quad sys32_vm86_warning /* vm86old */ + .quad compat_sys_wait4 + .quad sys_swapoff /* 115 */ +- .quad sys32_sysinfo ++ .quad compat_sys_sysinfo + .quad sys32_ipc + .quad sys_fsync + .quad stub32_sigreturn +@@ -587,7 +587,7 @@ ia32_sys_call_table: + .quad sys_sched_yield + .quad sys_sched_get_priority_max + .quad sys_sched_get_priority_min /* 160 */ +- .quad sys_sched_rr_get_interval ++ .quad sys32_sched_rr_get_interval + .quad compat_sys_nanosleep + .quad sys_mremap + .quad sys_setresuid16 +@@ -745,4 +745,5 @@ ia32_sys_call_table: + .quad compat_sys_vmsplice + .quad compat_sys_move_pages + .quad sys_getcpu ++ .quad sys_epoll_pwait + ia32_syscall_end: +Index: 10.3-2007-11-26/arch/x86_64/ia32/syscall32-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/ia32/syscall32-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/ia32/syscall32-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -21,70 +21,36 @@ extern unsigned char syscall32_syscall[] + extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; + extern int sysctl_vsyscall32; + +-char *syscall32_page; ++static struct page *syscall32_pages[1]; + #ifndef USE_INT80 + static int use_sysenter = -1; + #endif + +-static struct page * +-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) +-{ +- struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); +- get_page(p); +- return p; 
+-} +- +-/* Prevent VMA merging */ +-static void syscall32_vma_close(struct vm_area_struct *vma) +-{ +-} +- +-static struct vm_operations_struct syscall32_vm_ops = { +- .close = syscall32_vma_close, +- .nopage = syscall32_nopage, +-}; +- + struct linux_binprm; + + /* Setup a VMA at program startup for the vsyscall page */ + int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) + { +- int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; +- struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + int ret; + +- vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); +- if (!vma) +- return -ENOMEM; +- +- memset(vma, 0, sizeof(struct vm_area_struct)); +- /* Could randomize here */ +- vma->vm_start = VSYSCALL32_BASE; +- vma->vm_end = VSYSCALL32_END; +- /* MAYWRITE to allow gdb to COW and set breakpoints */ +- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; ++ down_write(&mm->mmap_sem); + /* ++ * MAYWRITE to allow gdb to COW and set breakpoints ++ * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ +- vma->vm_flags |= VM_ALWAYSDUMP; +- vma->vm_flags |= mm->def_flags; +- vma->vm_page_prot = protection_map[vma->vm_flags & 7]; +- vma->vm_ops = &syscall32_vm_ops; +- vma->vm_mm = mm; +- +- down_write(&mm->mmap_sem); +- if ((ret = insert_vm_struct(mm, vma))) { +- up_write(&mm->mmap_sem); +- kmem_cache_free(vm_area_cachep, vma); +- return ret; +- } +- mm->total_vm += npages; ++ /* Could randomize here */ ++ ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, ++ VM_READ|VM_EXEC| ++ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| ++ VM_ALWAYSDUMP, ++ syscall32_pages); + up_write(&mm->mmap_sem); +- return 0; ++ return ret; + } + + const char *arch_vma_name(struct vm_area_struct *vma) +@@ -97,10 +63,11 @@ const char *arch_vma_name(struct vm_area + + static int __init init_syscall32(void) + { +- syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); ++ char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!syscall32_page) + panic("Cannot allocate syscall32 page"); + ++ syscall32_pages[0] = virt_to_page(syscall32_page); + #ifdef USE_INT80 + /* + * At this point we use int 0x80. 
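[Editor's note, not part of the patch: the syscall32 hunk above drops the hand-rolled VMA setup (a kmem_cache_alloc'd vm_area_struct with a private nopage handler to block merging) in favour of install_special_mapping(), the helper that entered the kernel in 2.6.21. What follows is a minimal sketch of that API as it looked in this era -- illustrative only; demo_map_page() and demo_pages are invented names, and the caller is assumed to have filled demo_pages[0] with an allocated page.

/*
 * Illustrative sketch only: map one pre-allocated page read/exec into
 * the current process at addr, the way syscall32_setup_pages() above
 * does for the vsyscall page. The core VM keeps the page array (one
 * struct page pointer per page of the mapping) in vm_private_data and
 * serves faults from it, so the caller no longer manages page
 * refcounts or VMA merging.
 */
#include <linux/mm.h>
#include <linux/sched.h>

static struct page *demo_pages[1];	/* filled in by the caller */

static int demo_map_page(unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_write(&mm->mmap_sem);
	ret = install_special_mapping(mm, addr, PAGE_SIZE,
				      VM_READ | VM_EXEC |
				      VM_MAYREAD | VM_MAYEXEC |
				      VM_ALWAYSDUMP,
				      demo_pages);
	up_write(&mm->mmap_sem);
	return ret;
}

One practical point visible in the hunk itself: VM_MAYWRITE is what lets gdb COW the page to plant breakpoints, and VM_ALWAYSDUMP ensures the vDSO lands in core dumps even though it is not ordinary anonymous memory. End of note.]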
+Index: 10.3-2007-11-26/arch/x86_64/kernel/Makefile +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/Makefile 2007-10-22 13:53:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/Makefile 2007-10-22 13:58:46.000000000 +0200 +@@ -68,7 +68,7 @@ pci-dma-y += ../../i386/kernel/pci-dma + microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o + quirks-y := ../../i386/kernel/quirks-xen.o + +-n-obj-xen := early-quirks.o i8259.o reboot.o i8237.o smpboot.o trampoline.o ++n-obj-xen := early-quirks.o i8259.o reboot.o i8237.o smpboot.o trampoline.o tsc.o tsc_sync.o + + include $(srctree)/scripts/Makefile.xen + +Index: 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/e820-xen.c 2007-10-22 13:53:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -88,6 +88,13 @@ static inline int bad_addr(unsigned long + return 1; + } + ++#ifdef CONFIG_NUMA ++ /* NUMA memory to node map */ ++ if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { ++ *addrp = nodemap_addr + nodemap_size; ++ return 1; ++ } ++#endif + /* XXX ramdisk image here? */ + #else + if (last < (table_end<<PAGE_SHIFT)) { +@@ -206,6 +213,37 @@ unsigned long __init e820_end_of_ram(voi + } + + /* ++ * Find the hole size in the range. ++ */ ++unsigned long __init e820_hole_size(unsigned long start, unsigned long end) ++{ ++ unsigned long ram = 0; ++ int i; ++ ++ for (i = 0; i < e820.nr_map; i++) { ++ struct e820entry *ei = &e820.map[i]; ++ unsigned long last, addr; ++ ++ if (ei->type != E820_RAM || ++ ei->addr+ei->size <= start || ++ ei->addr >= end) ++ continue; ++ ++ addr = round_up(ei->addr, PAGE_SIZE); ++ if (addr < start) ++ addr = start; ++ ++ last = round_down(ei->addr + ei->size, PAGE_SIZE); ++ if (last >= end) ++ last = end; ++ ++ if (last > addr) ++ ram += last - addr; ++ } ++ return ((end - start) - ram); ++} ++ ++/* + * Mark e820 reserved areas as busy for the resource manager. 
+ */ + void __init e820_reserve_resources(struct e820entry *e820, int nr_map) +@@ -716,7 +754,7 @@ static int __init parse_memmap_opt(char + } + early_param("memmap", parse_memmap_opt); + +-void finish_e820_parsing(void) ++void __init finish_e820_parsing(void) + { + if (userdef) { + printk(KERN_INFO "user-defined physical RAM map:\n"); +Index: 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/entry-xen.S 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S 2007-10-22 13:58:46.000000000 +0200 +@@ -636,6 +636,9 @@ END(invalidate_interrupt\num) + ENTRY(call_function_interrupt) + apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt + END(call_function_interrupt) ++ENTRY(irq_move_cleanup_interrupt) ++ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt ++END(irq_move_cleanup_interrupt) + #endif + + ENTRY(apic_timer_interrupt) +Index: 10.3-2007-11-26/arch/x86_64/kernel/genapic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/genapic-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/genapic-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -65,8 +65,8 @@ void __init clustered_apic_check(void) + * Some x86_64 machines use physical APIC mode regardless of how many + * procs/clusters are present (x86_64 ES7000 is an example). + */ +- if (acpi_fadt.revision > FADT2_REVISION_ID) +- if (acpi_fadt.force_apic_physical_destination_mode) { ++ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) ++ if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { + genapic = &apic_cluster; + goto print; + } +Index: 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head64-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -42,8 +42,6 @@ static void __init clear_bss(void) + #define OLD_CL_BASE_ADDR 0x90000 + #define OLD_CL_OFFSET 0x90022 + +-extern char saved_command_line[]; +- + static void __init copy_bootdata(char *real_mode_data) + { + #ifndef CONFIG_XEN +@@ -59,14 +57,14 @@ static void __init copy_bootdata(char *r + new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; + } + command_line = (char *) ((u64)(new_data)); +- memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); ++ memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); + #else + int max_cmdline; + + if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) + max_cmdline = COMMAND_LINE_SIZE; +- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); +- saved_command_line[max_cmdline-1] = '\0'; ++ memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline); ++ boot_command_line[max_cmdline-1] = '\0'; + #endif + } + +Index: 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/io_apic-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -36,6 +36,7 @@ + #include <acpi/acpi_bus.h> + #endif + ++#include <asm/idle.h> + #include <asm/io.h> + #include <asm/smp.h> + #include <asm/desc.h> +@@ -47,7 +48,20 @@ + #include <asm/msidef.h> + #include <asm/hypertransport.h> + +-static int 
assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); ++struct irq_cfg { ++#ifndef CONFIG_XEN ++ cpumask_t domain; ++ cpumask_t old_domain; ++#endif ++ unsigned move_cleanup_count; ++ u8 vector; ++ u8 move_in_progress : 1; ++}; ++ ++/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ ++struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; ++ ++static int assign_irq_vector(int irq, cpumask_t mask); + + #define __apicdebuginit __init + +@@ -88,7 +102,7 @@ int nr_ioapic_registers[MAX_IO_APICS]; + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +-#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS ++#define MAX_PLUS_SHARED_IRQS NR_IRQS + #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + + /* +@@ -259,21 +273,19 @@ static void __target_IO_APIC_irq(unsigne + + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; +- int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); ++ return; + +- vector = assign_irq_vector(irq, mask, &tmp); +- if (vector < 0) ++ if (assign_irq_vector(irq, mask)) + return; + ++ cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + /* +@@ -282,8 +294,8 @@ static void set_ioapic_affinity_irq(unsi + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, vector); +- set_native_irq_info(irq, mask); ++ __target_IO_APIC_irq(irq, dest, cfg->vector); ++ irq_desc[irq].affinity = mask; + spin_unlock_irqrestore(&ioapic_lock, flags); + } + #endif +@@ -329,11 +341,11 @@ static void add_pin_to_irq(unsigned int + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ ++ FINAL; \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ +- FINAL; \ + } + + #define DO_ACTION(name,R,ACTION, FINAL) \ +@@ -666,74 +678,58 @@ static int pin_2_irq(int idx, int apic, + return irq; + } + +-static inline int IO_APIC_irq_trigger(int irq) +-{ +- int apic, idx, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- idx = find_irq_entry(apic,pin,mp_INT); +- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) +- return irq_trigger(idx); +- } +- } +- /* +- * nonexistent IRQs are edge default +- */ +- return 0; +-} +- +-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ +-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; +- +-static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) ++static int __assign_irq_vector(int irq, cpumask_t mask) + { +- int vector; + struct physdev_irq irq_op; ++ struct irq_cfg *cfg; + +- BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); ++ BUG_ON((unsigned)irq >= NR_IRQS); ++ cfg = &irq_cfg[irq]; + +- cpus_and(*result, mask, cpu_online_map); ++ if ((cfg->move_in_progress) || cfg->move_cleanup_count) ++ return -EBUSY; + +- if (irq_vector[irq] > 0) +- return irq_vector[irq]; ++ if (cfg->vector) ++ return 0; + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; + +- vector = irq_op.vector; +- irq_vector[irq] = vector; ++ cfg->vector = irq_op.vector; + +- return vector; ++ return 0; + } + +-static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) ++static int assign_irq_vector(int irq, cpumask_t mask) + { +- int vector; ++ int err; + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); +- vector = __assign_irq_vector(irq, mask, result); ++ err = __assign_irq_vector(irq, mask); + spin_unlock_irqrestore(&vector_lock, flags); +- return vector; ++ return err; + } + + #ifndef CONFIG_XEN + static void __clear_irq_vector(int irq) + { ++ struct irq_cfg *cfg; + cpumask_t mask; + int cpu, vector; + +- BUG_ON(!irq_vector[irq]); ++ BUG_ON((unsigned)irq >= NR_IRQS); ++ cfg = &irq_cfg[irq]; ++ BUG_ON(!cfg->vector); + +- vector = irq_vector[irq]; +- cpus_and(mask, irq_domain[irq], cpu_online_map); ++ vector = cfg->vector; ++ cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + +- irq_vector[irq] = 0; +- irq_domain[irq] = CPU_MASK_NONE; ++ cfg->vector = 0; ++ cfg->domain = CPU_MASK_NONE; + } + + void __setup_vector_irq(int cpu) +@@ -743,10 +739,10 @@ void __setup_vector_irq(int cpu) + int irq, vector; + + /* Mark the inuse vectors */ +- for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { +- if (!cpu_isset(cpu, irq_domain[irq])) ++ for (irq = 0; irq < NR_IRQS; ++irq) { ++ if (!cpu_isset(cpu, irq_cfg[irq].domain)) + continue; +- vector = irq_vector[irq]; ++ vector = irq_cfg[irq].vector; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ +@@ -754,41 +750,49 @@ void __setup_vector_irq(int cpu) + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; +- if (!cpu_isset(cpu, irq_domain[irq])) ++ if (!cpu_isset(cpu, irq_cfg[irq].domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } + } + +-extern void (*interrupt[NR_IRQS])(void); +- + static struct irq_chip ioapic_chip; + +-#define IOAPIC_AUTO -1 +-#define IOAPIC_EDGE 0 +-#define IOAPIC_LEVEL 1 +- +-static void ioapic_register_intr(int irq, int vector, unsigned long trigger) ++static void ioapic_register_intr(int irq, unsigned long trigger) + { +- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || +- trigger == IOAPIC_LEVEL) ++ if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, "fasteoi"); +- else { +- irq_desc[irq].status |= IRQ_DELAYED_DISABLE; ++ else + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); +- } + } + #else +-#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) ++#define ioapic_register_intr(irq,trigger) ((void)0) + #endif /* !CONFIG_XEN */ + +-static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) ++static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, ++ int trigger, int polarity) + { ++ 
struct irq_cfg *cfg = irq_cfg + irq; + struct IO_APIC_route_entry entry; +- int vector; +- unsigned long flags; ++ cpumask_t mask; + ++ if (!IO_APIC_IRQ(irq)) ++ return; ++ ++ mask = TARGET_CPUS; ++ if (assign_irq_vector(irq, mask)) ++ return; ++ ++#ifndef CONFIG_XEN ++ cpus_and(mask, cfg->domain, mask); ++#endif ++ ++ apic_printk(APIC_VERBOSE,KERN_DEBUG ++ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " ++ "IRQ %d Mode:%i Active:%i)\n", ++ apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, ++ irq, trigger, polarity); + + /* + * add it to the IO-APIC irq-routing table: +@@ -797,41 +801,23 @@ static void __init setup_IO_APIC_irq(int + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; ++ entry.dest = cpu_mask_to_apicid(mask); + entry.mask = 0; /* enable IRQ */ +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.trigger = trigger; ++ entry.polarity = polarity; ++ entry.vector = cfg->vector; + +- entry.trigger = irq_trigger(idx); +- entry.polarity = irq_polarity(idx); +- +- if (irq_trigger(idx)) { +- entry.trigger = 1; ++ /* Mask level triggered irqs. ++ * Use IRQ_DELAYED_DISABLE for edge triggered irqs. ++ */ ++ if (trigger) + entry.mask = 1; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); +- } + +- if (/* !apic && */ !IO_APIC_IRQ(irq)) +- return; +- +- if (IO_APIC_IRQ(irq)) { +- cpumask_t mask; +- vector = assign_irq_vector(irq, TARGET_CPUS, &mask); +- if (vector < 0) +- return; +- +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); +- entry.vector = vector; +- +- ioapic_register_intr(irq, vector, IOAPIC_AUTO); +- if (!apic && (irq < 16)) +- disable_8259A_irq(irq); +- } ++ ioapic_register_intr(irq, trigger); ++ if (irq < 16) ++ disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- set_native_irq_info(irq, TARGET_CPUS); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- + } + + static void __init setup_IO_APIC_irqs(void) +@@ -856,8 +842,8 @@ static void __init setup_IO_APIC_irqs(vo + irq = pin_2_irq(idx, apic, pin); + add_pin_to_irq(irq, apic, pin); + +- setup_IO_APIC_irq(apic, pin, idx, irq); +- ++ setup_IO_APIC_irq(apic, pin, irq, ++ irq_trigger(idx), irq_polarity(idx)); + } + } + +@@ -888,7 +874,7 @@ static void __init setup_ExtINT_IRQ0_pin + */ + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* unmask IRQ now */ +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.polarity = 0; + entry.trigger = 0; +@@ -988,18 +974,17 @@ void __apicdebuginit print_IO_APIC(void) + + printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); + +- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" +- " Stat Dest Deli Vect: \n"); ++ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" ++ " Stat Dmod Deli Vect: \n"); + + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + +- printk(KERN_DEBUG " %02x %03X %02X ", ++ printk(KERN_DEBUG " %02x %03X ", + i, +- entry.dest.logical.logical_dest, +- entry.dest.physical.physical_dest ++ entry.dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", +@@ -1270,8 +1255,7 @@ void disable_IO_APIC(void) + entry.dest_mode = 0; /* Physical */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ + entry.vector = 0; +- entry.dest.physical.physical_dest = +- GET_APIC_ID(apic_read(APIC_ID)); ++ entry.dest = GET_APIC_ID(apic_read(APIC_ID)); + + /* + * Add it to the IO-APIC irq-routing table: +@@ -1356,16 +1340,15 @@ static unsigned int startup_ioapic_irq(u + + static int ioapic_retrigger_irq(unsigned int irq) + { ++ struct irq_cfg *cfg = &irq_cfg[irq]; + cpumask_t mask; +- unsigned vector; + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); +- vector = irq_vector[irq]; + cpus_clear(mask); +- cpu_set(first_cpu(irq_domain[irq]), mask); ++ cpu_set(first_cpu(cfg->domain), mask); + +- send_IPI_mask(mask, vector); ++ send_IPI_mask(mask, cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +@@ -1380,8 +1363,68 @@ static int ioapic_retrigger_irq(unsigned + * races. + */ + ++#ifdef CONFIG_SMP ++asmlinkage void smp_irq_move_cleanup_interrupt(void) ++{ ++ unsigned vector, me; ++ ack_APIC_irq(); ++ exit_idle(); ++ irq_enter(); ++ ++ me = smp_processor_id(); ++ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { ++ unsigned int irq; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ irq = __get_cpu_var(vector_irq)[vector]; ++ if (irq >= NR_IRQS) ++ continue; ++ ++ desc = irq_desc + irq; ++ cfg = irq_cfg + irq; ++ spin_lock(&desc->lock); ++ if (!cfg->move_cleanup_count) ++ goto unlock; ++ ++ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) ++ goto unlock; ++ ++ __get_cpu_var(vector_irq)[vector] = -1; ++ cfg->move_cleanup_count--; ++unlock: ++ spin_unlock(&desc->lock); ++ } ++ ++ irq_exit(); ++} ++ ++static void irq_complete_move(unsigned int irq) ++{ ++ struct irq_cfg *cfg = irq_cfg + irq; ++ unsigned vector, me; ++ ++ if (likely(!cfg->move_in_progress)) ++ return; ++ ++ vector = ~get_irq_regs()->orig_rax; ++ me = smp_processor_id(); ++ if ((vector == cfg->vector) && ++ cpu_isset(smp_processor_id(), cfg->domain)) { ++ cpumask_t cleanup_mask; ++ ++ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); ++ cfg->move_cleanup_count = cpus_weight(cleanup_mask); ++ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ cfg->move_in_progress = 0; ++ } ++} ++#else ++static inline void irq_complete_move(unsigned int irq) {} ++#endif ++ + static void ack_apic_edge(unsigned int irq) + { ++ irq_complete_move(irq); + move_native_irq(irq); + ack_APIC_irq(); + } +@@ -1390,6 +1433,7 @@ static void ack_apic_level(unsigned int + { + int do_unmask_irq = 0; + ++ irq_complete_move(irq); + #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { +@@ -1441,7 +1485,7 @@ static inline void init_IO_APIC_traps(vo + */ + for (irq = 0; irq < NR_IRQS ; irq++) { + int tmp = irq; +- if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { ++ if (IO_APIC_IRQ(tmp) && 
!irq_cfg[tmp].vector) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 +@@ -1539,7 +1583,7 @@ static inline void unlock_ExtINT_logic(v + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ +- entry1.dest.physical.physical_dest = hard_smp_processor_id(); ++ entry1.dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; +@@ -1583,15 +1627,14 @@ static inline void unlock_ExtINT_logic(v + */ + static inline void check_timer(void) + { ++ struct irq_cfg *cfg = irq_cfg + 0; + int apic1, pin1, apic2, pin2; +- int vector; +- cpumask_t mask; + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); +- vector = assign_irq_vector(0, TARGET_CPUS, &mask); ++ assign_irq_vector(0, TARGET_CPUS); + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI +@@ -1611,7 +1654,7 @@ static inline void check_timer(void) + apic2 = ioapic_i8259.apic; + + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", +- vector, apic1, pin1, apic2, pin2); ++ cfg->vector, apic1, pin1, apic2, pin2); + + if (pin1 != -1) { + /* +@@ -1642,7 +1685,7 @@ static inline void check_timer(void) + /* + * legacy devices should be connected to IO APIC #0 + */ +- setup_ExtINT_IRQ0_pin(apic2, pin2, vector); ++ setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); + if (timer_irq_works()) { + apic_printk(APIC_VERBOSE," works.\n"); + nmi_watchdog_default(); +@@ -1667,14 +1710,14 @@ static inline void check_timer(void) + + disable_8259A_irq(0); + irq_desc[0].chip = &lapic_irq_type; +- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ ++ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + apic_printk(APIC_VERBOSE," works.\n"); + return; + } +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_VERBOSE," failed.\n"); + + apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); +@@ -1828,19 +1871,16 @@ int create_irq(void) + /* Allocate an unused irq */ + int irq; + int new; +- int vector = 0; + unsigned long flags; +- cpumask_t mask; + + irq = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for (new = (NR_IRQS - 1); new >= 0; new--) { + if (platform_legacy_irq(new)) + continue; +- if (irq_vector[new] != 0) ++ if (irq_cfg[new].vector != 0) + continue; +- vector = __assign_irq_vector(new, TARGET_CPUS, &mask); +- if (likely(vector > 0)) ++ if (__assign_irq_vector(new, TARGET_CPUS) == 0) + irq = new; + break; + } +@@ -1870,12 +1910,15 @@ void destroy_irq(unsigned int irq) + #ifdef CONFIG_PCI_MSI + static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) + { +- int vector; ++ struct irq_cfg *cfg = irq_cfg + irq; ++ int err; + unsigned dest; + cpumask_t tmp; + +- vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); +- if (vector >= 0) { ++ tmp = TARGET_CPUS; ++ err = assign_irq_vector(irq, tmp); ++ if (!err) { ++ cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg->address_hi = MSI_ADDR_BASE_HI; +@@ -1895,40 +1938,38 @@ static int msi_compose_msg(struct pci_de + ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
+ MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | +- MSI_DATA_VECTOR(vector); ++ MSI_DATA_VECTOR(cfg->vector); + } +- return vector; ++ return err; + } + + #ifdef CONFIG_SMP + static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; +- int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); ++ return; + +- vector = assign_irq_vector(irq, mask, &tmp); +- if (vector < 0) ++ if (assign_irq_vector(irq, mask)) + return; + ++ cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + read_msi_msg(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(vector); ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); +- set_native_irq_info(irq, mask); ++ irq_desc[irq].affinity = mask; + } + #endif /* CONFIG_SMP */ + +@@ -1947,24 +1988,31 @@ static struct irq_chip msi_chip = { + .retrigger = ioapic_retrigger_irq, + }; + +-int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) ++int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) + { + struct msi_msg msg; +- int ret; ++ int irq, ret; ++ irq = create_irq(); ++ if (irq < 0) ++ return irq; ++ ++ set_irq_msi(irq, desc); + ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) ++ if (ret < 0) { ++ destroy_irq(irq); + return ret; ++ } + + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + +- return 0; ++ return irq; + } + + void arch_teardown_msi_irq(unsigned int irq) + { +- return; ++ destroy_irq(irq); + } + + #endif /* CONFIG_PCI_MSI */ +@@ -1992,24 +2040,22 @@ static void target_ht_irq(unsigned int i + + static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) + { ++ struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp; +- int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +- tmp = TARGET_CPUS; +- +- cpus_and(mask, tmp, CPU_MASK_ALL); ++ return; + +- vector = assign_irq_vector(irq, mask, &tmp); +- if (vector < 0) ++ if (assign_irq_vector(irq, mask)) + return; + ++ cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + +- target_ht_irq(irq, dest, vector); +- set_native_irq_info(irq, mask); ++ target_ht_irq(irq, dest, cfg->vector); ++ irq_desc[irq].affinity = mask; + } + #endif + +@@ -2026,14 +2072,17 @@ static struct irq_chip ht_irq_chip = { + + int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) + { +- int vector; ++ struct irq_cfg *cfg = irq_cfg + irq; ++ int err; + cpumask_t tmp; + +- vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); +- if (vector >= 0) { ++ tmp = TARGET_CPUS; ++ err = assign_irq_vector(irq, tmp); ++ if (!err) { + struct ht_irq_msg msg; + unsigned dest; + ++ cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +@@ -2041,7 +2090,7 @@ int arch_setup_ht_irq(unsigned int irq, + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | +- HT_IRQ_LOW_VECTOR(vector) | ++ HT_IRQ_LOW_VECTOR(cfg->vector) | + ((INT_DEST_MODE == 0) ? 
+ HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | +@@ -2056,7 +2105,7 @@ int arch_setup_ht_irq(unsigned int irq, + set_irq_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + } +- return vector; ++ return err; + } + #endif /* CONFIG_HT_IRQ */ + +@@ -2081,13 +2130,8 @@ int __init io_apic_get_redir_entries (in + } + + +-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) + { +- struct IO_APIC_route_entry entry; +- unsigned long flags; +- int vector; +- cpumask_t mask; +- + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); +@@ -2100,42 +2144,7 @@ int io_apic_set_pci_routing (int ioapic, + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); + +- +- vector = assign_irq_vector(irq, TARGET_CPUS, &mask); +- if (vector < 0) +- return vector; +- +- /* +- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. +- * Note that we mask (disable) IRQs now -- these get enabled when the +- * corresponding device driver registers for this IRQ. +- */ +- +- memset(&entry,0,sizeof(entry)); +- +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.dest_mode = INT_DEST_MODE; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); +- entry.trigger = edge_level; +- entry.polarity = active_high_low; +- entry.mask = 1; /* Disabled (masked) */ +- entry.vector = vector & 0xff; +- +- apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " +- "IRQ %d Mode:%i Active:%i)\n", ioapic, +- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, +- edge_level, active_high_low); +- +- ioapic_register_intr(irq, entry.vector, edge_level); +- +- if (!ioapic && (irq < 16)) +- disable_8259A_irq(irq); +- +- ioapic_write_entry(ioapic, pin, entry); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- set_native_irq_info(irq, TARGET_CPUS); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + + return 0; + } +@@ -2168,8 +2177,10 @@ void __init setup_ioapic_dest(void) + * when you have too many devices, because at that time only boot + * cpu is online. 
+ */ +- if(!irq_vector[irq]) +- setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); ++ if (!irq_cfg[irq].vector) ++ setup_IO_APIC_irq(ioapic, pin, irq, ++ irq_trigger(irq_entry), ++ irq_polarity(irq_entry)); + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } +Index: 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/irq-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -18,6 +18,7 @@ + #include <asm/uaccess.h> + #include <asm/io_apic.h> + #include <asm/idle.h> ++#include <asm/smp.h> + + atomic_t irq_err_count; + +@@ -120,9 +121,15 @@ asmlinkage unsigned int do_IRQ(struct pt + + if (likely(irq < NR_IRQS)) + generic_handle_irq(irq); +- else if (printk_ratelimit()) +- printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", +- __func__, smp_processor_id(), irq); ++ else { ++#ifndef CONFIG_XEN ++ if (!disable_apic) ++ ack_APIC_irq(); ++#endif ++ if (printk_ratelimit()) ++ printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", ++ __func__, smp_processor_id(), irq); ++ } + + irq_exit(); + +Index: 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/mpparse-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -60,9 +60,9 @@ unsigned long mp_lapic_addr = 0; + /* Processor that is doing the boot up */ + unsigned int boot_cpu_id = -1U; + /* Internal processor count */ +-unsigned int num_processors __initdata = 0; ++unsigned int num_processors __cpuinitdata = 0; + +-unsigned disabled_cpus __initdata; ++unsigned disabled_cpus __cpuinitdata; + + /* Bitmask of physically existing CPUs */ + physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; +@@ -808,7 +808,7 @@ int mp_register_gsi(u32 gsi, int trigger + return gsi; + + /* Don't set up the ACPI SCI because it's already set up */ +- if (acpi_fadt.sci_int == gsi) ++ if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); +Index: 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-10-22 13:53:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -35,7 +35,7 @@ struct dma_mapping_ops swiotlb_dma_ops = + #endif + }; + +-void pci_swiotlb_init(void) ++void __init pci_swiotlb_init(void) + { + #if 0 + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ +Index: 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/process-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -337,14 +337,17 @@ void load_gs_index(unsigned gs) + void flush_thread(void) + { + struct task_struct *tsk = current; +- struct thread_info *t = current_thread_info(); + +- if (t->flags & _TIF_ABI_PENDING) { +- t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); +- if (t->flags & _TIF_IA32) ++ if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { ++ clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); ++ if (test_tsk_thread_flag(tsk, TIF_IA32)) { ++ clear_tsk_thread_flag(tsk, TIF_IA32); ++ } else { ++ 
set_tsk_thread_flag(tsk, TIF_IA32); + current_thread_info()->status |= TS_COMPAT; ++ } + } +- t->flags &= ~_TIF_DEBUG; ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); + + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; +Index: 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup-xen.c 2007-10-22 13:53:25.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c 2007-10-22 13:58:46.000000000 +0200 +@@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(edid_info); + + extern int root_mountflags; + +-char command_line[COMMAND_LINE_SIZE]; ++char __initdata command_line[COMMAND_LINE_SIZE]; + + struct resource standard_io_resources[] = { + { .name = "dma1", .start = 0x00, .end = 0x1f, +@@ -182,134 +182,6 @@ struct resource code_resource = { + .flags = IORESOURCE_RAM, + }; + +-#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM) +- +-static struct resource system_rom_resource = { +- .name = "System ROM", +- .start = 0xf0000, +- .end = 0xfffff, +- .flags = IORESOURCE_ROM, +-}; +- +-static struct resource extension_rom_resource = { +- .name = "Extension ROM", +- .start = 0xe0000, +- .end = 0xeffff, +- .flags = IORESOURCE_ROM, +-}; +- +-static struct resource adapter_rom_resources[] = { +- { .name = "Adapter ROM", .start = 0xc8000, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM }, +- { .name = "Adapter ROM", .start = 0, .end = 0, +- .flags = IORESOURCE_ROM } +-}; +- +-static struct resource video_rom_resource = { +- .name = "Video ROM", +- .start = 0xc0000, +- .end = 0xc7fff, +- .flags = IORESOURCE_ROM, +-}; +- +-static struct resource video_ram_resource = { +- .name = "Video RAM area", +- .start = 0xa0000, +- .end = 0xbffff, +- .flags = IORESOURCE_RAM, +-}; +- +-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) +- +-static int __init romchecksum(unsigned char *rom, unsigned long length) +-{ +- unsigned char *p, sum = 0; +- +- for (p = rom; p < rom + length; p++) +- sum += *p; +- return sum == 0; +-} +- +-static void __init probe_roms(void) +-{ +- unsigned long start, length, upper; +- unsigned char *rom; +- int i; +- +-#ifdef CONFIG_XEN +- /* Nothing to do if not running in dom0. */ +- if (!is_initial_xendomain()) +- return; +-#endif +- +- /* video rom */ +- upper = adapter_rom_resources[0].start; +- for (start = video_rom_resource.start; start < upper; start += 2048) { +- rom = isa_bus_to_virt(start); +- if (!romsignature(rom)) +- continue; +- +- video_rom_resource.start = start; +- +- /* 0 < length <= 0x7f * 512, historically */ +- length = rom[2] * 512; +- +- /* if checksum okay, trust length byte */ +- if (length && romchecksum(rom, length)) +- video_rom_resource.end = start + length - 1; +- +- request_resource(&iomem_resource, &video_rom_resource); +- break; +- } +- +- start = (video_rom_resource.end + 1 + 2047) & ~2047UL; +- if (start < upper) +- start = upper; +- +- /* system rom */ +- request_resource(&iomem_resource, &system_rom_resource); +- upper = system_rom_resource.start; +- +- /* check for extension rom (ignore length byte!) 
*/ +- rom = isa_bus_to_virt(extension_rom_resource.start); +- if (romsignature(rom)) { +- length = extension_rom_resource.end - extension_rom_resource.start + 1; +- if (romchecksum(rom, length)) { +- request_resource(&iomem_resource, &extension_rom_resource); +- upper = extension_rom_resource.start; +- } +- } +- +- /* check for adapter roms on 2k boundaries */ +- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; +- start += 2048) { +- rom = isa_bus_to_virt(start); +- if (!romsignature(rom)) +- continue; +- +- /* 0 < length <= 0x7f * 512, historically */ +- length = rom[2] * 512; +- +- /* but accept any length that fits if checksum okay */ +- if (!length || start + length > upper || !romchecksum(rom, length)) +- continue; +- +- adapter_rom_resources[i].start = start; +- adapter_rom_resources[i].end = start + length - 1; +- request_resource(&iomem_resource, &adapter_rom_resources[i]); +- +- start = adapter_rom_resources[i++].end & ~2047UL; +- } +-} +- + #ifdef CONFIG_PROC_VMCORE + /* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. This option will be passed +@@ -406,7 +278,7 @@ void __init setup_arch(char **cmdline_p) + #ifdef CONFIG_XEN + extern struct e820map machine_e820; + +- printk(KERN_INFO "Command line: %s\n", saved_command_line); ++ printk(KERN_INFO "Command line: %s\n", boot_command_line); + + /* Register a call for panic conditions. */ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); +@@ -454,7 +326,7 @@ void __init setup_arch(char **cmdline_p) + + ARCH_SETUP + #else +- printk(KERN_INFO "Command line: %s\n", saved_command_line); ++ printk(KERN_INFO "Command line: %s\n", boot_command_line); + + ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); + screen_info = SCREEN_INFO; +@@ -485,7 +357,7 @@ void __init setup_arch(char **cmdline_p) + + early_identify_cpu(&boot_cpu_data); + +- strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); ++ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + parse_early_param(); +@@ -555,6 +427,11 @@ void __init setup_arch(char **cmdline_p) + /* reserve ebda region */ + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); ++#ifdef CONFIG_NUMA ++ /* reserve nodemap region */ ++ if (nodemap_addr) ++ reserve_bootmem_generic(nodemap_addr, nodemap_size); ++#endif + + #ifdef CONFIG_SMP + /* +@@ -724,10 +601,8 @@ void __init setup_arch(char **cmdline_p) + #endif + + /* +- * Request address space for all standard RAM and ROM resources +- * and also for regions reported as reserved by the e820. +- */ +- probe_roms(); ++ * We trust e820 completely. No explicit ROM probing in memory. 
++	 */
+ #ifdef CONFIG_XEN
+ 	if (is_initial_xendomain()) {
+ 		struct xen_memory_map memmap;
+@@ -746,8 +621,6 @@ void __init setup_arch(char **cmdline_p)
+ 	e820_mark_nosave_regions();
+ #endif
+ 
+-	request_resource(&iomem_resource, &video_ram_resource);
+-
+ 	{
+ 	unsigned i;
+ 	/* request I/O space for devices used on all i[345]86 PCs */
+@@ -1325,7 +1198,8 @@ static int show_cpuinfo(struct seq_file
+ 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ 		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+ 		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
+-		NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
++		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
++		"3dnowext", "3dnow",
+ 
+ 		/* Transmeta-defined */
+ 		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+@@ -1343,7 +1217,7 @@ static int show_cpuinfo(struct seq_file
+ 		/* Intel-defined (#2) */
+ 		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+ 		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+-		NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
++		NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
+ 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ 
+ 		/* VIA/Cyrix/Centaur-defined */
+@@ -1353,8 +1227,10 @@ static int show_cpuinfo(struct seq_file
+ 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ 
+ 		/* AMD-defined (#2) */
+-		"lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
+-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
++		"lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
++		"altmovcr8", "abm", "sse4a",
++		"misalignsse", "3dnowprefetch",
++		"osvw", "ibs", NULL, NULL, NULL, NULL,
+ 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ 	};
+@@ -1365,6 +1241,9 @@ static int show_cpuinfo(struct seq_file
+ 		"ttp",	/* thermal trip */
+ 		"tm",
+ 		"stc",
++		"100mhzsteps",
++		"hwpstate",
++		NULL,	/* tsc invariant mapped to constant_tsc */
+ 		NULL,
+ 		/* nothing */	/* constant_tsc - moved to flags */
+ 	};
+@@ -1481,26 +1360,3 @@ struct seq_operations cpuinfo_op = {
+ 	.stop =	c_stop,
+ 	.show =	show_cpuinfo,
+ };
+-
+-#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
+-#include <linux/platform_device.h>
+-static __init int add_pcspkr(void)
+-{
+-	struct platform_device *pd;
+-	int ret;
+-
+-	if (!is_initial_xendomain())
+-		return 0;
+-
+-	pd = platform_device_alloc("pcspkr", -1);
+-	if (!pd)
+-		return -ENOMEM;
+-
+-	ret = platform_device_add(pd);
+-	if (ret)
+-		platform_device_put(pd);
+-
+-	return ret;
+-}
+-device_initcall(add_pcspkr);
+-#endif
+Index: 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/vsyscall-xen.c	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c	2007-10-22 13:58:46.000000000 +0200
+@@ -26,6 +26,7 @@
+ #include <linux/seqlock.h>
+ #include <linux/jiffies.h>
+ #include <linux/sysctl.h>
++#include <linux/clocksource.h>
+ #include <linux/getcpu.h>
+ #include <linux/cpu.h>
+ #include <linux/smp.h>
+@@ -34,6 +35,7 @@
+ #include <asm/vsyscall.h>
+ #include <asm/pgtable.h>
+ #include <asm/page.h>
++#include <asm/unistd.h>
+ #include <asm/fixmap.h>
+ #include <asm/errno.h>
+ #include <asm/io.h>
+@@ -44,56 +46,41 @@
+ #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+ #define __syscall_clobber "r11","rcx","memory"
+ 
+-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
+-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
++struct vsyscall_gtod_data_t {
++	seqlock_t lock;
++	int sysctl_enabled;
++	struct timeval wall_time_tv;
++	struct timezone sys_tz;
++	cycle_t offset_base;
++	struct clocksource clock;
++};
+ int __vgetcpu_mode __section_vgetcpu_mode;
+ 
+-#include <asm/unistd.h>
+-
+-static __always_inline void timeval_normalize(struct timeval * tv)
++struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
+ {
+-	time_t __sec;
+-
+-	__sec = tv->tv_usec / 1000000;
+-	if (__sec) {
+-		tv->tv_usec %= 1000000;
+-		tv->tv_sec += __sec;
+-	}
+-}
++	.lock = SEQLOCK_UNLOCKED,
++	.sysctl_enabled = 1,
++};
+ 
+-static __always_inline void do_vgettimeofday(struct timeval * tv)
++void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ {
+-	long sequence, t;
+-	unsigned long sec, usec;
++	unsigned long flags;
+ 
+-	do {
+-		sequence = read_seqbegin(&__xtime_lock);
+-
+-		sec = __xtime.tv_sec;
+-		usec = __xtime.tv_nsec / 1000;
+-
+-		if (__vxtime.mode != VXTIME_HPET) {
+-			t = get_cycles_sync();
+-			if (t < __vxtime.last_tsc)
+-				t = __vxtime.last_tsc;
+-			usec += ((t - __vxtime.last_tsc) *
+-				 __vxtime.tsc_quot) >> 32;
+-			/* See comment in x86_64 do_gettimeofday. */
+-		} else {
+-			usec += ((readl((void __iomem *)
+-				   fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+-				  __vxtime.last) * __vxtime.quot) >> 32;
+-		}
+-	} while (read_seqretry(&__xtime_lock, sequence));
+-
+-	tv->tv_sec = sec + usec / 1000000;
+-	tv->tv_usec = usec % 1000000;
++	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
++	/* copy vsyscall data */
++	vsyscall_gtod_data.clock = *clock;
++	vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
++	vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
++	vsyscall_gtod_data.sys_tz = sys_tz;
++	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+ }
+ 
+-/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
++/* RED-PEN may want to readd seq locking, but then the variable should be
++ * write-once.
++ */
+ static __always_inline void do_get_tz(struct timezone * tz)
+ {
+-	*tz = __sys_tz;
++	*tz = __vsyscall_gtod_data.sys_tz;
+ }
+ 
+ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
+@@ -101,7 +88,8 @@ static __always_inline int gettimeofday(
+ 	int ret;
+ 	asm volatile("vsysc2: syscall"
+ 		: "=a" (ret)
+-		: "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
++		: "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
++		: __syscall_clobber );
+ 	return ret;
+ }
+ 
+@@ -114,10 +102,44 @@ static __always_inline long time_syscall
+ 	return secs;
+ }
+ 
++static __always_inline void do_vgettimeofday(struct timeval * tv)
++{
++	cycle_t now, base, mask, cycle_delta;
++	unsigned long seq, mult, shift, nsec_delta;
++	cycle_t (*vread)(void);
++	do {
++		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
++
++		vread = __vsyscall_gtod_data.clock.vread;
++		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
++			gettimeofday(tv,NULL);
++			return;
++		}
++		now = vread();
++		base = __vsyscall_gtod_data.clock.cycle_last;
++		mask = __vsyscall_gtod_data.clock.mask;
++		mult = __vsyscall_gtod_data.clock.mult;
++		shift = __vsyscall_gtod_data.clock.shift;
++
++		*tv = __vsyscall_gtod_data.wall_time_tv;
++
++	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
++
++	/* calculate interval: */
++	cycle_delta = (now - base) & mask;
++	/* convert to nsecs: */
++	nsec_delta = (cycle_delta * mult) >> shift;
++
++	/* convert to usecs and add to timespec: */
++	tv->tv_usec += nsec_delta / NSEC_PER_USEC;
++	while (tv->tv_usec > USEC_PER_SEC) {
++		tv->tv_sec += 1;
++		tv->tv_usec -= USEC_PER_SEC;
++	}
++}
++
+ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+ {
+-	if (!__sysctl_vsyscall)
+-		return gettimeofday(tv,tz);
+ 	if (tv)
+ 		do_vgettimeofday(tv);
+ 	if (tz)
+@@ -129,11 +151,11 @@ int __vsyscall(0) vgettimeofday(struct t
+  * unlikely */
+ time_t __vsyscall(1) vtime(time_t *t)
+ {
+-	if (!__sysctl_vsyscall)
++	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
+ 		return time_syscall(t);
+ 	else if (t)
+-		*t = __xtime.tv_sec;
+-	return __xtime.tv_sec;
++		*t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
++	return __vsyscall_gtod_data.wall_time_tv.tv_sec;
+ }
+ 
+ /* Fast way to get current CPU and node.
+@@ -210,7 +232,7 @@ static int vsyscall_sysctl_change(ctl_ta
+ 		ret = -ENOMEM;
+ 		goto out;
+ 	}
+-	if (!sysctl_vsyscall) {
++	if (!vsyscall_gtod_data.sysctl_enabled) {
+ 		writew(SYSCALL, map1);
+ 		writew(SYSCALL, map2);
+ 	} else {
+@@ -232,16 +254,17 @@ static int vsyscall_sysctl_nostrat(ctl_t
+ 
+ static ctl_table kernel_table2[] = {
+ 	{ .ctl_name = 99, .procname = "vsyscall64",
+-	  .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
++	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
++	  .mode = 0644,
+ 	  .strategy = vsyscall_sysctl_nostrat,
+ 	  .proc_handler = vsyscall_sysctl_change },
+-	{ 0, }
++	{}
+ };
+ 
+ static ctl_table kernel_root_table2[] = {
+ 	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+ 	  .child = kernel_table2 },
+-	{ 0 },
++	{}
+ };
+ 
+ #endif
+@@ -313,14 +336,14 @@ static int __init vsyscall_init(void)
+ 	map_vsyscall();
+ #ifdef CONFIG_XEN
+ 	map_vsyscall_user();
+-	sysctl_vsyscall = 0; /* disable vgettimeofay() */
++	vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofday() */
+ 	if (boot_cpu_has(X86_FEATURE_RDTSCP))
+ 		vgetcpu_mode = VGETCPU_RDTSCP;
+ 	else
+ 		vgetcpu_mode = VGETCPU_LSL;
+ #endif
+ #ifdef CONFIG_SYSCTL
+-	register_sysctl_table(kernel_root_table2, 0);
++	register_sysctl_table(kernel_root_table2);
+ #endif
+ 	on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+ 	hotcpu_notifier(cpu_vsyscall_notifier, 0);
+Index: 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/fault-xen.c	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c	2007-10-22 13:58:46.000000000 +0200
+@@ -56,38 +56,17 @@ int unregister_page_fault_notifier(struc
+ }
+ EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
+ 
+-static inline int notify_page_fault(enum die_val val, const char *str,
+-			struct pt_regs *regs, long err, int trap, int sig)
++static inline int notify_page_fault(struct pt_regs *regs, long err)
+ {
+ 	struct die_args args = {
+ 		.regs = regs,
+-		.str = str,
++		.str = "page fault",
+ 		.err = err,
+-		.trapnr = trap,
+-		.signr = sig
++		.trapnr = 14,
++		.signr = SIGSEGV
+ 	};
+-	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+-}
+-
+-void bust_spinlocks(int yes)
+-{
+-	int loglevel_save = console_loglevel;
+-	if (yes) {
+-		oops_in_progress = 1;
+-	} else {
+-#ifdef CONFIG_VT
+-		unblank_screen();
+-#endif
+-		oops_in_progress = 0;
+-		/*
+-		 * OK, the message is on the console. Now we call printk()
+-		 * without oops_in_progress set so that printk will give klogd
+-		 * a poke. Hold onto your hats...
+-		 */
+-		console_loglevel = 15;	/* NMI oopser may have shut the console up */
+-		printk(" ");
+-		console_loglevel = loglevel_save;
+-	}
++	return atomic_notifier_call_chain(&notify_page_fault_chain,
++					  DIE_PAGE_FAULT, &args);
+ }
+ 
+ /* Sometimes the CPU reports invalid exceptions on prefetch.
+@@ -437,8 +416,7 @@ asmlinkage void __kprobes do_page_fault(
+ 	/* Can take a spurious fault if mapping changes R/O -> R/W. */
+ 	if (spurious_fault(regs, address, error_code))
+ 		return;
+-	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+-						SIGSEGV) == NOTIFY_STOP)
++	if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
+ 		return;
+ 	/*
+ 	 * Don't take the mm semaphore here. If we fixup a prefetch
+@@ -447,8 +425,7 @@ asmlinkage void __kprobes do_page_fault(
+ 		goto bad_area_nosemaphore;
+ 	}
+ 
+-	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+-						SIGSEGV) == NOTIFY_STOP)
++	if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
+ 		return;
+ 
+ 	if (likely(regs->eflags & X86_EFLAGS_IF))
+Index: 10.3-2007-11-26/arch/x86_64/mm/init-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/init-xen.c	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/mm/init-xen.c	2007-10-22 13:58:46.000000000 +0200
+@@ -1136,20 +1136,30 @@ int kern_addr_valid(unsigned long addr)
+ extern int exception_trace, page_fault_trace;
+ 
+ static ctl_table debug_table2[] = {
+-	{ 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
+-	  proc_dointvec },
+-	{ 0, }
++	{
++		.ctl_name	= 99,
++		.procname	= "exception-trace",
++		.data		= &exception_trace,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec
++	},
++	{}
+ };
+ 
+ static ctl_table debug_root_table2[] = {
+-	{ .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
+-	  .child = debug_table2 },
+-	{ 0 },
++	{
++		.ctl_name = CTL_DEBUG,
++		.procname = "debug",
++		.mode = 0555,
++		.child = debug_table2
++	},
++	{}
+ };
+ 
+ static __init int x8664_sysctl_init(void)
+ {
+-	register_sysctl_table(debug_root_table2, 1);
++	register_sysctl_table(debug_root_table2);
+ 	return 0;
+ }
+ __initcall(x8664_sysctl_init);
+Index: 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/pageattr-xen.c	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c	2007-10-22 13:58:46.000000000 +0200
+@@ -275,8 +275,8 @@ static void flush_kernel_map(void *arg)
+ 			void *adr = page_address(pg);
+ 			if (cpu_has_clflush)
+ 				cache_flush_page(adr);
+-			__flush_tlb_one(adr);
+ 		}
++	__flush_tlb_all();
+ }
+ 
+ static inline void flush_map(struct list_head *l)
+@@ -301,6 +301,7 @@ static void revert_page(unsigned long ad
+ 	pud_t *pud;
+ 	pmd_t *pmd;
+ 	pte_t large_pte;
++	unsigned long pfn;
+ 
+ 	pgd = pgd_offset_k(address);
+ 	BUG_ON(pgd_none(*pgd));
+@@ -308,7 +309,8 @@ static void revert_page(unsigned long ad
+ 	BUG_ON(pud_none(*pud));
+ 	pmd = pmd_offset(pud, address);
+ 	BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
+-	large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
++	pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
++	large_pte = pfn_pte(pfn, ref_prot);
+ 	large_pte = pte_mkhuge(large_pte);
+ 	set_pte((pte_t *)pmd, large_pte);
+ }
+Index: 10.3-2007-11-26/drivers/xen/balloon/sysfs.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/balloon/sysfs.c	2007-12-06 17:27:31.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/balloon/sysfs.c	2007-10-22 13:58:46.000000000 +0200
+@@ -33,6 +33,7 @@
+ #include <linux/stat.h>
+ #include <linux/string.h>
+ #include <linux/sysdev.h>
++#include <linux/module.h>
+ #include "common.h"
+ 
+ #ifdef HAVE_XEN_PLATFORM_COMPAT_H
+Index: 10.3-2007-11-26/drivers/xen/core/evtchn.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/evtchn.c	2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/core/evtchn.c	2007-10-22 13:58:46.000000000 +0200
+@@ -130,7 +130,7 @@ static void bind_evtchn_to_cpu(unsigned
+ 	int irq = evtchn_to_irq[chn];
+ 
+ 	BUG_ON(irq == -1);
+-	set_native_irq_info(irq, cpumask_of_cpu(cpu));
++	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+ 
+ 	clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
+ 	set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
+@@ -143,7 +143,7 @@ static void init_evtchn_cpu_bindings(voi
+ 
+ 	/* By default all event channels notify CPU#0. */
+ 	for (i = 0; i < NR_IRQS; i++)
+-		set_native_irq_info(i, cpumask_of_cpu(0));
++		irq_desc[i].affinity = cpumask_of_cpu(0);
+ 
+ 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
+ 	memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
+Index: 10.3-2007-11-26/drivers/xen/netfront/netfront.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netfront/netfront.c	2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/netfront/netfront.c	2007-10-22 13:58:46.000000000 +0200
+@@ -1833,20 +1833,19 @@ static struct ethtool_ops network_ethtoo
+ };
+ 
+ #ifdef CONFIG_SYSFS
+-static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
++static ssize_t show_rxbuf_min(struct device *dev,
++			      struct device_attribute *attr, char *buf)
+ {
+-	struct net_device *netdev = container_of(cd, struct net_device,
+-						 class_dev);
+-	struct netfront_info *info = netdev_priv(netdev);
++	struct netfront_info *info = netdev_priv(to_net_dev(dev));
+ 
+ 	return sprintf(buf, "%u\n", info->rx_min_target);
+ }
+ 
+-static ssize_t store_rxbuf_min(struct class_device *cd,
++static ssize_t store_rxbuf_min(struct device *dev,
++			       struct device_attribute *attr,
+ 			       const char *buf, size_t len)
+ {
+-	struct net_device *netdev = container_of(cd, struct net_device,
+-						 class_dev);
++	struct net_device *netdev = to_net_dev(dev);
+ 	struct netfront_info *np = netdev_priv(netdev);
+ 	char *endp;
+ 	unsigned long target;
+@@ -1876,20 +1875,19 @@ static ssize_t store_rxbuf_min(struct cl
+ 	return len;
+ }
+ 
+-static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
++static ssize_t show_rxbuf_max(struct device *dev,
++			      struct device_attribute *attr, char *buf)
+ {
+-	struct net_device *netdev = container_of(cd, struct net_device,
+-						 class_dev);
+-	struct netfront_info *info = netdev_priv(netdev);
++	struct netfront_info *info = netdev_priv(to_net_dev(dev));
+ 
+ 	return sprintf(buf, "%u\n", info->rx_max_target);
+ }
+ 
+-static ssize_t store_rxbuf_max(struct class_device *cd,
++static ssize_t store_rxbuf_max(struct device *dev,
++			       struct device_attribute *attr,
+ 			       const char *buf, size_t len)
+ {
+-	struct net_device *netdev = container_of(cd, struct net_device,
+-						 class_dev);
++	struct net_device *netdev = to_net_dev(dev);
+ 	struct netfront_info *np = netdev_priv(netdev);
+ 	char *endp;
+ 	unsigned long target;
+@@ -1919,16 +1917,15 @@ static ssize_t store_rxbuf_max(struct cl
+ 	return len;
+ }
+ 
+-static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
++static ssize_t show_rxbuf_cur(struct device *dev,
++			      struct device_attribute *attr, char *buf)
+ {
+-	struct net_device *netdev = container_of(cd, struct net_device,
+-						 class_dev);
+-	struct netfront_info *info = netdev_priv(netdev);
++	struct netfront_info *info = netdev_priv(to_net_dev(dev));
+ 
+ 	return sprintf(buf, "%u\n", info->rx_target);
+ }
+ 
+-static const struct class_device_attribute xennet_attrs[] = {
++static struct device_attribute xennet_attrs[] = {
+ 	__ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
+ 	__ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
+ 	__ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
+@@ -1940,8 +1937,8 @@ static int xennet_sysfs_addif(struct net
+ 	int error = 0;
+ 
+ 	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
+-		error = class_device_create_file(&netdev->class_dev,
+-						 &xennet_attrs[i]);
++		error = device_create_file(&netdev->dev,
++					   &xennet_attrs[i]);
+ 		if (error)
+ 			goto fail;
+ 	}
+@@ -1949,8 +1946,7 @@ static int xennet_sysfs_addif(struct net
+ 
+  fail:
+ 	while (--i >= 0)
+-		class_device_remove_file(&netdev->class_dev,
+-					 &xennet_attrs[i]);
++		device_remove_file(&netdev->dev, &xennet_attrs[i]);
+ 	return error;
+ }
+ 
+@@ -1958,10 +1954,8 @@ static void xennet_sysfs_delif(struct ne
+ {
+ 	int i;
+ 
+-	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
+-		class_device_remove_file(&netdev->class_dev,
+-					 &xennet_attrs[i]);
+-	}
++	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
++		device_remove_file(&netdev->dev, &xennet_attrs[i]);
+ }
+ 
+ #endif /* CONFIG_SYSFS */
+Index: 10.3-2007-11-26/include/asm-i386/i8253.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/i8253.h	2007-12-06 17:27:31.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/i8253.h	2007-10-22 13:58:46.000000000 +0200
+@@ -5,6 +5,8 @@
+ 
+ extern spinlock_t i8253_lock;
+ 
++#ifdef CONFIG_GENERIC_CLOCKEVENTS
++
+ extern struct clock_event_device *global_clock_event;
+ 
+ /**
+@@ -18,4 +20,6 @@ static inline void pit_interrupt_hook(vo
+ 	global_clock_event->event_handler(global_clock_event);
+ }
+ 
++#endif
++
+ #endif	/* __ASM_I8253_H__ */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/desc.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h	2007-10-22 13:58:46.000000000 +0200
+@@ -21,7 +21,7 @@ struct Xgt_desc_struct {
+ 
+ extern struct Xgt_desc_struct idt_descr;
+ DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+-
++extern struct Xgt_desc_struct early_gdt_descr;
+ 
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/io.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h	2007-10-22 13:58:46.000000000 +0200
+@@ -233,12 +233,6 @@ static inline void memcpy_toio(volatile
+ #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
+ 
+ /*
+- * Again, i386 does not require mem IO specific function.
+- */
+-
+-#define eth_io_copy_and_sum(a,b,c,d)	eth_copy_and_sum((a),(void __force *)(b),(c),(d))
+-
+-/*
+  * Cache management
+  *
+  * This needed for two cases
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/mmu_context.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/mmu_context.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/mmu_context.h	2007-10-22 13:58:46.000000000 +0200
+@@ -27,13 +27,13 @@ static inline void enter_lazy_tlb(struct
+ static inline void __prepare_arch_switch(void)
+ {
+ 	/*
+-	 * Save away %fs. No need to save %gs, as it was saved on the
++	 * Save away %gs. No need to save %fs, as it was saved on the
+ 	 * stack on entry. No need to save %es and %ds, as those are
+ 	 * always kernel segments while inside the kernel.
+ 	 */
+-	asm volatile ( "mov %%fs,%0"
+-		: "=m" (current->thread.fs));
+-	asm volatile ( "movl %0,%%fs"
++	asm volatile ( "mov %%gs,%0"
++		: "=m" (current->thread.gs));
++	asm volatile ( "movl %0,%%gs"
+ 		: : "r" (0) );
+ }
+ 
+@@ -95,7 +95,7 @@ static inline void switch_mm(struct mm_s
+ 	}
+ 
+ #define deactivate_mm(tsk, mm)	\
+-	asm("movl %0,%%fs": :"r" (0));
++	asm("movl %0,%%gs": :"r" (0));
+ 
+ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+ {
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgalloc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgalloc.h	2007-12-06 17:27:31.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgalloc.h	2007-10-22 13:58:46.000000000 +0200
+@@ -6,12 +6,22 @@
+ #include <linux/mm.h>		/* for struct page */
+ #include <asm/io.h>		/* for phys_to_virt and page_to_pseudophys */
+ 
+-#define pmd_populate_kernel(mm, pmd, pte) \
+-	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
++#define paravirt_alloc_pt(pfn) do { } while (0)
++#define paravirt_alloc_pd(pfn) do { } while (0)
++#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
++#define paravirt_release_pt(pfn) do { } while (0)
++#define paravirt_release_pd(pfn) do { } while (0)
++
++#define pmd_populate_kernel(mm, pmd, pte)				\
++do {									\
++	paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT);			\
++	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));			\
++} while (0)
+ 
+ #define pmd_populate(mm, pmd, pte) 					\
+ do {									\
+ 	unsigned long pfn = page_to_pfn(pte);				\
++	paravirt_alloc_pt(pfn);						\
+ 	if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) {	\
+ 		if (!PageHighMem(pte))					\
+ 			BUG_ON(HYPERVISOR_update_va_mapping(		\
+@@ -42,7 +53,11 @@ static inline void pte_free_kernel(pte_t
+ 
+ extern void pte_free(struct page *pte);
+ 
+-#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
++#define __pte_free_tlb(tlb,pte)					\
++do {									\
++	paravirt_release_pt(page_to_pfn(pte));				\
++	tlb_remove_page((tlb),(pte));					\
++} while (0)
+ 
+ #ifdef CONFIG_X86_PAE
+ /*
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable.h	2007-10-22 14:08:14.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h	2007-10-22 14:08:56.000000000 +0200
+@@ -271,6 +271,7 @@ static inline pte_t pte_mkhuge(pte_t pte
+  */
+ #define pte_update(mm, addr, ptep)		do { } while (0)
+ #define pte_update_defer(mm, addr, ptep)	do { } while (0)
++#define paravirt_map_pt_hook(slot, va, pfn)	do { } while (0)
+ 
+ /*
+  * We only update the dirty/accessed state if we set
+@@ -486,12 +487,24 @@ extern pte_t *lookup_address(unsigned lo
+ #endif
+ 
+ #if defined(CONFIG_HIGHPTE)
+-#define pte_offset_map(dir, address) \
+-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
+-	 pte_index(address))
+-#define pte_offset_map_nested(dir, address) \
+-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \
+-	 pte_index(address))
++#define pte_offset_map(dir, address)					\
++({									\
++	pte_t *__ptep;							\
++	unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT;			\
++	__ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0);	\
++	paravirt_map_pt_hook(KM_PTE0,__ptep, pfn);			\
++	__ptep = __ptep + pte_index(address);				\
++	__ptep;								\
++})
++#define pte_offset_map_nested(dir, address)				\
++({									\
++	pte_t *__ptep;							\
++	unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT;			\
++	__ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1);	\
++	paravirt_map_pt_hook(KM_PTE1,__ptep, pfn);			\
++	__ptep = __ptep + pte_index(address);				\
++	__ptep;								\
++})
+ #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+ #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+ #else
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/processor.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h	2007-10-22 13:58:46.000000000 +0200
+@@ -431,7 +431,7 @@ struct thread_struct {
+ 	.vm86_info	= NULL,						\
+ 	.sysenter_cs	= __KERNEL_CS,					\
+ 	.io_bitmap_ptr	= NULL,						\
+-	.gs		= __KERNEL_PDA,					\
++	.fs		= __KERNEL_PDA,					\
+ }
+ 
+ /*
+@@ -449,8 +449,8 @@ struct thread_struct {
+ }
+ 
+ #define start_thread(regs, new_eip, new_esp) do {		\
+-	__asm__("movl %0,%%fs": :"r" (0));			\
+-	regs->xgs = 0;						\
++	__asm__("movl %0,%%gs": :"r" (0));			\
++	regs->xfs = 0;						\
+ 	set_fs(USER_DS);					\
+ 	regs->xds = __USER_DS;					\
+ 	regs->xes = __USER_DS;					\
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/ptrace.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/ptrace.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/ptrace.h	2007-10-22 13:58:46.000000000 +0200
+@@ -16,8 +16,8 @@ struct pt_regs {
+ 	long eax;
+ 	int xds;
+ 	int xes;
+-	/* int xfs; */
+-	int xgs;
++	int xfs;
++	/* int xgs; */
+ 	long orig_eax;
+ 	long eip;
+ 	int xcs;
+@@ -49,6 +49,10 @@ static inline int user_mode_vm(struct pt
+ {
+ 	return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
+ }
++static inline int v8086_mode(struct pt_regs *regs)
++{
++	return (regs->eflags & VM_MASK);
++}
+ 
+ #define instruction_pointer(regs) ((regs)->eip)
+ #define regs_return_value(regs) ((regs)->eax)
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/segment.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h	2007-10-22 13:58:46.000000000 +0200
+@@ -83,14 +83,8 @@
+  * The GDT has 32 entries
+  */
+ #define GDT_ENTRIES 32
+-
+ #define GDT_SIZE (GDT_ENTRIES * 8)
+ 
+-/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
+-#define SEGMENT_IS_FLAT_CODE(x)  (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
+-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
+-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
+-
+ /* Simple and small GDT entries for booting only */
+ 
+ #define GDT_ENTRY_BOOT_CS		2
+@@ -132,4 +126,21 @@
+ #define SEGMENT_GDT		0x0
+ 
+ #define get_kernel_rpl()   (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
++
++/*
++ * Matching rules for certain types of segments.
++ */
++
++/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
++#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \
++				   || ((x) & ~3) == (FLAT_KERNEL_CS & ~3))
++
++/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
++#define SEGMENT_IS_FLAT_CODE(x)  (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \
++				  || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \
++				  || ((x) & ~3) == (FLAT_USER_CS & ~3))
++
++/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
++#define SEGMENT_IS_PNP_CODE(x)   (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8)
++
+ #endif
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/setup.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/setup.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/setup.h	2007-10-22 13:58:46.000000000 +0200
+@@ -6,7 +6,7 @@
+ #ifndef _i386_SETUP_H
+ #define _i386_SETUP_H
+ 
+-#define COMMAND_LINE_SIZE	256
++#define COMMAND_LINE_SIZE	2048
+ 
+ #ifdef __KERNEL__
+ #include <linux/pfn.h>
+@@ -77,6 +77,8 @@ int __init sanitize_e820_map(struct e820
+ void __init add_memory_region(unsigned long long start,
+ 			      unsigned long long size, int type);
+ 
++extern unsigned long init_pg_tables_end;
++
+ #endif /* __ASSEMBLY__ */
+ 
+ #endif /* __KERNEL__ */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/smp.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h	2007-10-22 13:58:46.000000000 +0200
+@@ -52,6 +52,11 @@ extern void cpu_exit_clear(void);
+ extern void cpu_uninit(void);
+ #endif
+ 
++#ifndef CONFIG_PARAVIRT
++#define startup_ipi_hook(phys_apicid, start_eip, start_esp)		\
++do { } while (0)
++#endif
++
+ /*
+  * This function is needed by all SMP systems. It must _always_ be valid
+  * from the initial startup. We map APIC_BASE very early in page_setup(),
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/bootsetup.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/bootsetup.h	2007-12-06 17:27:31.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/bootsetup.h	2007-10-22 13:58:46.000000000 +0200
+@@ -33,7 +33,7 @@ extern char x86_boot_params[BOOT_PARAM_S
+ #define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF))
+ #define EDD_MBR_SIGNATURE ((unsigned int *) (PARAM+EDD_MBR_SIG_BUF))
+ #define EDD_BUF     ((struct edd_info *) (PARAM+EDDBUF))
+-#define COMMAND_LINE saved_command_line
++#define COMMAND_LINE boot_command_line
+ 
+ #define RAMDISK_IMAGE_START_MASK	0x07FF
+ #define RAMDISK_PROMPT_FLAG		0x8000
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/dma-mapping.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h	2007-10-22 13:58:46.000000000 +0200
+@@ -9,7 +9,6 @@
+ 
+ #include <asm/scatterlist.h>
+ #include <asm/io.h>
+-#include <asm/swiotlb.h>
+ 
+ struct dma_mapping_ops {
+ 	int             (*mapping_error)(dma_addr_t dma_addr);
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/e820.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/e820.h	2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/e820.h	2007-10-22 13:58:46.000000000 +0200
+@@ -46,6 +46,7 @@ extern void e820_mark_nosave_regions(voi
+ extern void e820_print_map(char *who);
+ extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
+ extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
++extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
+ 
+ extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+ extern void e820_register_active_regions(int nid,
+@@ -56,6 +57,7 @@ extern void finish_e820_parsing(void);
+ extern struct e820map e820;
+ 
+ extern unsigned ebda_addr, ebda_size;
++extern unsigned long nodemap_addr, nodemap_size;
+ #endif/*!__ASSEMBLY__*/
+ 
+ #endif/*__E820_HEADER*/
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/hw_irq.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/hw_irq.h	2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/hw_irq.h	2007-10-22 13:58:46.000000000 +0200
+@@ -31,10 +31,32 @@
+ 
+ #define IA32_SYSCALL_VECTOR	0x80
+ 
++#ifndef CONFIG_XEN
++
++/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
++ * cleanup after irq migration.
++ */
++#define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
+ 
+ /*
+- * Vectors 0x20-0x2f are used for ISA interrupts.
++ * Vectors 0x30-0x3f are used for ISA interrupts.
+  */
++#define IRQ0_VECTOR		FIRST_EXTERNAL_VECTOR + 0x10
++#define IRQ1_VECTOR		IRQ0_VECTOR + 1
++#define IRQ2_VECTOR		IRQ0_VECTOR + 2
++#define IRQ3_VECTOR		IRQ0_VECTOR + 3
++#define IRQ4_VECTOR		IRQ0_VECTOR + 4
++#define IRQ5_VECTOR		IRQ0_VECTOR + 5
++#define IRQ6_VECTOR		IRQ0_VECTOR + 6
++#define IRQ7_VECTOR		IRQ0_VECTOR + 7
++#define IRQ8_VECTOR		IRQ0_VECTOR + 8
++#define IRQ9_VECTOR		IRQ0_VECTOR + 9
++#define IRQ10_VECTOR		IRQ0_VECTOR + 10
++#define IRQ11_VECTOR		IRQ0_VECTOR + 11
++#define IRQ12_VECTOR		IRQ0_VECTOR + 12
++#define IRQ13_VECTOR		IRQ0_VECTOR + 13
++#define IRQ14_VECTOR		IRQ0_VECTOR + 14
++#define IRQ15_VECTOR		IRQ0_VECTOR + 15
+ 
+ /*
+  * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+@@ -43,7 +65,6 @@
+  * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+  * TLB, reschedule and local APIC vectors are performance-critical.
+  */
+-#ifndef CONFIG_XEN
+ #define SPURIOUS_APIC_VECTOR	0xff
+ #define ERROR_APIC_VECTOR	0xfe
+ #define RESCHEDULE_VECTOR	0xfd
+@@ -57,7 +78,6 @@
+ #define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+ 
+ #define NUM_INVALIDATE_TLB_VECTORS	8
+-#endif
+ 
+ /*
+  * Local APIC timer IRQ vector is on a different priority level,
+@@ -68,12 +88,13 @@
+ 
+ /*
+  * First APIC vector available to drivers: (vectors 0x30-0xee)
+- * we start at 0x31 to spread out vectors evenly between priority
++ * we start at 0x41 to spread out vectors evenly between priority
+  * levels. (0x80 is the syscall vector)
+  */
+-#define FIRST_DEVICE_VECTOR	0x31
++#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
+ #define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in irq.h */
+ 
++#endif
+ 
+ #ifndef __ASSEMBLY__
+ typedef int vector_irq_t[NR_VECTORS];
+@@ -93,7 +114,7 @@ extern void enable_8259A_irq(unsigned in
+ extern int i8259A_irq_pending(unsigned int irq);
+ extern void make_8259A_irq(unsigned int irq);
+ extern void init_8259A(int aeoi);
+-extern void FASTCALL(send_IPI_self(int vector));
++extern void send_IPI_self(int vector);
+ extern void init_VISWS_APIC_irqs(void);
+ extern void setup_IO_APIC(void);
+ extern void disable_IO_APIC(void);
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/io.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/io.h	2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/io.h	2007-10-22 13:58:46.000000000 +0200
+@@ -101,7 +101,7 @@ __OUTS(l)
+ 
+ #define IO_SPACE_LIMIT 0xffff
+ 
+-#if defined(__KERNEL__) && __x86_64__
++#if defined(__KERNEL__) && defined(__x86_64__)
+ 
+ #include <linux/vmalloc.h>
+ 
+@@ -267,12 +267,6 @@ void memset_io(volatile void __iomem *a,
+  */
+ #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
+ 
+-/*
+- * Again, x86-64 does not require mem IO specific function.
+- */
+-
+-#define eth_io_copy_and_sum(a,b,c,d)	eth_copy_and_sum((a),(void *)(b),(c),(d))
+-
+ /* Nothing to do */
+ 
+ #define dma_cache_inv(_start,_size)	do { } while (0)
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/msr.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h	2007-10-22 13:58:46.000000000 +0200
+@@ -160,6 +160,19 @@ static inline unsigned int cpuid_edx(uns
+ #define MSR_IA32_UCODE_WRITE		0x79
+ #define MSR_IA32_UCODE_REV		0x8b
+ 
++#ifdef CONFIG_SMP
++void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
++void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
++#else /* CONFIG_SMP */
++static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
++{
++	rdmsr(msr_no, *l, *h);
++}
++static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
++{
++	wrmsr(msr_no, l, h);
++}
++#endif /* CONFIG_SMP */
+ 
+ #endif
+ 
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/pgtable.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h	2007-10-22 13:58:46.000000000 +0200
+@@ -410,15 +410,6 @@ static inline int pmd_large(pmd_t pte) {
+ #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+ #define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
+ 
+-/* physical address -> PTE */
+-static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
+-{
+-	unsigned long pteval;
+-	pteval = physpage | pgprot_val(pgprot);
+-	pteval &= __supported_pte_mask;
+-	return __pte(pteval);
+-}
+-
+ /* Change flags of a PTE */
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/smp.h	2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h	2007-10-22 13:58:46.000000000 +0200
+@@ -7,6 +7,7 @@
+ #include <linux/threads.h>
+ #include <linux/cpumask.h>
+ #include <linux/bitops.h>
++#include <linux/init.h>
+ extern int disable_apic;
+ 
+ #ifdef CONFIG_X86_LOCAL_APIC
+@@ -73,7 +74,7 @@ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+ extern void prefill_possible_map(void);
+ extern unsigned num_processors;
+-extern unsigned disabled_cpus;
++extern unsigned __cpuinitdata disabled_cpus;
+ 
+ #define NO_PROC_ID		0xFF       /* No processor magic marker */
+ 
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/timer.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/timer.h	2007-09-03 09:52:56.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/timer.h	2007-10-22 13:58:46.000000000 +0200
+@@ -4,20 +4,4 @@
+ 
+ #define TICK_SIZE (tick_nsec / 1000)
+ 
+-extern void clock_fallback(void);
+-void setup_pit_timer(void);
+-
+-/* Modifiers for buggy PIT handling */
+-
+-extern int pit_latch_buggy;
+-
+-extern int timer_ack;
+-
+-/* list of externed timers */
+-extern unsigned long calibrate_tsc(void);
+-extern void init_cpu_khz(void);
+-#ifdef CONFIG_HPET_TIMER
+-extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr);
+-#endif
+-
+ #endif
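
For reference, the clocksource-based do_vgettimeofday() introduced by the
vsyscall-xen.c hunk above reduces to one piece of arithmetic: the elapsed
counter delta, masked for wrap-around, is scaled to nanoseconds with the
clocksource's mult/shift pair. A minimal user-space sketch of that conversion
follows; the struct name, the main() harness, and the 1 MHz counter parameters
are illustrative assumptions, not kernel API.

/* Standalone sketch of the cycles -> nanoseconds conversion used by the
 * vsyscall-xen.c hunk above. Assumes a hypothetical 1 MHz counter where
 * one tick equals 1000 ns, encoded as mult = 1000 << 10, shift = 10.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t cycle_t;

struct clocksource_sketch {	/* illustrative, not the kernel's struct */
	cycle_t cycle_last;	/* counter value at last update_vsyscall() */
	cycle_t mask;		/* handles counter wrap-around */
	uint32_t mult, shift;	/* ns = (cycles * mult) >> shift */
};

int main(void)
{
	struct clocksource_sketch cs = {
		.cycle_last = 1000000,
		.mask = ~0ULL,
		.mult = 1000 << 10,
		.shift = 10,
	};
	cycle_t now = 1002500;	/* current counter read, e.g. via vread() */
	cycle_t delta = (now - cs.cycle_last) & cs.mask;
	uint64_t ns = (delta * cs.mult) >> cs.shift;

	printf("%llu cycles -> %llu ns\n",
	       (unsigned long long)delta, (unsigned long long)ns);
	return 0;
}

With these numbers, 2500 elapsed cycles convert to 2500000 ns, i.e. the
2500 usec that do_vgettimeofday() would add to the wall_time_tv snapshot it
copies under the seqlock.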