diff options
Diffstat (limited to 'trunk/2.6.22/20019_15181-dma-tracking.patch1')
-rw-r--r-- | trunk/2.6.22/20019_15181-dma-tracking.patch1 | 551 |
1 files changed, 551 insertions, 0 deletions
diff --git a/trunk/2.6.22/20019_15181-dma-tracking.patch1 b/trunk/2.6.22/20019_15181-dma-tracking.patch1 new file mode 100644 index 0000000..2bf8906 --- /dev/null +++ b/trunk/2.6.22/20019_15181-dma-tracking.patch1 @@ -0,0 +1,551 @@ +# HG changeset 15181+33+41 patch +# User kfraser@localhost.localdomain +# Date 1180518373 -3600 +# Node ID 45f939d0c72493d237783419996bbca0132551df +# Parent 1f7a6456c330272a3cec13b31fc1ba9b4db898ec +Subject: gnttab: Add basic DMA tracking + +This patch adds basic tracking of outstanding DMA requests on +grant table entries marked as PageForeign. + +When a PageForeign struct page is about to be mapped for DMA, +we set its map count to 1 (or zero in actual value). This is +then checked for when we need to free a grant table entry early +to ensure that we don't free an entry that's currently used for +DMA. + +So any entry that has been marked for DMA will not be freed early. + +If the unmapping API had a struct page (which exists for the sg +case) then we could do this properly. + +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> + +(added the interfacing bits from 15180) + +Subject: gnttab: Fix copy_grant_page race with seqlock + +Previously gnttab_copy_grant_page would always unmap the grant table +entry, even if DMA operations were outstanding. This would allow a +hostile guest to free a page still used by DMA to the hypervisor. + +This patch fixes this by making sure that we don't free the grant +table entry if a DMA operation has taken place. To achieve this a +seqlock is used to synchronise the DMA operations and +copy_grant_page. + +The DMA operations use the read side of the seqlock so performance +should be largely unaffected. + +Thanks to Isaku Yamahata for noticing the race condition. + +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> + +Subject: Make dma address conversion logic of gnttab dma arch specific. 
+ +gnttab_dma_map_page() and gnttab_dma_unmap_page() use machine address +with dma address interchangeably. However it doesn't work with auto +translated mode enabled (i.e. on ia64) because + +- bus address space(dma_addr_t) is different from machine address + space(maddr_t). + With the terminology in xen/include/public/mm.h, + dma_addr_t is maddr and maddr_t is gmaddr. + So they should be handled differently with auto translated physmap + mode + enabled. + +- dma address conversion depends on dma api implementation and + its paravirtualization. + "pfn_valid(mfn_to_local_pfn(maddr >> PAGE_SHIFT))" check in + gnttab_dma_map_page() doesn't make sense with auto translated physmap + mode enabled. + +To address those issues, split that logic from gnttab_dma_map_page() +and gnttab_dma_unmap_page(), and put it into arch specific files. +This patch doesn't change the already existing x86 logic. + +Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp> + +Acked-by: jbeulich@novell.com + +--- + arch/i386/kernel/pci-dma-xen.c | 19 +++- + arch/i386/kernel/swiotlb.c | 27 ++++- + drivers/xen/core/gnttab.c | 124 +++++++++++++++++++++++++++ + include/asm-i386/mach-xen/asm/gnttab_dma.h | 41 ++++++++ + include/asm-x86_64/mach-xen/asm/gnttab_dma.h | 1 + include/xen/gnttab.h | 28 ++++++ + include/xen/interface/grant_table.h | 23 +++++ + 7 files changed, 252 insertions(+), 11 deletions(-) + +--- a/arch/i386/kernel/pci-dma-xen.c 2007-08-27 14:01:24.000000000 -0400 ++++ b/arch/i386/kernel/pci-dma-xen.c 2007-08-27 14:02:07.000000000 -0400 +@@ -15,9 +15,11 @@ + #include <linux/version.h> + #include <asm/io.h> + #include <xen/balloon.h> ++#include <xen/gnttab.h> + #include <asm/swiotlb.h> + #include <asm/tlbflush.h> + #include <asm-i386/mach-xen/asm/swiotlb.h> ++#include <asm-i386/mach-xen/asm/gnttab_dma.h> + #include <asm/bug.h> + + #ifdef __x86_64__ +@@ -90,7 +92,7 @@ dma_map_sg(struct device *hwdev, struct + } else { + for (i = 0; i < nents; i++ ) { + sg[i].dma_address = +- 
page_to_bus(sg[i].page) + sg[i].offset; ++ gnttab_dma_map_page(sg[i].page) + sg[i].offset; + sg[i].dma_length = sg[i].length; + BUG_ON(!sg[i].page); + IOMMU_BUG_ON(address_needs_mapping( +@@ -108,9 +110,15 @@ void + dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) + { ++ int i; ++ + BUG_ON(direction == DMA_NONE); + if (swiotlb) + swiotlb_unmap_sg(hwdev, sg, nents, direction); ++ else { ++ for (i = 0; i < nents; i++ ) ++ gnttab_dma_unmap_page(sg[i].dma_address); ++ } + } + EXPORT_SYMBOL(dma_unmap_sg); + +@@ -127,7 +135,7 @@ dma_map_page(struct device *dev, struct + dma_addr = swiotlb_map_page( + dev, page, offset, size, direction); + } else { +- dma_addr = page_to_bus(page) + offset; ++ dma_addr = gnttab_dma_map_page(page) + offset; + IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); + } + +@@ -142,6 +150,8 @@ dma_unmap_page(struct device *dev, dma_a + BUG_ON(direction == DMA_NONE); + if (swiotlb) + swiotlb_unmap_page(dev, dma_address, size, direction); ++ else ++ gnttab_dma_unmap_page(dma_address); + } + EXPORT_SYMBOL(dma_unmap_page); + #endif /* CONFIG_HIGHMEM */ +@@ -326,7 +336,8 @@ dma_map_single(struct device *dev, void + if (swiotlb) { + dma = swiotlb_map_single(dev, ptr, size, direction); + } else { +- dma = virt_to_bus(ptr); ++ dma = gnttab_dma_map_page(virt_to_page(ptr)) + ++ offset_in_page(ptr); + IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size)); + IOMMU_BUG_ON(address_needs_mapping(dev, dma)); + } +@@ -344,6 +355,8 @@ dma_unmap_single(struct device *dev, dma + BUG(); + if (swiotlb) + swiotlb_unmap_single(dev, dma_addr, size, direction); ++ else ++ gnttab_dma_unmap_page(dma_addr); + } + EXPORT_SYMBOL(dma_unmap_single); + +--- a/arch/i386/kernel/swiotlb.c 2007-08-27 14:01:25.000000000 -0400 ++++ b/arch/i386/kernel/swiotlb.c 2007-08-27 14:02:07.000000000 -0400 +@@ -25,15 +25,15 @@ + #include <asm/pci.h> + #include <asm/dma.h> + #include <asm/uaccess.h> ++#include <xen/gnttab.h> + #include 
<xen/interface/memory.h> ++#include <asm-i386/mach-xen/asm/gnttab_dma.h> + + int swiotlb; + EXPORT_SYMBOL(swiotlb); + + #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) + +-#define SG_ENT_PHYS_ADDRESS(sg) (page_to_bus((sg)->page) + (sg)->offset) +- + /* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? +@@ -468,7 +468,8 @@ swiotlb_full(struct device *dev, size_t + dma_addr_t + swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) + { +- dma_addr_t dev_addr = virt_to_bus(ptr); ++ dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) + ++ offset_in_page(ptr); + void *map; + struct phys_addr buffer; + +@@ -486,6 +487,7 @@ swiotlb_map_single(struct device *hwdev, + /* + * Oh well, have to allocate and map a bounce buffer. + */ ++ gnttab_dma_unmap_page(dev_addr); + buffer.page = virt_to_page(ptr); + buffer.offset = (unsigned long)ptr & ~PAGE_MASK; + map = map_single(hwdev, buffer, size, dir); +@@ -513,6 +515,8 @@ swiotlb_unmap_single(struct device *hwde + BUG_ON(dir == DMA_NONE); + if (in_swiotlb_aperture(dev_addr)) + unmap_single(hwdev, bus_to_virt(dev_addr), size, dir); ++ else ++ gnttab_dma_unmap_page(dev_addr); + } + + /* +@@ -571,8 +575,10 @@ swiotlb_map_sg(struct device *hwdev, str + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) { +- dev_addr = SG_ENT_PHYS_ADDRESS(sg); ++ dev_addr = gnttab_dma_map_page(sg->page) + sg->offset; ++ + if (address_needs_mapping(hwdev, dev_addr)) { ++ gnttab_dma_unmap_page(dev_addr); + buffer.page = sg->page; + buffer.offset = sg->offset; + map = map_single(hwdev, buffer, sg->length, dir); +@@ -605,10 +611,12 @@ swiotlb_unmap_sg(struct device *hwdev, s + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) +- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) ++ if (in_swiotlb_aperture(sg->dma_address)) + unmap_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); ++ else ++ 
gnttab_dma_unmap_page(sg->dma_address); + } + + /* +@@ -627,7 +635,7 @@ swiotlb_sync_sg_for_cpu(struct device *h + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) +- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) ++ if (in_swiotlb_aperture(sg->dma_address)) + sync_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +@@ -642,7 +650,7 @@ swiotlb_sync_sg_for_device(struct device + BUG_ON(dir == DMA_NONE); + + for (i = 0; i < nelems; i++, sg++) +- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) ++ if (in_swiotlb_aperture(sg->dma_address)) + sync_single(hwdev, + (void *)bus_to_virt(sg->dma_address), + sg->dma_length, dir); +@@ -659,8 +667,9 @@ swiotlb_map_page(struct device *hwdev, s + dma_addr_t dev_addr; + char *map; + +- dev_addr = page_to_bus(page) + offset; ++ dev_addr = gnttab_dma_map_page(page) + offset; + if (address_needs_mapping(hwdev, dev_addr)) { ++ gnttab_dma_unmap_page(dev_addr); + buffer.page = page; + buffer.offset = offset; + map = map_single(hwdev, buffer, size, direction); +@@ -681,6 +690,8 @@ swiotlb_unmap_page(struct device *hwdev, + BUG_ON(direction == DMA_NONE); + if (in_swiotlb_aperture(dma_address)) + unmap_single(hwdev, bus_to_virt(dma_address), size, direction); ++ else ++ gnttab_dma_unmap_page(dma_address); + } + + #endif +--- a/drivers/xen/core/gnttab.c 2007-08-27 14:01:25.000000000 -0400 ++++ b/drivers/xen/core/gnttab.c 2007-08-27 14:01:25.000000000 -0400 +@@ -34,6 +34,7 @@ + #include <linux/module.h> + #include <linux/sched.h> + #include <linux/mm.h> ++#include <linux/seqlock.h> + #include <xen/interface/xen.h> + #include <xen/gnttab.h> + #include <asm/pgtable.h> +@@ -42,6 +43,7 @@ + #include <asm/io.h> + #include <xen/interface/memory.h> + #include <xen/driver_util.h> ++#include <asm/gnttab_dma.h> + + #ifdef HAVE_XEN_PLATFORM_COMPAT_H + #include <xen/platform-compat.h> +@@ -63,6 +65,8 @@ static struct grant_entry *shared; + + static struct gnttab_free_callback *gnttab_free_callback_list; + 
++static DEFINE_SEQLOCK(gnttab_dma_lock); ++ + static int gnttab_expand(unsigned int req_entries); + + #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +@@ -490,6 +494,126 @@ static int gnttab_map(unsigned int start + return 0; + } + ++static void gnttab_page_free(struct page *page) ++{ ++ ClearPageForeign(page); ++ gnttab_reset_grant_page(page); ++ put_page(page); ++} ++ ++/* ++ * Must not be called with IRQs off. This should only be used on the ++ * slow path. ++ * ++ * Copy a foreign granted page to local memory. ++ */ ++int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep) ++{ ++ struct gnttab_unmap_and_replace unmap; ++ mmu_update_t mmu; ++ struct page *page; ++ struct page *new_page; ++ void *new_addr; ++ void *addr; ++ paddr_t pfn; ++ maddr_t mfn; ++ maddr_t new_mfn; ++ int err; ++ ++ page = *pagep; ++ if (!get_page_unless_zero(page)) ++ return -ENOENT; ++ ++ err = -ENOMEM; ++ new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); ++ if (!new_page) ++ goto out; ++ ++ new_addr = page_address(new_page); ++ addr = page_address(page); ++ memcpy(new_addr, addr, PAGE_SIZE); ++ ++ pfn = page_to_pfn(page); ++ mfn = pfn_to_mfn(pfn); ++ new_mfn = virt_to_mfn(new_addr); ++ ++ write_seqlock(&gnttab_dma_lock); ++ ++ /* Make seq visible before checking page_mapped. */ ++ smp_mb(); ++ ++ /* Has the page been DMA-mapped? 
*/ ++ if (unlikely(page_mapped(page))) { ++ write_sequnlock(&gnttab_dma_lock); ++ put_page(new_page); ++ err = -EBUSY; ++ goto out; ++ } ++ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) ++ set_phys_to_machine(pfn, new_mfn); ++ ++ gnttab_set_replace_op(&unmap, (unsigned long)addr, ++ (unsigned long)new_addr, ref); ++ ++ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, ++ &unmap, 1); ++ BUG_ON(err); ++ BUG_ON(unmap.status); ++ ++ write_sequnlock(&gnttab_dma_lock); ++ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY); ++ ++ mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; ++ mmu.val = pfn; ++ err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF); ++ BUG_ON(err); ++ } ++ ++ new_page->mapping = page->mapping; ++ new_page->index = page->index; ++ set_bit(PG_foreign, &new_page->flags); ++ *pagep = new_page; ++ ++ SetPageForeign(page, gnttab_page_free); ++ page->mapping = NULL; ++ ++out: ++ put_page(page); ++ return err; ++} ++EXPORT_SYMBOL(gnttab_copy_grant_page); ++ ++/* ++ * Keep track of foreign pages marked as PageForeign so that we don't ++ * return them to the remote domain prematurely. ++ * ++ * PageForeign pages are pinned down by increasing their mapcount. ++ * ++ * All other pages are simply returned as is. ++ */ ++void __gnttab_dma_map_page(struct page *page) ++{ ++ unsigned int seq; ++ ++ if (!is_running_on_xen() || !PageForeign(page)) ++ return; ++ ++ do { ++ seq = read_seqbegin(&gnttab_dma_lock); ++ ++ if (gnttab_dma_local_pfn(page)) ++ break; ++ ++ atomic_set(&page->_mapcount, 0); ++ ++ /* Make _mapcount visible before read_seqretry. 
*/ ++ smp_mb(); ++ } while (unlikely(read_seqretry(&gnttab_dma_lock, seq))); ++} ++ + int gnttab_resume(void) + { + if (max_nr_grant_frames() < nr_grant_frames) +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/include/asm-i386/mach-xen/asm/gnttab_dma.h 2007-08-27 14:01:25.000000000 -0400 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> ++ * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp> ++ * VA Linux Systems Japan K.K. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#ifndef _ASM_I386_GNTTAB_DMA_H ++#define _ASM_I386_GNTTAB_DMA_H ++ ++static inline int gnttab_dma_local_pfn(struct page *page) ++{ ++ /* Has it become a local MFN? 
*/ ++ return pfn_valid(mfn_to_local_pfn(pfn_to_mfn(page_to_pfn(page)))); ++} ++ ++static inline maddr_t gnttab_dma_map_page(struct page *page) ++{ ++ __gnttab_dma_map_page(page); ++ return page_to_bus(page); ++} ++ ++static inline void gnttab_dma_unmap_page(maddr_t maddr) ++{ ++ __gnttab_dma_unmap_page(virt_to_page(bus_to_virt(maddr))); ++} ++ ++#endif /* _ASM_I386_GNTTAB_DMA_H */ +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ b/include/asm-x86_64/mach-xen/asm/gnttab_dma.h 2007-08-27 14:01:25.000000000 -0400 +@@ -0,0 +1 @@ ++#include <asm-i386/mach-xen/asm/gnttab_dma.h> +--- a/include/xen/gnttab.h 2007-08-27 14:01:25.000000000 -0400 ++++ b/include/xen/gnttab.h 2007-08-27 14:01:25.000000000 -0400 +@@ -39,6 +39,7 @@ + + #include <asm/hypervisor.h> + #include <asm/maddr.h> /* maddr_t */ ++#include <linux/mm.h> + #include <xen/interface/grant_table.h> + #include <xen/features.h> + +@@ -101,6 +102,18 @@ void gnttab_grant_foreign_access_ref(gra + void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); + ++int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep); ++void __gnttab_dma_map_page(struct page *page); ++static inline void __gnttab_dma_unmap_page(struct page *page) ++{ ++} ++ ++static inline void gnttab_reset_grant_page(struct page *page) ++{ ++ init_page_count(page); ++ reset_page_mapcount(page); ++} ++ + int gnttab_suspend(void); + int gnttab_resume(void); + +@@ -135,4 +148,19 @@ gnttab_set_unmap_op(struct gnttab_unmap_ + unmap->dev_bus_addr = 0; + } + ++static inline void ++gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, maddr_t addr, ++ maddr_t new_addr, grant_handle_t handle) ++{ ++ if (xen_feature(XENFEAT_auto_translated_physmap)) { ++ unmap->host_addr = __pa(addr); ++ unmap->new_addr = __pa(new_addr); ++ } else { ++ unmap->host_addr = addr; ++ unmap->new_addr = new_addr; ++ } ++ ++ unmap->handle = handle; ++} ++ + #endif /* __ASM_GNTTAB_H__ */ +--- a/include/xen/interface/grant_table.h 
2007-08-27 14:01:25.000000000 -0400 ++++ b/include/xen/interface/grant_table.h 2007-08-27 14:01:25.000000000 -0400 +@@ -328,6 +328,29 @@ struct gnttab_query_size { + typedef struct gnttab_query_size gnttab_query_size_t; + DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + ++/* ++ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings ++ * tracked by <handle> but atomically replace the page table entry with one ++ * pointing to the machine address under <new_addr>. <new_addr> will be ++ * redirected to the null entry. ++ * NOTES: ++ * 1. The call may fail in an undefined manner if either mapping is not ++ * tracked by <handle>. ++ * 2. After executing a batch of unmaps, it is guaranteed that no stale ++ * mappings will remain in the device or host TLBs. ++ */ ++#define GNTTABOP_unmap_and_replace 7 ++struct gnttab_unmap_and_replace { ++ /* IN parameters. */ ++ uint64_t host_addr; ++ uint64_t new_addr; ++ grant_handle_t handle; ++ /* OUT parameters. */ ++ int16_t status; /* GNTST_* */ ++}; ++typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; ++DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); ++ + + /* + * Bitfield values for update_pin_status.flags. |