On Thu, Feb 3, 2011 at 11:32 PM, Eduard - Gabriel Munteanu
<eduard.munteanu(a)linux360.ro> wrote:
> This introduces replacements for memory access functions like
> cpu_physical_memory_read(). The new interface can handle address
> translation and access checking through an IOMMU.
>
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu(a)linux360.ro>
> ---
> Makefile.target | 2 +-
> hw/dma_rw.c | 124 +++++++++++++++++++++++++++++++++++++++++++
> hw/dma_rw.h | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 282 insertions(+), 1 deletions(-)
> create mode 100644 hw/dma_rw.c
> create mode 100644 hw/dma_rw.h
>
> diff --git a/Makefile.target b/Makefile.target
> index e15b1c4..e5817ab 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
> obj-i386-y += vmmouse.o vmport.o hpet.o applesmc.o
> obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
> obj-i386-y += debugcon.o multiboot.o
> -obj-i386-y += pc_piix.o
> +obj-i386-y += pc_piix.o dma_rw.o
> obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>
> # shared objects
> diff --git a/hw/dma_rw.c b/hw/dma_rw.c
> new file mode 100644
> index 0000000..ef8e7f8
> --- /dev/null
> +++ b/hw/dma_rw.c
> @@ -0,0 +1,124 @@
> +/*
> + * Generic DMA memory access interface.
> + *
> + * Copyright (c) 2011 Eduard - Gabriel Munteanu
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "dma_rw.h"
> +#include "range.h"
> +
> +/*
> + * Record an active mapping on the device's IOMMU so that it can be
> + * invalidated later (see dma_invalidate_memory_range()).
> + */
> +static void dma_register_memory_map(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    dma_addr_t len,
> +                                    target_phys_addr_t paddr,
> +                                    DMAInvalidateMapFunc *invalidate,
> +                                    void *invalidate_opaque)
> +{
> +    DMAMemoryMap *map = qemu_malloc(sizeof(*map));
> +
> +    map->addr              = addr;
> +    map->len               = len;
> +    map->paddr             = paddr;
> +    map->invalidate        = invalidate;
> +    map->invalidate_opaque = invalidate_opaque;
> +
> +    QLIST_INSERT_HEAD(&dev->mmu->memory_maps, map, list);
> +}
> +
> +/*
> + * Drop the map entry registered for this (paddr, len) pair, if any.
> + */
> +static void dma_unregister_memory_map(DMADevice *dev,
> +                                      target_phys_addr_t paddr,
> +                                      dma_addr_t len)
> +{
> +    DMAMemoryMap *map, *next;
> +
> +    /*
> +     * BUG FIX: the loop removes and frees entries, so it must use
> +     * QLIST_FOREACH_SAFE -- plain QLIST_FOREACH reads the freed node
> +     * to advance the iteration (use-after-free).
> +     */
> +    QLIST_FOREACH_SAFE(map, &dev->mmu->memory_maps, list, next) {
> +        if (map->paddr == paddr && map->len == len) {
> +            QLIST_REMOVE(map, list);
> +            qemu_free(map);   /* pair qemu_malloc() with qemu_free() */
> +        }
> +    }
> +}
> +
> +/*
> + * Invalidate every registered map overlapping [addr, addr + len),
> + * running its invalidate callback before dropping the entry.
> + */
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len)
> +{
> +    DMAMemoryMap *map, *next;
> +
> +    /*
> +     * BUG FIX: entries are removed and freed while iterating, so
> +     * QLIST_FOREACH_SAFE is required; plain QLIST_FOREACH would
> +     * dereference the freed node to advance (use-after-free).
> +     */
> +    QLIST_FOREACH_SAFE(map, &dev->mmu->memory_maps, list, next) {
> +        if (ranges_overlap(addr, len, map->addr, map->len)) {
> +            map->invalidate(map->invalidate_opaque);
> +            QLIST_REMOVE(map, list);
> +            qemu_free(map);   /* pair qemu_malloc() with qemu_free() */
> +        }
> +    }
> +}
> +
> +/*
> + * Map a bus-address range for direct host access, translating through
> + * the device's IOMMU when one is present.  *len may be clamped; a
> + * non-NULL 'cb' registers the map for later invalidation.
> + */
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write)
> +{
> +    int err;
> +    dma_addr_t paddr, plen;
> +    target_phys_addr_t map_len;
> +    void *buf;
> +
> +    /*
> +     * BUG FIX: translate() takes dma_addr_t * for paddr/plen while
> +     * cpu_physical_memory_map() takes target_phys_addr_t * -- the
> +     * original mixed the two pointer types, which breaks the build
> +     * on targets where the widths differ.  Use a dma_addr_t pair for
> +     * translation and a target_phys_addr_t intermediate for mapping.
> +     */
> +    if (!dev || !dev->mmu) {
> +        /* Fast path: no IOMMU, map the physical range directly. */
> +        map_len = *len;
> +        buf = cpu_physical_memory_map(addr, &map_len, is_write);
> +        *len = map_len;
> +        return buf;
> +    }
> +
> +    plen = *len;
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +    if (err) {
> +        return NULL;
> +    }
> +
> +    /*
> +     * If this is true, the virtual region is contiguous,
> +     * but the translated physical region isn't. We just
> +     * clamp *len, much like cpu_physical_memory_map() does.
> +     */
> +    if (plen < *len) {
> +        *len = plen;
> +    }
> +
> +    map_len = *len;
> +    buf = cpu_physical_memory_map(paddr, &map_len, is_write);
> +    *len = map_len;
> +
> +    /*
> +     * We treat maps as remote TLBs to cope with stuff like AIO.
> +     * BUG FIX: register only after the physical map succeeded, and
> +     * with the final (possibly clamped again) length, so a failed or
> +     * shortened mapping cannot leave a stale invalidation entry.
> +     */
> +    if (buf && cb) {
> +        dma_register_memory_map(dev, addr, *len, paddr, cb, opaque);
> +    }
> +
> +    return buf;
> +}
> +
> +/*
> + * Undo a dma_memory_map(): release the host mapping and, when an
> + * IOMMU is present, drop the registered invalidation entry.
> + */
> +void dma_memory_unmap(DMADevice *dev,
> + void *buffer,
> + dma_addr_t len,
> + int is_write,
> + dma_addr_t access_len)
> +{
> + cpu_physical_memory_unmap(buffer, len, is_write, access_len);
> + if (dev && dev->mmu) {
> + /*
> + * NOTE(review): 'buffer' is the HOST pointer returned by
> + * cpu_physical_memory_map(), but entries were registered under
> + * the GUEST physical address ('paddr') in dma_memory_map().
> + * Casting the host pointer to target_phys_addr_t will not match
> + * any registered entry, so maps appear never to be unregistered
> + * -- confirm, and either key the lookup on the translated paddr
> + * or store the host pointer in DMAMemoryMap.
> + */
> + dma_unregister_memory_map(dev, (target_phys_addr_t) buffer, len);
> + }
> +}
> +
> diff --git a/hw/dma_rw.h b/hw/dma_rw.h
> new file mode 100644
> index 0000000..bc93511
> --- /dev/null
> +++ b/hw/dma_rw.h
> @@ -0,0 +1,157 @@
> +#ifndef DMA_RW_H
> +#define DMA_RW_H
> +
> +#include "qemu-common.h"
> +
> +/*
> + * Bus (DMA) addresses are fixed at 64 bits here, independent of the
> + * target's physical address width.  NOTE(review): confirm this is
> + * intentional rather than tracking target_phys_addr_t.
> + */
> +typedef uint64_t dma_addr_t;
> +
> +typedef struct DMAMmu DMAMmu;
> +typedef struct DMADevice DMADevice;
> +typedef struct DMAMemoryMap DMAMemoryMap;
> +
> +/*
> + * Translate a bus address into a physical address and the length for
> + * which the translation holds.  Returns 0 on success, non-zero on a
> + * translation fault (callers treat any non-zero value as an error).
> + */
> +typedef int DMATranslateFunc(DMADevice *dev,
> + dma_addr_t addr,
> + dma_addr_t *paddr,
> + dma_addr_t *len,
> + int is_write);
> +
> +/* Callback run when a registered memory map becomes stale. */
> +typedef void DMAInvalidateMapFunc(void *);
> +
> +/*
> + * Per-IOMMU state: the translation hook plus the list of live
> + * dma_memory_map() mappings that may need invalidating.
> + */
> +struct DMAMmu {
> + DeviceState *iommu;
> + DMATranslateFunc *translate;
> + QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
> +};
> +
> +/* Per-device DMA handle; mmu is NULL when no IOMMU is in front. */
> +struct DMADevice {
> + DMAMmu *mmu;
> +};
> +
> +/*
> + * One live mapping: the bus range, the translated physical base, and
> + * the callback (plus its opaque) to run on invalidation.
> + */
> +struct DMAMemoryMap {
> + dma_addr_t addr;
> + dma_addr_t len;
> + target_phys_addr_t paddr;
> + DMAInvalidateMapFunc *invalidate;
> + void *invalidate_opaque;
> +
> + QLIST_ENTRY(DMAMemoryMap) list;
> +};
> +
> +/*
> + * Copy 'len' bytes between 'buf' and guest memory at bus address
> + * 'addr', translating through the IOMMU chunk by chunk when present.
> + */
> +static inline void dma_memory_rw(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 void *buf,
> +                                 dma_addr_t len,
> +                                 int is_write)
> +{
> +    dma_addr_t paddr, plen;
> +    int err;
> +
> +    /*
> +     * Fast-path non-iommu.
> +     * More importantly, makes it obvious what this function does.
> +     * BUG FIX: the original passed the uninitialized 'plen' instead
> +     * of 'len' to cpu_physical_memory_rw() here.
> +     */
> +    if (!dev || !dev->mmu) {
> +        cpu_physical_memory_rw(addr, buf, len, is_write);
> +        return;
> +    }
> +
> +    while (len) {
> +        /*
> +         * Initialize plen with the remaining length, matching the
> +         * in/out usage of translate() in dma_memory_map(); the
> +         * original passed plen uninitialized.
> +         */
> +        plen = len;
> +        err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +        if (err) {
> +            /* NOTE(review): faults silently truncate the transfer;
> +             * the caller gets no error indication. */
> +            return;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        cpu_physical_memory_rw(paddr, buf, plen, is_write);
> +
> +        len -= plen;
> +        addr += plen;
> +        buf = (uint8_t *)buf + plen;  /* avoid void* arithmetic (GCC ext.) */
> +    }
> +}
> +
> +/* Convenience wrapper: a DMA read is dma_memory_rw() with is_write=0. */
> +static inline void dma_memory_read(DMADevice *dev, dma_addr_t addr,
> +                                   void *buf, dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, buf, len, 0);
> +}
> +
> +/* Convenience wrapper: a DMA write is dma_memory_rw() with is_write=1.
> + * The const is cast away only to share the common implementation; the
> + * buffer is not modified on the write path. */
> +static inline void dma_memory_write(DMADevice *dev, dma_addr_t addr,
> +                                    const void *buf, dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> + DMAInvalidateMapFunc *cb,
> + void *opaque,
> + dma_addr_t addr,
> + dma_addr_t *len,
> + int is_write);
> +void dma_memory_unmap(DMADevice *dev,
> + void *buffer,
> + dma_addr_t len,
> + int is_write,
> + dma_addr_t access_len);
> +
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> + dma_addr_t addr,
> + dma_addr_t len);
> +
> +
> +#define DEFINE_DMA_LD(suffix, size) \
> +static inline uint##size##_t \
> +dma_ld##suffix(DMADevice *dev, dma_addr_t addr) \
> +{ \
> + int err; \
> + dma_addr_t paddr, plen; \
> + \
> + if (!dev || !dev->mmu) { \
> + return ld##suffix##_phys(addr); \
> + } \
> + \
> + err = dev->mmu->translate(dev, addr, &paddr, &plen, 0); \
> + if (err || (plen < size / 8)) \
If the access is unaligned and the translation splits it in two (for
example, across a page boundary), the access is silently dropped, which
can't be correct.
Do we have such cases? If so, should this be handled by the caller
instead (maybe not)?