From 9f9ff20d46c6728b092f34b6a642e1e81ab5e254 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 14 Aug 2010 11:01:45 +0200
Subject: [PATCH 01/18] dma/shdma: move dereference below the NULL check

"param" can be NULL here, so only dereference it after the check.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/shdma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index fb64cf36ba61..eb6b54dbb806 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -580,7 +580,6 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 
 	sh_chan = to_sh_chan(chan);
 	param = chan->private;
-	slave_addr = param->config->addr;
 
 	/* Someone calling slave DMA on a public channel? */
 	if (!param || !sg_len) {
@@ -589,6 +588,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 		return NULL;
 	}
 
+	slave_addr = param->config->addr;
+
 	/*
 	 * if (param != NULL), this is a successfully requested slave channel,
 	 * therefore param->config != NULL too.

From d3f3cf859db17cc5f8156c5bfcd032413e44483b Mon Sep 17 00:00:00 2001
From: Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Date: Sat, 14 Aug 2010 15:02:44 +0200
Subject: [PATCH 02/18] missing inline keyword for static function in
 linux/dmaengine.h

Add a missing inline keyword for static function in linux/dmaengine.h to
avoid duplicate symbol definitions.

Signed-off-by: Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c61d4ca27bcc..e2106495cc11 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
 	return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
 }
 
-static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
 {
 	return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
 }

From cc60f8878eab892c03d06b10f389232b9b66bd83 Mon Sep 17 00:00:00 2001
From: Simon Guinot <sguinot@lacie.com>
Date: Fri, 17 Sep 2010 23:33:51 +0200
Subject: [PATCH 03/18] dmaengine: fix interrupt clearing for mv_xor

When using simultaneously the two DMA channels on a same engine, some
transfers are never completed. For example, an endless lock can occur
while writing heavily on a RAID5 array (with async-tx offload support
enabled).

Note that this issue can also be reproduced by using the DMA test
client.

On a same engine, the interrupt cause register is shared between two
DMA channels. This patch make sure that the cause bit is only cleared
for the requested channel.

Signed-off-by: Simon Guinot <sguinot@lacie.com>
Tested-by: Luc Saillard <luc@saillard.org>
Acked-by: saeed bishara <saeed.bishara@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/mv_xor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 86c5ae9fde34..411d5bf50fc4 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -162,7 +162,7 @@ static int mv_is_err_intr(u32 intr_cause)
 
 static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
 {
-	u32 val = (1 << (1 + (chan->idx * 16)));
+	u32 val = ~(1 << (chan->idx * 16));
 	dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
 	__raw_writel(val, XOR_INTR_CAUSE(chan));
 }

From e8689e63d4d2046079f2db9d494ac05c6885ac0c Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Tue, 28 Sep 2010 15:57:37 +0200
Subject: [PATCH 04/18] dmaengine: driver for the ARM PL080/PL081 PrimeCells v5

This creates a DMAengine driver for the ARM PL080/PL081 PrimeCells
based on the implementation earlier submitted by Peter Pearse.
This is working like a charm for memcpy and slave DMA to the PL011
PrimeCell on the PB11MPCore.

This DMA controller is used in mostly unmodified form in the ARM
RealView and Versatile platforms, in the ST-Ericsson Nomadik, and
in the ST SPEAr platform.

It has been converted to use the header from the Samsung PL080
derivate instead of its own defintions. The Samsungs have a custom
driver in their mach-* folders though, atleast we can share the
register definitions.

Cc: Peter Pearse <peter.pearse@arm.com>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Kukjin Kim <kgene.kim@samsung.com>
Cc: Alessandro Rubini <rubini@unipv.it>
Acked-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
[GFP_KERNEL to GFP_NOWAIT in pl08x_prep_dma_memcpy]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/Kconfig        |    8 +
 drivers/dma/Makefile       |    1 +
 drivers/dma/amba-pl08x.c   | 2167 ++++++++++++++++++++++++++++++++++++
 include/linux/amba/pl08x.h |  222 ++++
 4 files changed, 2398 insertions(+)
 create mode 100644 drivers/dma/amba-pl08x.c
 create mode 100644 include/linux/amba/pl08x.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 9520cf02edc8..f82ef10a8361 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -49,6 +49,14 @@ config INTEL_MID_DMAC
 config ASYNC_TX_DISABLE_CHANNEL_SWITCH
 	bool
 
+config AMBA_PL08X
+	bool "ARM PrimeCell PL080 or PL081 support"
+	depends on ARM_AMBA && EXPERIMENTAL
+	select DMA_ENGINE
+	help
+	  Platform has a PL08x DMAC device
+	  which can provide DMA engine support
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 72bd70384d8a..0b690e7e4384 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
+obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
new file mode 100644
index 000000000000..b605cc9ac3a2
--- /dev/null
+++ b/drivers/dma/amba-pl08x.c
@@ -0,0 +1,2167 @@
+/*
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is iin this distribution in the
+ * file called COPYING.
+ *
+ * Documentation: ARM DDI 0196G == PL080
+ * Documentation: ARM DDI 0218E	== PL081
+ *
+ * PL080 & PL081 both have 16 sets of DMA signals that can be routed to
+ * any channel.
+ *
+ * The PL080 has 8 channels available for simultaneous use, and the PL081
+ * has only two channels. So on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * The PL080 has a dual bus master, PL081 has a single master.
+ *
+ * Memory to peripheral transfer may be visualized as
+ *	Get data from memory to DMAC
+ *	Until no data left
+ *		On burst request from peripheral
+ *			Destination burst from DMAC to peripheral
+ *			Clear burst request
+ *	Raise terminal count interrupt
+ *
+ * For peripherals with a FIFO:
+ * Source      burst size == half the depth of the peripheral FIFO
+ * Destination burst size == the depth of the peripheral FIFO
+ *
+ * (Bursts are irrelevant for mem to mem transfers - there are no burst
+ * signals, the DMA controller will simply facilitate its AHB master.)
+ *
+ * ASSUMES default (little) endianness for DMA transfers
+ *
+ * Only DMAC flow control is implemented
+ *
+ * Global TODO:
+ * - Break out common code from arch/arm/mach-s3c64xx and share
+ */
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/dmapool.h>
+#include <linux/amba/bus.h>
+#include <linux/dmaengine.h>
+#include <linux/amba/pl08x.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/hardware/pl080.h>
+#include <asm/dma.h>
+#include <asm/mach/dma.h>
+#include <asm/atomic.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+
+#define DRIVER_NAME	"pl08xdmac"
+
+/**
+ * struct vendor_data - vendor-specific config parameters
+ * for PL08x derivates
+ * @name: the name of this specific variant
+ * @channels: the number of channels available in this variant
+ * @dualmaster: whether this version supports dual AHB masters
+ * or not.
+ */
+struct vendor_data {
+	char *name;
+	u8 channels;
+	bool dualmaster;
+};
+
+/*
+ * PL08X private data structures
+ * An LLI struct - see pl08x TRM
+ * Note that next uses bit[0] as a bus bit,
+ * start & end do not - their bus bit info
+ * is in cctl
+ */
+struct lli {
+	dma_addr_t src;
+	dma_addr_t dst;
+	dma_addr_t next;
+	u32 cctl;
+};
+
+/**
+ * struct pl08x_driver_data - the local state holder for the PL08x
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @base: virtual memory base (remapped) for the PL08x
+ * @adev: the corresponding AMBA (PrimeCell) bus entry
+ * @vd: vendor data for this PL08x variant
+ * @pd: platform data passed in from the platform/machine
+ * @phy_chans: array of data for the physical channels
+ * @pool: a pool for the LLI descriptors
+ * @pool_ctr: counter of LLIs in the pool
+ * @lock: a spinlock for this struct
+ */
+struct pl08x_driver_data {
+	struct dma_device slave;
+	struct dma_device memcpy;
+	void __iomem *base;
+	struct amba_device *adev;
+	struct vendor_data *vd;
+	struct pl08x_platform_data *pd;
+	struct pl08x_phy_chan *phy_chans;
+	struct dma_pool *pool;
+	int pool_ctr;
+	spinlock_t lock;
+};
+
+/*
+ * PL08X specific defines
+ */
+
+/*
+ * Memory boundaries: the manual for PL08x says that the controller
+ * cannot read past a 1KiB boundary, so these defines are used to
+ * create transfer LLIs that do not cross such boundaries.
+ */
+#define PL08X_BOUNDARY_SHIFT		(10)	/* 1KB 0x400 */
+#define PL08X_BOUNDARY_SIZE		(1 << PL08X_BOUNDARY_SHIFT)
+
+/* Minimum period between work queue runs */
+#define PL08X_WQ_PERIODMIN	20
+
+/* Size (bytes) of each LLI buffer allocated for one transfer */
+# define PL08X_LLI_TSFR_SIZE	0x2000
+
+/* Maximimum times we call dma_pool_alloc on this pool without freeing */
+#define PL08X_MAX_ALLOCS	0x40
+#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct lli))
+#define PL08X_ALIGN		8
+
+static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct pl08x_dma_chan, chan);
+}
+
+/*
+ * Physical channel handling
+ */
+
+/* Whether a certain channel is busy or not */
+static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
+{
+	unsigned int val;
+
+	val = readl(ch->base + PL080_CH_CONFIG);
+	return val & PL080_CONFIG_ACTIVE;
+}
+
+/*
+ * Set the initial DMA register values i.e. those for the first LLI
+ * The next lli pointer and the configuration interrupt bit have
+ * been set when the LLIs were constructed
+ */
+static void pl08x_set_cregs(struct pl08x_driver_data *pl08x,
+			    struct pl08x_phy_chan *ch)
+{
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(ch))
+		;
+
+	dev_vdbg(&pl08x->adev->dev,
+		"WRITE channel %d: csrc=%08x, cdst=%08x, "
+		 "cctl=%08x, clli=%08x, ccfg=%08x\n",
+		ch->id,
+		ch->csrc,
+		ch->cdst,
+		ch->cctl,
+		ch->clli,
+		ch->ccfg);
+
+	writel(ch->csrc, ch->base + PL080_CH_SRC_ADDR);
+	writel(ch->cdst, ch->base + PL080_CH_DST_ADDR);
+	writel(ch->clli, ch->base + PL080_CH_LLI);
+	writel(ch->cctl, ch->base + PL080_CH_CONTROL);
+	writel(ch->ccfg, ch->base + PL080_CH_CONFIG);
+}
+
+static inline void pl08x_config_phychan_for_txd(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_channel_data *cd = plchan->cd;
+	struct pl08x_phy_chan *phychan = plchan->phychan;
+	struct pl08x_txd *txd = plchan->at;
+
+	/* Copy the basic control register calculated at transfer config */
+	phychan->csrc = txd->csrc;
+	phychan->cdst = txd->cdst;
+	phychan->clli = txd->clli;
+	phychan->cctl = txd->cctl;
+
+	/* Assign the signal to the proper control registers */
+	phychan->ccfg = cd->ccfg;
+	phychan->ccfg &= ~PL080_CONFIG_SRC_SEL_MASK;
+	phychan->ccfg &= ~PL080_CONFIG_DST_SEL_MASK;
+	/* If it wasn't set from AMBA, ignore it */
+	if (txd->direction == DMA_TO_DEVICE)
+		/* Select signal as destination */
+		phychan->ccfg |=
+			(phychan->signal << PL080_CONFIG_DST_SEL_SHIFT);
+	else if (txd->direction == DMA_FROM_DEVICE)
+		/* Select signal as source */
+		phychan->ccfg |=
+			(phychan->signal << PL080_CONFIG_SRC_SEL_SHIFT);
+	/* Always enable error interrupts */
+	phychan->ccfg |= PL080_CONFIG_ERR_IRQ_MASK;
+	/* Always enable terminal interrupts */
+	phychan->ccfg |= PL080_CONFIG_TC_IRQ_MASK;
+}
+
+/*
+ * Enable the DMA channel
+ * Assumes all other configuration bits have been set
+ * as desired before this code is called
+ */
+static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x,
+				  struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/*
+	 * Do not access config register until channel shows as disabled
+	 */
+	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << ch->id))
+		;
+
+	/*
+	 * Do not access config register until channel shows as inactive
+	 */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
+		val = readl(ch->base + PL080_CH_CONFIG);
+
+	writel(val | PL080_CONFIG_ENABLE, ch->base + PL080_CH_CONFIG);
+}
+
+/*
+ * Overall DMAC remains enabled always.
+ *
+ * Disabling individual channels could lose data.
+ *
+ * Disable the peripheral DMA after disabling the DMAC
+ * in order to allow the DMAC FIFO to drain, and
+ * hence allow the channel to show inactive
+ *
+ */
+static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* Set the HALT bit and wait for the FIFO to drain */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val |= PL080_CONFIG_HALT;
+	writel(val, ch->base + PL080_CH_CONFIG);
+
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(ch))
+		;
+}
+
+static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* Clear the HALT bit */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val &= ~PL080_CONFIG_HALT;
+	writel(val, ch->base + PL080_CH_CONFIG);
+}
+
+
+/* Stops the channel */
+static void pl08x_stop_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	pl08x_pause_phy_chan(ch);
+
+	/* Disable channel */
+	val = readl(ch->base + PL080_CH_CONFIG);
+	val &= ~PL080_CONFIG_ENABLE;
+	val &= ~PL080_CONFIG_ERR_IRQ_MASK;
+	val &= ~PL080_CONFIG_TC_IRQ_MASK;
+	writel(val, ch->base + PL080_CH_CONFIG);
+}
+
+static inline u32 get_bytes_in_cctl(u32 cctl)
+{
+	/* The source width defines the number of bytes */
+	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+/* The channel should be paused when calling this */
+static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_phy_chan *ch;
+	struct pl08x_txd *txdi = NULL;
+	struct pl08x_txd *txd;
+	unsigned long flags;
+	u32 bytes = 0;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+
+	ch = plchan->phychan;
+	txd = plchan->at;
+
+	/*
+	 * Next follow the LLIs to get the number of pending bytes in the
+	 * currently active transaction.
+	 */
+	if (ch && txd) {
+		struct lli *llis_va = txd->llis_va;
+		struct lli *llis_bus = (struct lli *) txd->llis_bus;
+		u32 clli = readl(ch->base + PL080_CH_LLI);
+
+		/* First get the bytes in the current active LLI */
+		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
+
+		if (clli) {
+			int i = 0;
+
+			/* Forward to the LLI pointed to by clli */
+			while ((clli != (u32) &(llis_bus[i])) &&
+			       (i < MAX_NUM_TSFR_LLIS))
+				i++;
+
+			while (clli) {
+				bytes += get_bytes_in_cctl(llis_va[i].cctl);
+				/*
+				 * A clli of 0x00000000 will terminate the
+				 * LLI list
+				 */
+				clli = llis_va[i].next;
+				i++;
+			}
+		}
+	}
+
+	/* Sum up all queued transactions */
+	if (!list_empty(&plchan->desc_list)) {
+		list_for_each_entry(txdi, &plchan->desc_list, node) {
+			bytes += txdi->len;
+		}
+
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return bytes;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ */
+static struct pl08x_phy_chan *
+pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
+		      struct pl08x_dma_chan *virt_chan)
+{
+	struct pl08x_phy_chan *ch = NULL;
+	unsigned long flags;
+	int i;
+
+	/*
+	 * Try to locate a physical channel to be used for
+	 * this transfer. If all are taken return NULL and
+	 * the requester will have to cope by using some fallback
+	 * PIO mode or retrying later.
+	 */
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+
+		if (!ch->serving) {
+			ch->serving = virt_chan;
+			ch->signal = -1;
+			spin_unlock_irqrestore(&ch->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	if (i == pl08x->vd->channels) {
+		/* No physical channel available, cope with it */
+		return NULL;
+	}
+
+	return ch;
+}
+
+static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x,
+					 struct pl08x_phy_chan *ch)
+{
+	unsigned long flags;
+
+	/* Stop the channel and clear its interrupts */
+	pl08x_stop_phy_chan(ch);
+	writel((1 << ch->id), pl08x->base + PL080_ERR_CLEAR);
+	writel((1 << ch->id), pl08x->base + PL080_TC_CLEAR);
+
+	/* Mark it as free */
+	spin_lock_irqsave(&ch->lock, flags);
+	ch->serving = NULL;
+	spin_unlock_irqrestore(&ch->lock, flags);
+}
+
+/*
+ * LLI handling
+ */
+
+static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
+{
+	switch (coded) {
+	case PL080_WIDTH_8BIT:
+		return 1;
+	case PL080_WIDTH_16BIT:
+		return 2;
+	case PL080_WIDTH_32BIT:
+		return 4;
+	default:
+		break;
+	}
+	BUG();
+	return 0;
+}
+
+static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
+				  u32 tsize)
+{
+	u32 retbits = cctl;
+
+	/* Remove all src, dst and transfersize bits */
+	retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+	retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+	retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+	/* Then set the bits according to the parameters */
+	switch (srcwidth) {
+	case 1:
+		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	case 2:
+		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	case 4:
+		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	switch (dstwidth) {
+	case 1:
+		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case 2:
+		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case 4:
+		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
+	return retbits;
+}
+
+/*
+ * Autoselect a master bus to use for the transfer
+ * this prefers the destination bus if both available
+ * if fixed address on one bus the other will be chosen
+ */
+void pl08x_choose_master_bus(struct pl08x_bus_data *src_bus,
+	struct pl08x_bus_data *dst_bus, struct pl08x_bus_data **mbus,
+	struct pl08x_bus_data **sbus, u32 cctl)
+{
+	if (!(cctl & PL080_CONTROL_DST_INCR)) {
+		*mbus = src_bus;
+		*sbus = dst_bus;
+	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+		*mbus = dst_bus;
+		*sbus = src_bus;
+	} else {
+		if (dst_bus->buswidth == 4) {
+			*mbus = dst_bus;
+			*sbus = src_bus;
+		} else if (src_bus->buswidth == 4) {
+			*mbus = src_bus;
+			*sbus = dst_bus;
+		} else if (dst_bus->buswidth == 2) {
+			*mbus = dst_bus;
+			*sbus = src_bus;
+		} else if (src_bus->buswidth == 2) {
+			*mbus = src_bus;
+			*sbus = dst_bus;
+		} else {
+			/* src_bus->buswidth == 1 */
+			*mbus = dst_bus;
+			*sbus = src_bus;
+		}
+	}
+}
+
+/*
+ * Fills in one LLI for a certain transfer descriptor
+ * and advance the counter
+ */
+int pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
+			    struct pl08x_txd *txd, int num_llis, int len,
+			    u32 cctl, u32 *remainder)
+{
+	struct lli *llis_va = txd->llis_va;
+	struct lli *llis_bus = (struct lli *) txd->llis_bus;
+
+	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
+
+	llis_va[num_llis].cctl		= cctl;
+	llis_va[num_llis].src		= txd->srcbus.addr;
+	llis_va[num_llis].dst		= txd->dstbus.addr;
+
+	/*
+	 * On versions with dual masters, you can optionally AND on
+	 * PL080_LLI_LM_AHB2 to the LLI to tell the hardware to read
+	 * in new LLIs with that controller, but we always try to
+	 * choose AHB1 to point into memory. The idea is to have AHB2
+	 * fixed on the peripheral and AHB1 messing around in the
+	 * memory. So we don't manipulate this bit currently.
+	 */
+
+	llis_va[num_llis].next =
+		(dma_addr_t)((u32) &(llis_bus[num_llis + 1]));
+
+	if (cctl & PL080_CONTROL_SRC_INCR)
+		txd->srcbus.addr += len;
+	if (cctl & PL080_CONTROL_DST_INCR)
+		txd->dstbus.addr += len;
+
+	*remainder -= len;
+
+	return num_llis + 1;
+}
+
+/*
+ * Return number of bytes to fill to boundary, or len
+ */
+static inline u32 pl08x_pre_boundary(u32 addr, u32 len)
+{
+	u32 boundary;
+
+	boundary = ((addr >> PL08X_BOUNDARY_SHIFT) + 1)
+		<< PL08X_BOUNDARY_SHIFT;
+
+	if (boundary < addr + len)
+		return boundary - addr;
+	else
+		return len;
+}
+
+/*
+ * This fills in the table of LLIs for the transfer descriptor
+ * Note that we assume we never have to change the burst sizes
+ * Return 0 for error
+ */
+static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
+			      struct pl08x_txd *txd)
+{
+	struct pl08x_channel_data *cd = txd->cd;
+	struct pl08x_bus_data *mbus, *sbus;
+	u32 remainder;
+	int num_llis = 0;
+	u32 cctl;
+	int max_bytes_per_lli;
+	int total_bytes = 0;
+	struct lli *llis_va;
+	struct lli *llis_bus;
+
+	if (!txd) {
+		dev_err(&pl08x->adev->dev, "%s no descriptor\n", __func__);
+		return 0;
+	}
+
+	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT,
+				      &txd->llis_bus);
+	if (!txd->llis_va) {
+		dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__);
+		return 0;
+	}
+
+	pl08x->pool_ctr++;
+
+	/*
+	 * Initialize bus values for this transfer
+	 * from the passed optimal values
+	 */
+	if (!cd) {
+		dev_err(&pl08x->adev->dev, "%s no channel data\n", __func__);
+		return 0;
+	}
+
+	/* Get the default CCTL from the platform data */
+	cctl = cd->cctl;
+
+	/*
+	 * On the PL080 we have two bus masters and we
+	 * should select one for source and one for
+	 * destination. We try to use AHB2 for the
+	 * bus which does not increment (typically the
+	 * peripheral) else we just choose something.
+	 */
+	cctl &= ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
+	if (pl08x->vd->dualmaster) {
+		if (cctl & PL080_CONTROL_SRC_INCR)
+			/* Source increments, use AHB2 for destination */
+			cctl |= PL080_CONTROL_DST_AHB2;
+		else if (cctl & PL080_CONTROL_DST_INCR)
+			/* Destination increments, use AHB2 for source */
+			cctl |= PL080_CONTROL_SRC_AHB2;
+		else
+			/* Just pick something, source AHB1 dest AHB2 */
+			cctl |= PL080_CONTROL_DST_AHB2;
+	}
+
+	/* Find maximum width of the source bus */
+	txd->srcbus.maxwidth =
+		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
+				       PL080_CONTROL_SWIDTH_SHIFT);
+
+	/* Find maximum width of the destination bus */
+	txd->dstbus.maxwidth =
+		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
+				       PL080_CONTROL_DWIDTH_SHIFT);
+
+	/* Set up the bus widths to the maximum */
+	txd->srcbus.buswidth = txd->srcbus.maxwidth;
+	txd->dstbus.buswidth = txd->dstbus.maxwidth;
+	dev_vdbg(&pl08x->adev->dev,
+		 "%s source bus is %d bytes wide, dest bus is %d bytes wide\n",
+		 __func__, txd->srcbus.buswidth, txd->dstbus.buswidth);
+
+
+	/*
+	 * Bytes transferred == tsize * MIN(buswidths), not max(buswidths)
+	 */
+	max_bytes_per_lli = min(txd->srcbus.buswidth, txd->dstbus.buswidth) *
+		PL080_CONTROL_TRANSFER_SIZE_MASK;
+	dev_vdbg(&pl08x->adev->dev,
+		 "%s max bytes per lli = %d\n",
+		 __func__, max_bytes_per_lli);
+
+	/* We need to count this down to zero */
+	remainder = txd->len;
+	dev_vdbg(&pl08x->adev->dev,
+		 "%s remainder = %d\n",
+		 __func__, remainder);
+
+	/*
+	 * Choose bus to align to
+	 * - prefers destination bus if both available
+	 * - if fixed address on one bus chooses other
+	 * - modifies cctl to choose an apropriate master
+	 */
+	pl08x_choose_master_bus(&txd->srcbus, &txd->dstbus,
+				&mbus, &sbus, cctl);
+
+
+	/*
+	 * The lowest bit of the LLI register
+	 * is also used to indicate which master to
+	 * use for reading the LLIs.
+	 */
+
+	if (txd->len < mbus->buswidth) {
+		/*
+		 * Less than a bus width available
+		 * - send as single bytes
+		 */
+		while (remainder) {
+			dev_vdbg(&pl08x->adev->dev,
+				 "%s single byte LLIs for a transfer of "
+				 "less than a bus width (remain %08x)\n",
+				 __func__, remainder);
+			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+			num_llis =
+				pl08x_fill_lli_for_desc(pl08x, txd, num_llis, 1,
+					cctl, &remainder);
+			total_bytes++;
+		}
+	} else {
+		/*
+		 *  Make one byte LLIs until master bus is aligned
+		 *  - slave will then be aligned also
+		 */
+		while ((mbus->addr) % (mbus->buswidth)) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%s adjustment lli for less than bus width "
+				 "(remain %08x)\n",
+				 __func__, remainder);
+			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+			num_llis = pl08x_fill_lli_for_desc
+				(pl08x, txd, num_llis, 1, cctl, &remainder);
+			total_bytes++;
+		}
+
+		/*
+		 *  Master now aligned
+		 * - if slave is not then we must set its width down
+		 */
+		if (sbus->addr % sbus->buswidth) {
+			dev_dbg(&pl08x->adev->dev,
+				"%s set down bus width to one byte\n",
+				 __func__);
+
+			sbus->buswidth = 1;
+		}
+
+		/*
+		 * Make largest possible LLIs until less than one bus
+		 * width left
+		 */
+		while (remainder > (mbus->buswidth - 1)) {
+			int lli_len, target_len;
+			int tsize;
+			int odd_bytes;
+
+			/*
+			 * If enough left try to send max possible,
+			 * otherwise try to send the remainder
+			 */
+			target_len = remainder;
+			if (remainder > max_bytes_per_lli)
+				target_len = max_bytes_per_lli;
+
+			/*
+			 * Set bus lengths for incrementing busses
+			 * to number of bytes which fill to next memory
+			 * boundary
+			 */
+			if (cctl & PL080_CONTROL_SRC_INCR)
+				txd->srcbus.fill_bytes =
+					pl08x_pre_boundary(
+						txd->srcbus.addr,
+						remainder);
+			else
+				txd->srcbus.fill_bytes =
+					max_bytes_per_lli;
+
+			if (cctl & PL080_CONTROL_DST_INCR)
+				txd->dstbus.fill_bytes =
+					pl08x_pre_boundary(
+						txd->dstbus.addr,
+						remainder);
+			else
+				txd->dstbus.fill_bytes =
+						max_bytes_per_lli;
+
+			/*
+			 *  Find the nearest
+			 */
+			lli_len	= min(txd->srcbus.fill_bytes,
+				txd->dstbus.fill_bytes);
+
+			BUG_ON(lli_len > remainder);
+
+			if (lli_len <= 0) {
+				dev_err(&pl08x->adev->dev,
+					"%s lli_len is %d, <= 0\n",
+						__func__, lli_len);
+				return 0;
+			}
+
+			if (lli_len == target_len) {
+				/*
+				 * Can send what we wanted
+				 */
+				/*
+				 *  Maintain alignment
+				 */
+				lli_len	= (lli_len/mbus->buswidth) *
+							mbus->buswidth;
+				odd_bytes = 0;
+			} else {
+				/*
+				 * So now we know how many bytes to transfer
+				 * to get to the nearest boundary
+				 * The next lli will past the boundary
+				 * - however we may be working to a boundary
+				 *   on the slave bus
+				 *   We need to ensure the master stays aligned
+				 */
+				odd_bytes = lli_len % mbus->buswidth;
+				/*
+				 * - and that we are working in multiples
+				 *   of the bus widths
+				 */
+				lli_len -= odd_bytes;
+
+			}
+
+			if (lli_len) {
+				/*
+				 * Check against minimum bus alignment:
+				 * Calculate actual transfer size in relation
+				 * to bus width an get a maximum remainder of
+				 * the smallest bus width - 1
+				 */
+				/* FIXME: use round_down()? */
+				tsize = lli_len / min(mbus->buswidth,
+						      sbus->buswidth);
+				lli_len	= tsize * min(mbus->buswidth,
+						      sbus->buswidth);
+
+				if (target_len != lli_len) {
+					dev_vdbg(&pl08x->adev->dev,
+					"%s can't send what we want. Desired %08x, lli of %08x bytes in txd of %08x\n",
+					__func__, target_len, lli_len, txd->len);
+				}
+
+				cctl = pl08x_cctl_bits(cctl,
+						       txd->srcbus.buswidth,
+						       txd->dstbus.buswidth,
+						       tsize);
+
+				dev_vdbg(&pl08x->adev->dev,
+					"%s fill lli with single lli chunk of size %08x (remainder %08x)\n",
+					__func__, lli_len, remainder);
+				num_llis = pl08x_fill_lli_for_desc(pl08x, txd,
+						num_llis, lli_len, cctl,
+						&remainder);
+				total_bytes += lli_len;
+			}
+
+
+			if (odd_bytes) {
+				/*
+				 * Creep past the boundary,
+				 * maintaining master alignment
+				 */
+				int j;
+				for (j = 0; (j < mbus->buswidth)
+						&& (remainder); j++) {
+					cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+					dev_vdbg(&pl08x->adev->dev,
+						"%s align with boundardy, single byte (remain %08x)\n",
+						__func__, remainder);
+					num_llis =
+						pl08x_fill_lli_for_desc(pl08x,
+							txd, num_llis, 1,
+							cctl, &remainder);
+					total_bytes++;
+				}
+			}
+		}
+
+		/*
+		 * Send any odd bytes
+		 */
+		if (remainder < 0) {
+			dev_err(&pl08x->adev->dev, "%s remainder not fitted 0x%08x bytes\n",
+					__func__, remainder);
+			return 0;
+		}
+
+		while (remainder) {
+			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
+			dev_vdbg(&pl08x->adev->dev,
+				"%s align with boundardy, single odd byte (remain %d)\n",
+				__func__, remainder);
+			num_llis = pl08x_fill_lli_for_desc(pl08x, txd, num_llis,
+					1, cctl, &remainder);
+			total_bytes++;
+		}
+	}
+	if (total_bytes != txd->len) {
+		dev_err(&pl08x->adev->dev,
+			"%s size of encoded lli:s don't match total txd, transferred 0x%08x from size 0x%08x\n",
+			__func__, total_bytes, txd->len);
+		return 0;
+	}
+
+	if (num_llis >= MAX_NUM_TSFR_LLIS) {
+		dev_err(&pl08x->adev->dev,
+			"%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
+			__func__, (u32) MAX_NUM_TSFR_LLIS);
+		return 0;
+	}
+	/*
+	 * Decide whether this is a loop or a terminated transfer
+	 */
+	llis_va = txd->llis_va;
+	llis_bus = (struct lli *) txd->llis_bus;
+
+	if (cd->circular_buffer) {
+		/*
+		 * Loop the circular buffer so that the next element
+		 * points back to the beginning of the LLI.
+		 */
+		llis_va[num_llis - 1].next =
+			(dma_addr_t)((unsigned int)&(llis_bus[0]));
+	} else {
+		/*
+		 * On non-circular buffers, the final LLI terminates
+		 * the LLI.
+		 */
+		llis_va[num_llis - 1].next = 0;
+		/*
+		 * The final LLI element shall also fire an interrupt
+		 */
+		llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
+	}
+
+	/* Now store the channel register values */
+	txd->csrc = llis_va[0].src;
+	txd->cdst = llis_va[0].dst;
+	if (num_llis > 1)
+		txd->clli = llis_va[0].next;
+	else
+		txd->clli = 0;
+
+	txd->cctl = llis_va[0].cctl;
+	/* ccfg will be set at physical channel allocation time */
+
+#ifdef VERBOSE_DEBUG
+	{
+		int i;
+
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				 "lli %d @%p: csrc=%08x, cdst=%08x, cctl=%08x, clli=%08x\n",
+				 i,
+				 &llis_va[i],
+				 llis_va[i].src,
+				 llis_va[i].dst,
+				 llis_va[i].cctl,
+				 llis_va[i].next
+				);
+		}
+	}
+#endif
+
+	return num_llis;
+}
+
+/* You should call this with the struct pl08x lock held */
+static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
+			   struct pl08x_txd *txd)
+{
+	if (!txd)
+		dev_err(&pl08x->adev->dev,
+			"%s no descriptor to free\n",
+			__func__);
+
+	/* Free the LLI */
+	dma_pool_free(pl08x->pool, txd->llis_va,
+		      txd->llis_bus);
+
+	pl08x->pool_ctr--;
+
+	kfree(txd);
+}
+
+static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
+				struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_txd *txdi = NULL;
+	struct pl08x_txd *next;
+
+	if (!list_empty(&plchan->desc_list)) {
+		list_for_each_entry_safe(txdi,
+					 next, &plchan->desc_list, node) {
+			list_del(&txdi->node);
+			pl08x_free_txd(pl08x, txdi);
+		}
+
+	}
+}
+
+/*
+ * The DMA ENGINE API
+ */
+static int pl08x_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void pl08x_free_chan_resources(struct dma_chan *chan)
+{
+}
+
+/*
+ * This should be called with the channel plchan->lock held
+ */
+static int prep_phy_channel(struct pl08x_dma_chan *plchan,
+			    struct pl08x_txd *txd)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_phy_chan *ch;
+	int ret;
+
+	/* Check if we already have a channel */
+	if (plchan->phychan)
+		return 0;
+
+	ch = pl08x_get_phy_channel(pl08x, plchan);
+	if (!ch) {
+		/* No physical channel available, cope with it */
+		dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name);
+		return -EBUSY;
+	}
+
+	/*
+	 * OK we have a physical channel: for memcpy() this is all we
+	 * need, but for slaves the physical signals may be muxed!
+	 * Can the platform allow us to use this channel?
+	 */
+	if (plchan->slave &&
+	    ch->signal < 0 &&
+	    pl08x->pd->get_signal) {
+		ret = pl08x->pd->get_signal(plchan);
+		if (ret < 0) {
+			dev_dbg(&pl08x->adev->dev,
+				"unable to use physical channel %d for transfer on %s due to platform restrictions\n",
+				ch->id, plchan->name);
+			/* Release physical channel & return */
+			pl08x_put_phy_channel(pl08x, ch);
+			return -EBUSY;
+		}
+		ch->signal = ret;
+	}
+
+	dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n",
+		 ch->id,
+		 ch->signal,
+		 plchan->name);
+
+	plchan->phychan = ch;
+
+	return 0;
+}
+
+static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan);
+
+	atomic_inc(&plchan->last_issued);
+	tx->cookie = atomic_read(&plchan->last_issued);
+	/* This unlock follows the lock in the prep() function */
+	spin_unlock_irqrestore(&plchan->lock, plchan->lockflags);
+
+	return tx->cookie;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
+		struct dma_chan *chan, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *retval = NULL;
+
+	return retval;
+}
+
+/*
+ * Code accessing dma_async_is_complete() in a tight loop
+ * may give problems - could schedule where indicated.
+ * If slaves are relying on interrupts to signal completion this
+ * function must not be called with interrupts disabled
+ */
+static enum dma_status
+pl08x_dma_tx_status(struct dma_chan *chan,
+		    dma_cookie_t cookie,
+		    struct dma_tx_state *txstate)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+	enum dma_status ret;
+	u32 bytesleft = 0;
+
+	last_used = atomic_read(&plchan->last_issued);
+	last_complete = plchan->lc;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret == DMA_SUCCESS) {
+		dma_set_tx_state(txstate, last_complete, last_used, 0);
+		return ret;
+	}
+
+	/*
+	 * schedule(); could be inserted here
+	 */
+
+	/*
+	 * This cookie not complete yet
+	 */
+	last_used = atomic_read(&plchan->last_issued);
+	last_complete = plchan->lc;
+
+	/* Get number of bytes left in the active transactions and queue */
+	bytesleft = pl08x_getbytes_chan(plchan);
+
+	dma_set_tx_state(txstate, last_complete, last_used,
+			 bytesleft);
+
+	if (plchan->state == PL08X_CHAN_PAUSED)
+		return DMA_PAUSED;
+
+	/* Whether waiting or running, we're in progress */
+	return DMA_IN_PROGRESS;
+}
+
+/* PrimeCell DMA extension */
+struct burst_table {
+	int burstwords;
+	u32 reg;
+};
+
+static const struct burst_table burst_sizes[] = {
+	{
+		.burstwords = 256,
+		.reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 128,
+		.reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 64,
+		.reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 32,
+		.reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 16,
+		.reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 8,
+		.reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 4,
+		.reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+	{
+		.burstwords = 1,
+		.reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT),
+	},
+};
+
+static void dma_set_runtime_config(struct dma_chan *chan,
+			       struct dma_slave_config *config)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_channel_data *cd = plchan->cd;
+	enum dma_slave_buswidth addr_width;
+	u32 maxburst;
+	u32 cctl = 0;
+	/* Mask out all except src and dst channel */
+	u32 ccfg = cd->ccfg & 0x000003DEU;
+	int i = 0;
+
+	/* Transfer direction */
+	plchan->runtime_direction = config->direction;
+	if (config->direction == DMA_TO_DEVICE) {
+		plchan->runtime_addr = config->dst_addr;
+		cctl |= PL080_CONTROL_SRC_INCR;
+		ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		addr_width = config->dst_addr_width;
+		maxburst = config->dst_maxburst;
+	} else if (config->direction == DMA_FROM_DEVICE) {
+		plchan->runtime_addr = config->src_addr;
+		cctl |= PL080_CONTROL_DST_INCR;
+		ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		addr_width = config->src_addr_width;
+		maxburst = config->src_maxburst;
+	} else {
+		dev_err(&pl08x->adev->dev,
+			"bad runtime_config: alien transfer direction\n");
+		return;
+	}
+
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) |
+			(PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT);
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) |
+			(PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT);
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) |
+			(PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT);
+		break;
+	default:
+		dev_err(&pl08x->adev->dev,
+			"bad runtime_config: alien address width\n");
+		return;
+	}
+
+	/*
+	 * Now decide on a maxburst:
+	 * If this channel will only request single transfers, set
+	 * this down to ONE element.
+	 */
+	if (plchan->cd->single) {
+		cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
+			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT);
+	} else {
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burstwords <= maxburst)
+				break;
+			i++;
+		}
+		cctl |= burst_sizes[i].reg;
+	}
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	cctl &= ~PL080_CONTROL_PROT_MASK;
+	cctl |= PL080_CONTROL_PROT_SYS;
+
+	/* Modify the default channel data to fit PrimeCell request */
+	cd->cctl = cctl;
+	cd->ccfg = ccfg;
+
+	dev_dbg(&pl08x->adev->dev,
+		"configured channel %s (%s) for %s, data width %d, "
+		"maxburst %d words, LE, CCTL=%08x, CCFG=%08x\n",
+		dma_chan_name(chan), plchan->name,
+		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		addr_width,
+		maxburst,
+		cctl, ccfg);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void pl08x_issue_pending(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+
+	spin_lock_irqsave(&plchan->lock, flags);
+	/* Something is already active */
+	if (plchan->at) {
+			spin_unlock_irqrestore(&plchan->lock, flags);
+			return;
+	}
+
+	/* Didn't get a physical channel so waiting for it ... */
+	if (plchan->state == PL08X_CHAN_WAITING)
+		return;
+
+	/* Take the first element in the queue and execute it */
+	if (!list_empty(&plchan->desc_list)) {
+		struct pl08x_txd *next;
+
+		next = list_first_entry(&plchan->desc_list,
+					struct pl08x_txd,
+					node);
+		list_del(&next->node);
+		plchan->at = next;
+		plchan->state = PL08X_CHAN_RUNNING;
+
+		/* Configure the physical channel for the active txd */
+		pl08x_config_phychan_for_txd(plchan);
+		pl08x_set_cregs(pl08x, plchan->phychan);
+		pl08x_enable_phy_chan(pl08x, plchan->phychan);
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+}
+
+static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
+					struct pl08x_txd *txd)
+{
+	int num_llis;
+	struct pl08x_driver_data *pl08x = plchan->host;
+	int ret;
+
+	num_llis = pl08x_fill_llis_for_desc(pl08x, txd);
+
+	if (!num_llis)
+		return -EINVAL;
+
+	spin_lock_irqsave(&plchan->lock, plchan->lockflags);
+
+	/*
+	 * If this device is not using a circular buffer then
+	 * queue this new descriptor for transfer.
+	 * The descriptor for a circular buffer continues
+	 * to be used until the channel is freed.
+	 */
+	if (txd->cd->circular_buffer)
+		dev_err(&pl08x->adev->dev,
+			"%s attempting to queue a circular buffer\n",
+			__func__);
+	else
+		list_add_tail(&txd->node,
+			      &plchan->desc_list);
+
+	/*
+	 * See if we already have a physical channel allocated,
+	 * else this is the time to try to get one.
+	 */
+	ret = prep_phy_channel(plchan, txd);
+	if (ret) {
+		/*
+		 * No physical channel available, we will
+		 * stack up the memcpy channels until there is a channel
+		 * available to handle it whereas slave transfers may
+		 * have been denied due to platform channel muxing restrictions
+		 * and since there is no guarantee that this will ever be
+		 * resolved, and since the signal must be aquired AFTER
+		 * aquiring the physical channel, we will let them be NACK:ed
+		 * with -EBUSY here. The drivers can alway retry the prep()
+		 * call if they are eager on doing this using DMA.
+		 */
+		if (plchan->slave) {
+			pl08x_free_txd_list(pl08x, plchan);
+			spin_unlock_irqrestore(&plchan->lock, plchan->lockflags);
+			return -EBUSY;
+		}
+		/* Do this memcpy whenever there is a channel ready */
+		plchan->state = PL08X_CHAN_WAITING;
+		plchan->waiting = txd;
+	} else
+		/*
+		 * Else we're all set, paused and ready to roll,
+		 * status will switch to PL08X_CHAN_RUNNING when
+		 * we call issue_pending(). If there is something
+		 * running on the channel already we don't change
+		 * its state.
+		 */
+		if (plchan->state == PL08X_CHAN_IDLE)
+			plchan->state = PL08X_CHAN_PAUSED;
+
+	/*
+	 * Notice that we leave plchan->lock locked on purpose:
+	 * it will be unlocked in the subsequent tx_submit()
+	 * call. This is a consequence of the current API.
+	 */
+
+	return 0;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret;
+
+	txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no memory for descriptor\n", __func__);
+		return NULL;
+	}
+
+	dma_async_tx_descriptor_init(&txd->tx, chan);
+	txd->direction = DMA_NONE;
+	txd->srcbus.addr = src;
+	txd->dstbus.addr = dest;
+
+	/* Set platform data for m2m */
+	txd->cd = &pl08x->pd->memcpy_channel;
+	/* Both to be incremented or the code will break */
+	txd->cd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+	txd->tx.tx_submit = pl08x_tx_submit;
+	txd->tx.callback = NULL;
+	txd->tx.callback_param = NULL;
+	txd->len = len;
+
+	INIT_LIST_HEAD(&txd->node);
+	ret = pl08x_prep_channel_resources(plchan, txd);
+	if (ret)
+		return NULL;
+	/*
+	 * NB: the channel lock is held at this point so tx_submit()
+	 * must be called in direct succession.
+	 */
+
+	return &txd->tx;
+}
+
+struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret;
+
+	/*
+	 * Current implementation ASSUMES only one sg
+	 */
+	if (sg_len != 1) {
+		dev_err(&pl08x->adev->dev, "%s prepared too long sglist\n",
+			__func__);
+		BUG();
+	}
+
+	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+		__func__, sgl->length, plchan->name);
+
+	txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
+		return NULL;
+	}
+
+	dma_async_tx_descriptor_init(&txd->tx, chan);
+
+	if (direction != plchan->runtime_direction)
+		dev_err(&pl08x->adev->dev, "%s DMA setup does not match "
+			"the direction configured for the PrimeCell\n",
+			__func__);
+
+	/*
+	 * Set up addresses, the PrimeCell configured address
+	 * will take precedence since this may configure the
+	 * channel target address dynamically at runtime.
+	 */
+	txd->direction = direction;
+	if (direction == DMA_TO_DEVICE) {
+		txd->srcbus.addr = sgl->dma_address;
+		if (plchan->runtime_addr)
+			txd->dstbus.addr = plchan->runtime_addr;
+		else
+			txd->dstbus.addr = plchan->cd->addr;
+	} else if (direction == DMA_FROM_DEVICE) {
+		if (plchan->runtime_addr)
+			txd->srcbus.addr = plchan->runtime_addr;
+		else
+			txd->srcbus.addr = plchan->cd->addr;
+		txd->dstbus.addr = sgl->dma_address;
+	} else {
+		dev_err(&pl08x->adev->dev,
+			"%s direction unsupported\n", __func__);
+		return NULL;
+	}
+	txd->cd = plchan->cd;
+	txd->tx.tx_submit = pl08x_tx_submit;
+	txd->tx.callback = NULL;
+	txd->tx.callback_param = NULL;
+	txd->len = sgl->length;
+	INIT_LIST_HEAD(&txd->node);
+
+	ret = pl08x_prep_channel_resources(plchan, txd);
+	if (ret)
+		return NULL;
+	/*
+	 * NB: the channel lock is held at this point so tx_submit()
+	 * must be called in direct succession.
+	 */
+
+	return &txd->tx;
+}
+
+static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	/* Controls applicable to inactive channels */
+	if (cmd == DMA_SLAVE_CONFIG) {
+		dma_set_runtime_config(chan,
+				       (struct dma_slave_config *)
+				       arg);
+		return 0;
+	}
+
+	/*
+	 * Anything succeeds on channels with no physical allocation and
+	 * no queued transfers.
+	 */
+	spin_lock_irqsave(&plchan->lock, flags);
+	if (!plchan->phychan && !plchan->at) {
+		spin_unlock_irqrestore(&plchan->lock, flags);
+		return 0;
+	}
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		plchan->state = PL08X_CHAN_IDLE;
+
+		if (plchan->phychan) {
+			pl08x_stop_phy_chan(plchan->phychan);
+
+			/*
+			 * Mark physical channel as free and free any slave
+			 * signal
+			 */
+			if ((plchan->phychan->signal >= 0) &&
+			    pl08x->pd->put_signal) {
+				pl08x->pd->put_signal(plchan);
+				plchan->phychan->signal = -1;
+			}
+			pl08x_put_phy_channel(pl08x, plchan->phychan);
+			plchan->phychan = NULL;
+		}
+		/* Stop any pending tasklet */
+		tasklet_disable(&plchan->tasklet);
+		/* Dequeue jobs and free LLIs */
+		if (plchan->at) {
+			pl08x_free_txd(pl08x, plchan->at);
+			plchan->at = NULL;
+		}
+		/* Dequeue jobs not yet fired as well */
+		pl08x_free_txd_list(pl08x, plchan);
+		break;
+	case DMA_PAUSE:
+		pl08x_pause_phy_chan(plchan->phychan);
+		plchan->state = PL08X_CHAN_PAUSED;
+		break;
+	case DMA_RESUME:
+		pl08x_resume_phy_chan(plchan->phychan);
+		plchan->state = PL08X_CHAN_RUNNING;
+		break;
+	default:
+		/* Unknown command */
+		ret = -ENXIO;
+		break;
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	return ret;
+}
+
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	char *name = chan_id;
+
+	/* Check that the channel is not taken! */
+	if (!strcmp(plchan->name, name))
+		return true;
+
+	return false;
+}
+
+/*
+ * Just check that the device is there and active
+ * TODO: turn this bit on/off depending on the number of
+ * physical channels actually used, if it is zero... well
+ * shut it off. That will save some power. Cut the clock
+ * at the same time.
+ */
+static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
+{
+	u32 val;
+
+	val = readl(pl08x->base + PL080_CONFIG);
+	val &= ~(PL080_CONFIG_M2_BE | PL080_CONFIG_M1_BE | PL080_CONFIG_ENABLE);
+	/* We implictly clear bit 1 and that means little-endian mode */
+	val |= PL080_CONFIG_ENABLE;
+	writel(val, pl08x->base + PL080_CONFIG);
+}
+
+static void pl08x_tasklet(unsigned long data)
+{
+	struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data;
+	struct pl08x_phy_chan *phychan = plchan->phychan;
+	struct pl08x_driver_data *pl08x = plchan->host;
+
+	if (!plchan)
+		BUG();
+
+	spin_lock(&plchan->lock);
+
+	if (plchan->at) {
+		dma_async_tx_callback callback =
+			plchan->at->tx.callback;
+		void *callback_param =
+			plchan->at->tx.callback_param;
+
+		/*
+		 * Update last completed
+		 */
+		plchan->lc =
+			(plchan->at->tx.cookie);
+
+		/*
+		 * Callback to signal completion
+		 */
+		if (callback)
+			callback(callback_param);
+
+		/*
+		 * Device callbacks should NOT clear
+		 * the current transaction on the channel
+		 * Linus: sometimes they should?
+		 */
+		if (!plchan->at)
+			BUG();
+
+		/*
+		 * Free the descriptor if it's not for a device
+		 * using a circular buffer
+		 */
+		if (!plchan->at->cd->circular_buffer) {
+			pl08x_free_txd(pl08x, plchan->at);
+			plchan->at = NULL;
+		}
+		/*
+		 * else descriptor for circular
+		 * buffers only freed when
+		 * client has disabled dma
+		 */
+	}
+	/*
+	 * If a new descriptor is queued, set it up
+	 * plchan->at is NULL here
+	 */
+	if (!list_empty(&plchan->desc_list)) {
+		struct pl08x_txd *next;
+
+		next = list_first_entry(&plchan->desc_list,
+					struct pl08x_txd,
+					node);
+		list_del(&next->node);
+		plchan->at = next;
+		/* Configure the physical channel for the next txd */
+		pl08x_config_phychan_for_txd(plchan);
+		pl08x_set_cregs(pl08x, plchan->phychan);
+		pl08x_enable_phy_chan(pl08x, plchan->phychan);
+	} else {
+		struct pl08x_dma_chan *waiting = NULL;
+
+		/*
+		 * No more jobs, so free up the physical channel
+		 * Free any allocated signal on slave transfers too
+		 */
+		if ((phychan->signal >= 0) && pl08x->pd->put_signal) {
+			pl08x->pd->put_signal(plchan);
+			phychan->signal = -1;
+		}
+		pl08x_put_phy_channel(pl08x, phychan);
+		plchan->phychan = NULL;
+		plchan->state = PL08X_CHAN_IDLE;
+
+		/*
+		 * And NOW before anyone else can grab that free:d
+		 * up physical channel, see if there is some memcpy
+		 * pending that seriously needs to start because of
+		 * being stacked up while we were choking the
+		 * physical channels with data.
+		 */
+		list_for_each_entry(waiting, &pl08x->memcpy.channels,
+				    chan.device_node) {
+		  if (waiting->state == PL08X_CHAN_WAITING &&
+			    waiting->waiting != NULL) {
+				int ret;
+
+				/* This should REALLY not fail now */
+				ret = prep_phy_channel(waiting,
+						       waiting->waiting);
+				BUG_ON(ret);
+				waiting->state = PL08X_CHAN_RUNNING;
+				waiting->waiting = NULL;
+				pl08x_issue_pending(&waiting->chan);
+				break;
+			}
+		}
+	}
+
+	spin_unlock(&plchan->lock);
+}
+
+static irqreturn_t pl08x_irq(int irq, void *dev)
+{
+	struct pl08x_driver_data *pl08x = dev;
+	u32 mask = 0;
+	u32 val;
+	int i;
+
+	val = readl(pl08x->base + PL080_ERR_STATUS);
+	if (val) {
+		/*
+		 * An error interrupt (on one or more channels)
+		 */
+		dev_err(&pl08x->adev->dev,
+			"%s error interrupt, register value 0x%08x\n",
+				__func__, val);
+		/*
+		 * Simply clear ALL PL08X error interrupts,
+		 * regardless of channel and cause
+		 * FIXME: should be 0x00000003 on PL081 really.
+		 */
+		writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	}
+	val = readl(pl08x->base + PL080_INT_STATUS);
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		if ((1 << i) & val) {
+			/* Locate physical channel */
+			struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
+			struct pl08x_dma_chan *plchan = phychan->serving;
+
+			/* Schedule tasklet on this channel */
+			tasklet_schedule(&plchan->tasklet);
+
+			mask |= (1 << i);
+		}
+	}
+	/*
+	 * Clear only the terminal interrupts on channels we processed
+	 */
+	writel(mask, pl08x->base + PL080_TC_CLEAR);
+
+	return mask ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
+					   struct dma_device *dmadev,
+					   unsigned int channels,
+					   bool slave)
+{
+	struct pl08x_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+	/*
+	 * Register as many many memcpy as we have physical channels,
+	 * we won't always be able to use all but the code will have
+	 * to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = kzalloc(sizeof(struct pl08x_dma_chan), GFP_KERNEL);
+		if (!chan) {
+			dev_err(&pl08x->adev->dev,
+				"%s no memory for channel\n", __func__);
+			return -ENOMEM;
+		}
+
+		chan->host = pl08x;
+		chan->state = PL08X_CHAN_IDLE;
+
+		if (slave) {
+			chan->slave = true;
+			chan->name = pl08x->pd->slave_channels[i].bus_id;
+			chan->cd = &pl08x->pd->slave_channels[i];
+		} else {
+			chan->cd = &pl08x->pd->memcpy_channel;
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name) {
+				kfree(chan);
+				return -ENOMEM;
+			}
+		}
+		dev_info(&pl08x->adev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->chan.device = dmadev;
+		atomic_set(&chan->last_issued, 0);
+		chan->lc = atomic_read(&chan->last_issued);
+
+		spin_lock_init(&chan->lock);
+		INIT_LIST_HEAD(&chan->desc_list);
+		tasklet_init(&chan->tasklet, pl08x_tasklet,
+			     (unsigned long) chan);
+
+		list_add_tail(&chan->chan.device_node, &dmadev->channels);
+	}
+	dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void pl08x_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct pl08x_dma_chan *chan = NULL;
+	struct pl08x_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, chan.device_node) {
+		list_del(&chan->chan.device_node);
+		kfree(chan);
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+static const char *pl08x_state_str(enum pl08x_dma_chan_state state)
+{
+	switch (state) {
+	case PL08X_CHAN_IDLE:
+		return "idle";
+	case PL08X_CHAN_RUNNING:
+		return "running";
+	case PL08X_CHAN_PAUSED:
+		return "paused";
+	case PL08X_CHAN_WAITING:
+		return "waiting";
+	default:
+		break;
+	}
+	return "UNKNOWN STATE";
+}
+
+static int pl08x_debugfs_show(struct seq_file *s, void *data)
+{
+	struct pl08x_driver_data *pl08x = s->private;
+	struct pl08x_dma_chan *chan;
+	struct pl08x_phy_chan *ch;
+	unsigned long flags;
+	int i;
+
+	seq_printf(s, "PL08x physical channels:\n");
+	seq_printf(s, "CHANNEL:\tUSER:\n");
+	seq_printf(s, "--------\t-----\n");
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		struct pl08x_dma_chan *virt_chan;
+
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+		virt_chan = ch->serving;
+
+		seq_printf(s, "%d\t\t%s\n",
+			   ch->id, virt_chan ? virt_chan->name : "(none)");
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	seq_printf(s, "\nPL08x virtual memcpy channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) {
+		seq_printf(s, "%s\t\t\%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	seq_printf(s, "\nPL08x virtual slave channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) {
+		seq_printf(s, "%s\t\t\%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	return 0;
+}
+
+static int pl08x_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pl08x_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations pl08x_debugfs_operations = {
+	.open		= pl08x_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+	/* Expose a simple debugfs interface to view all clocks */
+	(void) debugfs_create_file(dev_name(&pl08x->adev->dev), S_IFREG | S_IRUGO,
+				   NULL, pl08x,
+				   &pl08x_debugfs_operations);
+}
+
+#else
+static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+}
+#endif
+
+static int pl08x_probe(struct amba_device *adev, struct amba_id *id)
+{
+	struct pl08x_driver_data *pl08x;
+	struct vendor_data *vd = id->data;
+	int ret = 0;
+	int i;
+
+	ret = amba_request_regions(adev, NULL);
+	if (ret)
+		return ret;
+
+	/* Create the driver state holder */
+	pl08x = kzalloc(sizeof(struct pl08x_driver_data), GFP_KERNEL);
+	if (!pl08x) {
+		ret = -ENOMEM;
+		goto out_no_pl08x;
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
+	pl08x->memcpy.dev = &adev->dev;
+	pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources;
+	pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
+	pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->memcpy.device_tx_status = pl08x_dma_tx_status;
+	pl08x->memcpy.device_issue_pending = pl08x_issue_pending;
+	pl08x->memcpy.device_control = pl08x_control;
+
+	/* Initialize slave engine */
+	dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+	pl08x->slave.dev = &adev->dev;
+	pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
+	pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+	pl08x->slave.device_issue_pending = pl08x_issue_pending;
+	pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+	pl08x->slave.device_control = pl08x_control;
+
+	/* Get the platform data */
+	pl08x->pd = dev_get_platdata(&adev->dev);
+	if (!pl08x->pd) {
+		dev_err(&adev->dev, "no platform data supplied\n");
+		goto out_no_platdata;
+	}
+
+	/* Assign useful pointers to the driver state */
+	pl08x->adev = adev;
+	pl08x->vd = vd;
+
+	/* A DMA memory pool for LLIs, align on 1-byte boundary */
+	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
+			PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0);
+	if (!pl08x->pool) {
+		ret = -ENOMEM;
+		goto out_no_lli_pool;
+	}
+
+	spin_lock_init(&pl08x->lock);
+
+	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (!pl08x->base) {
+		ret = -ENOMEM;
+		goto out_no_ioremap;
+	}
+
+	/* Turn on the PL08x */
+	pl08x_ensure_on(pl08x);
+
+	/*
+	 * Attach the interrupt handler
+	 */
+	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
+
+	ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
+			  vd->name, pl08x);
+	if (ret) {
+		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
+			__func__, adev->irq[0]);
+		goto out_no_irq;
+	}
+
+	/* Initialize physical channels */
+	pl08x->phy_chans = kmalloc((vd->channels * sizeof(struct pl08x_phy_chan)),
+			GFP_KERNEL);
+	if (!pl08x->phy_chans) {
+		dev_err(&adev->dev, "%s failed to allocate "
+			"physical channel holders\n",
+			__func__);
+		goto out_no_phychans;
+	}
+
+	for (i = 0; i < vd->channels; i++) {
+		struct pl08x_phy_chan *ch = &pl08x->phy_chans[i];
+
+		ch->id = i;
+		ch->base = pl08x->base + PL080_Cx_BASE(i);
+		spin_lock_init(&ch->lock);
+		ch->serving = NULL;
+		ch->signal = -1;
+		dev_info(&adev->dev,
+			 "physical channel %d is %s\n", i,
+			 pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
+	}
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy,
+					      pl08x->vd->channels, false);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto out_no_memcpy;
+	}
+	pl08x->memcpy.chancnt = ret;
+
+	/* Register slave channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+					      pl08x->pd->num_slave_channels,
+					      true);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto out_no_slave;
+	}
+	pl08x->slave.chancnt = ret;
+
+	ret = dma_async_device_register(&pl08x->memcpy);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto out_no_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&pl08x->slave);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto out_no_slave_reg;
+	}
+
+	amba_set_drvdata(adev, pl08x);
+	init_pl08x_debugfs(pl08x);
+	dev_info(&pl08x->adev->dev, "ARM(R) %s DMA block initialized @%08x\n",
+		vd->name, adev->res.start);
+	return 0;
+
+out_no_slave_reg:
+	dma_async_device_unregister(&pl08x->memcpy);
+out_no_memcpy_reg:
+	pl08x_free_virtual_channels(&pl08x->slave);
+out_no_slave:
+	pl08x_free_virtual_channels(&pl08x->memcpy);
+out_no_memcpy:
+	kfree(pl08x->phy_chans);
+out_no_phychans:
+	free_irq(adev->irq[0], pl08x);
+out_no_irq:
+	iounmap(pl08x->base);
+out_no_ioremap:
+	dma_pool_destroy(pl08x->pool);
+out_no_lli_pool:
+out_no_platdata:
+	kfree(pl08x);
+out_no_pl08x:
+	amba_release_regions(adev);
+	return ret;
+}
+
+/* PL080 has 8 channels and the PL080 have just 2 */
+static struct vendor_data vendor_pl080 = {
+	.name = "PL080",
+	.channels = 8,
+	.dualmaster = true,
+};
+
+static struct vendor_data vendor_pl081 = {
+	.name = "PL081",
+	.channels = 2,
+	.dualmaster = false,
+};
+
+static struct amba_id pl08x_ids[] = {
+	/* PL080 */
+	{
+		.id	= 0x00041080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl080,
+	},
+	/* PL081 */
+	{
+		.id	= 0x00041081,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl081,
+	},
+	/* Nomadik 8815 PL080 variant */
+	{
+		.id	= 0x00280880,
+		.mask	= 0x00ffffff,
+		.data	= &vendor_pl080,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver pl08x_amba_driver = {
+	.drv.name	= DRIVER_NAME,
+	.id_table	= pl08x_ids,
+	.probe		= pl08x_probe,
+};
+
+static int __init pl08x_init(void)
+{
+	int retval;
+	retval = amba_driver_register(&pl08x_amba_driver);
+	if (retval)
+		printk(KERN_WARNING DRIVER_NAME
+		       "failed to register as an amba device (%d)\n",
+		       retval);
+	return retval;
+}
+subsys_initcall(pl08x_init);
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
new file mode 100644
index 000000000000..521a0f8974ac
--- /dev/null
+++ b/include/linux/amba/pl08x.h
@@ -0,0 +1,222 @@
+/*
+ * linux/amba/pl08x.h - ARM PrimeCell DMA Controller driver
+ *
+ * Copyright (C) 2005 ARM Ltd
+ * Copyright (C) 2010 ST-Ericsson SA
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * pl08x information required by platform code
+ *
+ * Please credit ARM.com
+ * Documentation: ARM DDI 0196D
+ *
+ */
+
+#ifndef AMBA_PL08X_H
+#define AMBA_PL08X_H
+
+/* We need sizes of structs from this header */
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+/**
+ * struct pl08x_channel_data - data structure to pass info between
+ * platform and PL08x driver regarding channel configuration
+ * @bus_id: name of this device channel, not just a device name since
+ * devices may have more than one channel e.g. "foo_tx"
+ * @min_signal: the minimum DMA signal number to be muxed in for this
+ * channel (for platforms supporting muxed signals). If you have
+ * static assignments, make sure this is set to the assigned signal
+ * number, PL08x have 16 possible signals in number 0 thru 15 so
+ * when these are not enough they often get muxed (in hardware)
+ * disabling simultaneous use of the same channel for two devices.
+ * @max_signal: the maximum DMA signal number to be muxed in for
+ * the channel. Set to the same as min_signal for
+ * devices with static assignments
+ * @muxval: a number usually used to poke into some mux regiser to
+ * mux in the signal to this channel
+ * @cctl_opt: default options for the channel control register
+ * @addr: source/target address in physical memory for this DMA channel,
+ * can be the address of a FIFO register for burst requests for example.
+ * This can be left undefined if the PrimeCell API is used for configuring
+ * this.
+ * @circular_buffer: whether the buffer passed in is circular and
+ * shall simply be looped round round (like a record baby round
+ * round round round)
+ * @single: the device connected to this channel will request single
+ * DMA transfers, not bursts. (Bursts are default.)
+ */
+struct pl08x_channel_data {
+	char *bus_id;
+	int min_signal;
+	int max_signal;
+	u32 muxval;
+	u32 cctl;
+	u32 ccfg;
+	dma_addr_t addr;
+	bool circular_buffer;
+	bool single;
+};
+
+/**
+ * Struct pl08x_bus_data - information of source or destination
+ * busses for a transfer
+ * @addr: current address
+ * @maxwidth: the maximum width of a transfer on this bus
+ * @buswidth: the width of this bus in bytes: 1, 2 or 4
+ * @fill_bytes: bytes required to fill to the next bus memory
+ * boundary
+ */
+struct pl08x_bus_data {
+	dma_addr_t addr;
+	u8 maxwidth;
+	u8 buswidth;
+	u32 fill_bytes;
+};
+
+/**
+ * struct pl08x_phy_chan - holder for the physical channels
+ * @id: physical index to this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @signal: the physical signal (aka channel) serving this
+ * physical channel right now
+ * @serving: the virtual channel currently being served by this
+ * physical channel
+ */
+struct pl08x_phy_chan {
+	unsigned int id;
+	void __iomem *base;
+	spinlock_t lock;
+	int signal;
+	struct pl08x_dma_chan *serving;
+	u32 csrc;
+	u32 cdst;
+	u32 clli;
+	u32 cctl;
+	u32 ccfg;
+};
+
+/**
+ * struct pl08x_txd - wrapper for struct dma_async_tx_descriptor
+ * @llis_bus: DMA memory address (physical) start for the LLIs
+ * @llis_va: virtual memory address start for the LLIs
+ */
+struct pl08x_txd {
+	struct dma_async_tx_descriptor tx;
+	struct list_head node;
+	enum dma_data_direction	direction;
+	struct pl08x_bus_data srcbus;
+	struct pl08x_bus_data dstbus;
+	int len;
+	dma_addr_t llis_bus;
+	void *llis_va;
+	struct pl08x_channel_data *cd;
+	bool active;
+	/*
+	 * Settings to be put into the physical channel when we
+	 * trigger this txd
+	 */
+	u32 csrc;
+	u32 cdst;
+	u32 clli;
+	u32 cctl;
+};
+
+/**
+ * struct pl08x_dma_chan_state - holds the PL08x specific virtual
+ * channel states
+ * @PL08X_CHAN_IDLE: the channel is idle
+ * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @PL08X_CHAN_PAUSED: the channel has allocated a physical transport
+ * channel, but the transfer is currently paused
+ * @PL08X_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum pl08x_dma_chan_state {
+	PL08X_CHAN_IDLE,
+	PL08X_CHAN_RUNNING,
+	PL08X_CHAN_PAUSED,
+	PL08X_CHAN_WAITING,
+};
+
+/**
+ * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
+ * @chan: wrappped abstract channel
+ * @phychan: the physical channel utilized by this channel, if there is one
+ * @tasklet: tasklet scheduled by the IRQ to handle actual work etc
+ * @name: name of channel
+ * @cd: channel platform data
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @runtime_direction: current direction of this channel according to
+ * runtime config
+ * @lc: last completed transaction on this channel
+ * @desc_list: queued transactions pending on this channel
+ * @at: active transaction on this channel
+ * @lockflags: sometimes we let a lock last between two function calls,
+ * especially prep/submit, and then we need to store the IRQ flags
+ * in the channel state, here
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, paused, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ * @waiting: a TX descriptor on this channel which is waiting for
+ * a physical channel to become available
+ */
+struct pl08x_dma_chan {
+	struct dma_chan chan;
+	struct pl08x_phy_chan *phychan;
+	struct tasklet_struct tasklet;
+	char *name;
+	struct pl08x_channel_data *cd;
+	dma_addr_t runtime_addr;
+	enum dma_data_direction	runtime_direction;
+	atomic_t last_issued;
+	dma_cookie_t lc;
+	struct list_head desc_list;
+	struct pl08x_txd *at;
+	unsigned long lockflags;
+	spinlock_t lock;
+	void *host;
+	enum pl08x_dma_chan_state state;
+	bool slave;
+	struct pl08x_txd *waiting;
+};
+
+/**
+ * struct pl08x_platform_data - the platform configuration for the
+ * PL08x PrimeCells.
+ * @slave_channels: the channels defined for the different devices on the
+ * platform, all inclusive, including multiplexed channels. The available
+ * physical channels will be multiplexed around these signals as they
+ * are requested, just enumerate all possible channels.
+ * @get_signal: request a physical signal to be used for a DMA
+ * transfer immediately: if there is some multiplexing or similar blocking
+ * the use of the channel the transfer can be denied by returning
+ * less than zero, else it returns the allocated signal number
+ * @put_signal: indicate to the platform that this physical signal is not
+ * running any DMA transfer and multiplexing can be recycled
+ * @bus_bit_lli: Bit[0] of the address indicated which AHB bus master the
+ * LLI addresses are on 0/1 Master 1/2.
+ */
+struct pl08x_platform_data {
+	struct pl08x_channel_data *slave_channels;
+	unsigned int num_slave_channels;
+	struct pl08x_channel_data memcpy_channel;
+	int (*get_signal)(struct pl08x_dma_chan *);
+	void (*put_signal)(struct pl08x_dma_chan *);
+};
+
+#ifdef CONFIG_AMBA_PL08X
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id);
+#else
+static inline bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	return false;
+}
+#endif
+
+#endif	/* AMBA_PL08X_H */

From 782bc950d84e404422ba21008fd51ee894c8d231 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 30 Sep 2010 13:56:32 +0000
Subject: [PATCH 05/18] dmaengine: add possibility for cyclic transfers

Cyclic transfers are useful for audio where a single buffer divided
in periods has to be transfered endlessly until stopped. After being
prepared the transfer is started using the dma_async_descriptor->tx_submit
function. dma_async_descriptor->callback is called after each period.
The transfer is stopped using the DMA_TERMINATE_ALL callback.
While being used for cyclic transfers the channel cannot be used
for other transfer types.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c   | 2 ++
 include/linux/dmaengine.h | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9d31d5eb95c1..e5e79ced4f4b 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -692,6 +692,8 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_interrupt);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_prep_slave_sg);
+	BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
+		!device->device_prep_dma_cyclic);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_control);
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c61d4ca27bcc..32cd84b47478 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -67,10 +67,11 @@ enum dma_transaction_type {
 	DMA_PRIVATE,
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
+	DMA_CYCLIC,
 };
 
 /* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_SLAVE + 1)
+#define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
 
 
 /**
@@ -422,6 +423,9 @@ struct dma_tx_state {
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
+ * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
+ *	The function takes a buffer of size buf_len. The callback function will
+ *	be called after period_len bytes have been transferred.
  * @device_control: manipulate all pending operations on a channel, returns
  *	zero or error code
  * @device_tx_status: poll for transaction completion, the optional
@@ -478,6 +482,9 @@ struct dma_device {
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_data_direction direction,
 		unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 

From 6e3ecaf0ad49de0bed829d409a164e7107c02993 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 30 Sep 2010 13:56:33 +0000
Subject: [PATCH 06/18] dmaengine: add wrapper functions for device control
 functions

Add wrapper functions around the dma_device->device_control function
to bring back type safety. Also, add a wrapper function around
dma_async_tx_descriptor->tx_submit. This is named dmaengine_submit
instead of dmaengine_tx_submit to get rid of the confusing 'tx' in the
function name

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 32cd84b47478..2218fdcbe8a9 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -494,6 +494,40 @@ struct dma_device {
 	void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline int dmaengine_device_control(struct dma_chan *chan,
+					   enum dma_ctrl_cmd cmd,
+					   unsigned long arg)
+{
+	return chan->device->device_control(chan, cmd, arg);
+}
+
+static inline int dmaengine_slave_config(struct dma_chan *chan,
+					  struct dma_slave_config *config)
+{
+	return dmaengine_device_control(chan, DMA_SLAVE_CONFIG,
+			(unsigned long)config);
+}
+
+static inline int dmaengine_terminate_all(struct dma_chan *chan)
+{
+	return dmaengine_device_control(chan, DMA_TERMINATE_ALL, 0);
+}
+
+static inline int dmaengine_pause(struct dma_chan *chan)
+{
+	return dmaengine_device_control(chan, DMA_PAUSE, 0);
+}
+
+static inline int dmaengine_resume(struct dma_chan *chan)
+{
+	return dmaengine_device_control(chan, DMA_RESUME, 0);
+}
+
+static inline int dmaengine_submit(struct dma_async_tx_descriptor *desc)
+{
+	return desc->tx_submit(desc);
+}
+
 static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
 {
 	size_t mask;

From 1ec1e82f2510e2bdcb6268ed74aa79e1a7bc9594 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 30 Sep 2010 13:56:34 +0000
Subject: [PATCH 07/18] dmaengine: Add Freescale i.MX SDMA support

This patch adds support for the Freescale i.MX SDMA engine.

The SDMA engine is a scatter/gather DMA engine which is implemented
as a seperate coprocessor. SDMA needs its own firmware which is
requested using the standard request_firmware mechanism. The firmware
has different entry points for each peripheral type, so drivers
have to pass the peripheral type to the DMA engine which in turn
picks the correct firmware entry point from a table contained in
the firmware image itself.
The original Freescale code also supports support for transfering
data to the internal SRAM which needs different entry points to
the firmware. Support for this is currently not implemented. Also,
support for the ASRC (asymmetric sample rate converter) is skipped.

I took a very simple approach to implement dmaengine support. Only
a single descriptor is statically assigned to a each channel. This
means that transfers can't be queued up but only a single transfer
is in progress. This simplifies implementation a lot and is sufficient
for the usual device/memory transfers.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Linus Walleij <linus.ml.walleij@gmail.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/arm/mach-imx/include/mach/dma-v1.h |    8 +-
 arch/arm/plat-mxc/include/mach/dma.h    |   67 ++
 arch/arm/plat-mxc/include/mach/sdma.h   |   17 +
 drivers/dma/Kconfig                     |    8 +
 drivers/dma/Makefile                    |    1 +
 drivers/dma/imx-sdma.c                  | 1392 +++++++++++++++++++++++
 6 files changed, 1487 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm/plat-mxc/include/mach/dma.h
 create mode 100644 arch/arm/plat-mxc/include/mach/sdma.h
 create mode 100644 drivers/dma/imx-sdma.c

diff --git a/arch/arm/mach-imx/include/mach/dma-v1.h b/arch/arm/mach-imx/include/mach/dma-v1.h
index 287431cc13e5..ac6fd713828a 100644
--- a/arch/arm/mach-imx/include/mach/dma-v1.h
+++ b/arch/arm/mach-imx/include/mach/dma-v1.h
@@ -27,6 +27,8 @@
 
 #define imx_has_dma_v1()	(cpu_is_mx1() || cpu_is_mx21() || cpu_is_mx27())
 
+#include <mach/dma.h>
+
 #define IMX_DMA_CHANNELS  16
 
 #define DMA_MODE_READ		0
@@ -96,12 +98,6 @@ int imx_dma_request(int channel, const char *name);
 
 void imx_dma_free(int channel);
 
-enum imx_dma_prio {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 1,
-	DMA_PRIO_LOW = 2
-};
-
 int imx_dma_request_by_prio(const char *name, enum imx_dma_prio prio);
 
 #endif	/* __MACH_DMA_V1_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/dma.h b/arch/arm/plat-mxc/include/mach/dma.h
new file mode 100644
index 000000000000..ef7751546f5f
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/dma.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_MXC_DMA_H__
+#define __ASM_ARCH_MXC_DMA_H__
+
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+
+/*
+ * This enumerates peripheral types. Used for SDMA.
+ */
+enum sdma_peripheral_type {
+	IMX_DMATYPE_SSI,	/* MCU domain SSI */
+	IMX_DMATYPE_SSI_SP,	/* Shared SSI */
+	IMX_DMATYPE_MMC,	/* MMC */
+	IMX_DMATYPE_SDHC,	/* SDHC */
+	IMX_DMATYPE_UART,	/* MCU domain UART */
+	IMX_DMATYPE_UART_SP,	/* Shared UART */
+	IMX_DMATYPE_FIRI,	/* FIRI */
+	IMX_DMATYPE_CSPI,	/* MCU domain CSPI */
+	IMX_DMATYPE_CSPI_SP,	/* Shared CSPI */
+	IMX_DMATYPE_SIM,	/* SIM */
+	IMX_DMATYPE_ATA,	/* ATA */
+	IMX_DMATYPE_CCM,	/* CCM */
+	IMX_DMATYPE_EXT,	/* External peripheral */
+	IMX_DMATYPE_MSHC,	/* Memory Stick Host Controller */
+	IMX_DMATYPE_MSHC_SP,	/* Shared Memory Stick Host Controller */
+	IMX_DMATYPE_DSP,	/* DSP */
+	IMX_DMATYPE_MEMORY,	/* Memory */
+	IMX_DMATYPE_FIFO_MEMORY,/* FIFO type Memory */
+	IMX_DMATYPE_SPDIF,	/* SPDIF */
+	IMX_DMATYPE_IPU_MEMORY,	/* IPU Memory */
+	IMX_DMATYPE_ASRC,	/* ASRC */
+	IMX_DMATYPE_ESAI,	/* ESAI */
+};
+
+enum imx_dma_prio {
+	DMA_PRIO_HIGH = 0,
+	DMA_PRIO_MEDIUM = 1,
+	DMA_PRIO_LOW = 2
+};
+
+struct imx_dma_data {
+	int dma_request; /* DMA request line */
+	enum sdma_peripheral_type peripheral_type;
+	int priority;
+};
+
+static inline int imx_dma_is_ipu(struct dma_chan *chan)
+{
+	return !strcmp(dev_name(chan->device->dev), "ipu-core");
+}
+
+static inline int imx_dma_is_general_purpose(struct dma_chan *chan)
+{
+	return !strcmp(dev_name(chan->device->dev), "imx-sdma") ||
+		!strcmp(dev_name(chan->device->dev), "imx-dma");
+}
+
+#endif
diff --git a/arch/arm/plat-mxc/include/mach/sdma.h b/arch/arm/plat-mxc/include/mach/sdma.h
new file mode 100644
index 000000000000..9be112227ac4
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/sdma.h
@@ -0,0 +1,17 @@
+#ifndef __MACH_MXC_SDMA_H__
+#define __MACH_MXC_SDMA_H__
+
+/**
+ * struct sdma_platform_data - platform specific data for SDMA engine
+ *
+ * @sdma_version	The version of this SDMA engine
+ * @cpu_name		used to generate the firmware name
+ * @to_version		CPU Tape out version
+ */
+struct sdma_platform_data {
+	int sdma_version;
+	char *cpu_name;
+	int to_version;
+};
+
+#endif /* __MACH_MXC_SDMA_H__ */
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 9520cf02edc8..3cf1d123f2d3 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -195,6 +195,14 @@ config PCH_DMA
 	help
 	  Enable support for the Topcliff PCH DMA engine.
 
+config IMX_SDMA
+	tristate "i.MX SDMA support"
+	depends on ARCH_MX25 || ARCH_MX3 || ARCH_MX5
+	select DMA_ENGINE
+	help
+	  Support the i.MX SDMA engine. This engine is integrated into
+	  Freescale i.MX25/31/35/51 chips.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 72bd70384d8a..3ed7babd3a99 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
new file mode 100644
index 000000000000..0834323a0599
--- /dev/null
+++ b/drivers/dma/imx-sdma.c
@@ -0,0 +1,1392 @@
+/*
+ * drivers/dma/imx-sdma.c
+ *
+ * This file contains a driver for the Freescale Smart DMA engine
+ *
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ *
+ * Based on code from Freescale:
+ *
+ * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+
+#include <asm/irq.h>
+#include <mach/sdma.h>
+#include <mach/dma.h>
+#include <mach/hardware.h>
+
+/* SDMA registers */
+#define SDMA_H_C0PTR		0x000
+#define SDMA_H_INTR		0x004
+#define SDMA_H_STATSTOP		0x008
+#define SDMA_H_START		0x00c
+#define SDMA_H_EVTOVR		0x010
+#define SDMA_H_DSPOVR		0x014
+#define SDMA_H_HOSTOVR		0x018
+#define SDMA_H_EVTPEND		0x01c
+#define SDMA_H_DSPENBL		0x020
+#define SDMA_H_RESET		0x024
+#define SDMA_H_EVTERR		0x028
+#define SDMA_H_INTRMSK		0x02c
+#define SDMA_H_PSW		0x030
+#define SDMA_H_EVTERRDBG	0x034
+#define SDMA_H_CONFIG		0x038
+#define SDMA_ONCE_ENB		0x040
+#define SDMA_ONCE_DATA		0x044
+#define SDMA_ONCE_INSTR		0x048
+#define SDMA_ONCE_STAT		0x04c
+#define SDMA_ONCE_CMD		0x050
+#define SDMA_EVT_MIRROR		0x054
+#define SDMA_ILLINSTADDR	0x058
+#define SDMA_CHN0ADDR		0x05c
+#define SDMA_ONCE_RTB		0x060
+#define SDMA_XTRIG_CONF1	0x070
+#define SDMA_XTRIG_CONF2	0x074
+#define SDMA_CHNENBL0_V2	0x200
+#define SDMA_CHNENBL0_V1	0x080
+#define SDMA_CHNPRI_0		0x100
+
+/*
+ * Buffer descriptor status values.
+ */
+#define BD_DONE  0x01
+#define BD_WRAP  0x02
+#define BD_CONT  0x04
+#define BD_INTR  0x08
+#define BD_RROR  0x10
+#define BD_LAST  0x20
+#define BD_EXTD  0x80
+
+/*
+ * Data Node descriptor status values.
+ */
+#define DND_END_OF_FRAME  0x80
+#define DND_END_OF_XFER   0x40
+#define DND_DONE          0x20
+#define DND_UNUSED        0x01
+
+/*
+ * IPCV2 descriptor status values.
+ */
+#define BD_IPCV2_END_OF_FRAME  0x40
+
+#define IPCV2_MAX_NODES        50
+/*
+ * Error bit set in the CCB status field by the SDMA,
+ * in setbd routine, in case of a transfer error
+ */
+#define DATA_ERROR  0x10000000
+
+/*
+ * Buffer descriptor commands.
+ */
+#define C0_ADDR             0x01
+#define C0_LOAD             0x02
+#define C0_DUMP             0x03
+#define C0_SETCTX           0x07
+#define C0_GETCTX           0x03
+#define C0_SETDM            0x01
+#define C0_SETPM            0x04
+#define C0_GETDM            0x02
+#define C0_GETPM            0x08
+/*
+ * Change endianness indicator in the BD command field
+ */
+#define CHANGE_ENDIANNESS   0x80
+
+/*
+ * Mode/Count of data node descriptors - IPCv2
+ */
+struct sdma_mode_count {
+	u32 count   : 16; /* size of the buffer pointed by this BD */
+	u32 status  :  8; /* E,R,I,C,W,D status bits stored here */
+	u32 command :  8; /* command mostlky used for channel 0 */
+};
+
+/*
+ * Buffer descriptor
+ */
+struct sdma_buffer_descriptor {
+	struct sdma_mode_count  mode;
+	u32 buffer_addr;	/* address of the buffer described */
+	u32 ext_buffer_addr;	/* extended buffer address */
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_channel_control - Channel control Block
+ *
+ * @current_bd_ptr	current buffer descriptor processed
+ * @base_bd_ptr		first element of buffer descriptor array
+ * @unused		padding. The SDMA engine expects an array of 128 byte
+ *			control blocks
+ */
+struct sdma_channel_control {
+	u32 current_bd_ptr;
+	u32 base_bd_ptr;
+	u32 unused[2];
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_state_registers - SDMA context for a channel
+ *
+ * @pc:		program counter
+ * @t:		test bit: status of arithmetic & test instruction
+ * @rpc:	return program counter
+ * @sf:		source fault while loading data
+ * @spc:	loop start program counter
+ * @df:		destination fault while storing data
+ * @epc:	loop end program counter
+ * @lm:		loop mode
+ */
+struct sdma_state_registers {
+	u32 pc     :14;
+	u32 unused1: 1;
+	u32 t      : 1;
+	u32 rpc    :14;
+	u32 unused0: 1;
+	u32 sf     : 1;
+	u32 spc    :14;
+	u32 unused2: 1;
+	u32 df     : 1;
+	u32 epc    :14;
+	u32 lm     : 2;
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_context_data - sdma context specific to a channel
+ *
+ * @channel_state:	channel state bits
+ * @gReg:		general registers
+ * @mda:		burst dma destination address register
+ * @msa:		burst dma source address register
+ * @ms:			burst dma status register
+ * @md:			burst dma data register
+ * @pda:		peripheral dma destination address register
+ * @psa:		peripheral dma source address register
+ * @ps:			peripheral dma status register
+ * @pd:			peripheral dma data register
+ * @ca:			CRC polynomial register
+ * @cs:			CRC accumulator register
+ * @dda:		dedicated core destination address register
+ * @dsa:		dedicated core source address register
+ * @ds:			dedicated core status register
+ * @dd:			dedicated core data register
+ */
+struct sdma_context_data {
+	struct sdma_state_registers  channel_state;
+	u32  gReg[8];
+	u32  mda;
+	u32  msa;
+	u32  ms;
+	u32  md;
+	u32  pda;
+	u32  psa;
+	u32  ps;
+	u32  pd;
+	u32  ca;
+	u32  cs;
+	u32  dda;
+	u32  dsa;
+	u32  ds;
+	u32  dd;
+	u32  scratch0;
+	u32  scratch1;
+	u32  scratch2;
+	u32  scratch3;
+	u32  scratch4;
+	u32  scratch5;
+	u32  scratch6;
+	u32  scratch7;
+} __attribute__ ((packed));
+
+#define NUM_BD (int)(PAGE_SIZE / sizeof(struct sdma_buffer_descriptor))
+
+struct sdma_engine;
+
+/**
+ * struct sdma_channel - housekeeping for a SDMA channel
+ *
+ * @sdma		pointer to the SDMA engine for this channel
+ * @channel		the channel number, matches dmaengine chan_id
+ * @direction		transfer type. Needed for setting SDMA script
+ * @peripheral_type	Peripheral type. Needed for setting SDMA script
+ * @event_id0		aka dma request line
+ * @event_id1		for channels that use 2 events
+ * @word_size		peripheral access size
+ * @buf_tail		ID of the buffer that was processed
+ * @done		channel completion
+ * @num_bd		max NUM_BD. number of descriptors currently handling
+ */
+struct sdma_channel {
+	struct sdma_engine		*sdma;
+	unsigned int			channel;
+	enum dma_data_direction		direction;
+	enum sdma_peripheral_type	peripheral_type;
+	unsigned int			event_id0;
+	unsigned int			event_id1;
+	enum dma_slave_buswidth		word_size;
+	unsigned int			buf_tail;
+	struct completion		done;
+	unsigned int			num_bd;
+	struct sdma_buffer_descriptor	*bd;
+	dma_addr_t			bd_phys;
+	unsigned int			pc_from_device, pc_to_device;
+	unsigned long			flags;
+	dma_addr_t			per_address;
+	u32				event_mask0, event_mask1;
+	u32				watermark_level;
+	u32				shp_addr, per_addr;
+	struct dma_chan			chan;
+	spinlock_t			lock;
+	struct dma_async_tx_descriptor	desc;
+	dma_cookie_t			last_completed;
+	enum dma_status			status;
+};
+
+#define IMX_DMA_SG_LOOP		(1 << 0)
+
+#define MAX_DMA_CHANNELS 32
+#define MXC_SDMA_DEFAULT_PRIORITY 1
+#define MXC_SDMA_MIN_PRIORITY 1
+#define MXC_SDMA_MAX_PRIORITY 7
+
+/**
+ * struct sdma_script_start_addrs - SDMA script start pointers
+ *
+ * start addresses of the different functions in the physical
+ * address space of the SDMA engine.
+ */
+struct sdma_script_start_addrs {
+	u32 ap_2_ap_addr;
+	u32 ap_2_bp_addr;
+	u32 ap_2_ap_fixed_addr;
+	u32 bp_2_ap_addr;
+	u32 loopback_on_dsp_side_addr;
+	u32 mcu_interrupt_only_addr;
+	u32 firi_2_per_addr;
+	u32 firi_2_mcu_addr;
+	u32 per_2_firi_addr;
+	u32 mcu_2_firi_addr;
+	u32 uart_2_per_addr;
+	u32 uart_2_mcu_addr;
+	u32 per_2_app_addr;
+	u32 mcu_2_app_addr;
+	u32 per_2_per_addr;
+	u32 uartsh_2_per_addr;
+	u32 uartsh_2_mcu_addr;
+	u32 per_2_shp_addr;
+	u32 mcu_2_shp_addr;
+	u32 ata_2_mcu_addr;
+	u32 mcu_2_ata_addr;
+	u32 app_2_per_addr;
+	u32 app_2_mcu_addr;
+	u32 shp_2_per_addr;
+	u32 shp_2_mcu_addr;
+	u32 mshc_2_mcu_addr;
+	u32 mcu_2_mshc_addr;
+	u32 spdif_2_mcu_addr;
+	u32 mcu_2_spdif_addr;
+	u32 asrc_2_mcu_addr;
+	u32 ext_mem_2_ipu_addr;
+	u32 descrambler_addr;
+	u32 dptc_dvfs_addr;
+	u32 utra_addr;
+	u32 ram_code_start_addr;
+};
+
+#define SDMA_FIRMWARE_MAGIC 0x414d4453
+
+/**
+ * struct sdma_firmware_header - Layout of the firmware image
+ *
+ * @magic		"SDMA"
+ * @version_major	increased whenever layout of struct sdma_script_start_addrs
+ *			changes.
+ * @version_minor	firmware minor version (for binary compatible changes)
+ * @script_addrs_start	offset of struct sdma_script_start_addrs in this image
+ * @num_script_addrs	Number of script addresses in this image
+ * @ram_code_start	offset of SDMA ram image in this firmware image
+ * @ram_code_size	size of SDMA ram image
+ * @script_addrs	Stores the start address of the SDMA scripts
+ *			(in SDMA memory space)
+ */
+struct sdma_firmware_header {
+	u32	magic;
+	u32	version_major;
+	u32	version_minor;
+	u32	script_addrs_start;
+	u32	num_script_addrs;
+	u32	ram_code_start;
+	u32	ram_code_size;
+};
+
+struct sdma_engine {
+	struct device			*dev;
+	struct sdma_channel		channel[MAX_DMA_CHANNELS];
+	struct sdma_channel_control	*channel_control;
+	void __iomem			*regs;
+	unsigned int			version;
+	unsigned int			num_events;
+	struct sdma_context_data	*context;
+	dma_addr_t			context_phys;
+	struct dma_device		dma_device;
+	struct clk			*clk;
+	struct sdma_script_start_addrs	*script_addrs;
+};
+
+#define SDMA_H_CONFIG_DSPDMA	(1 << 12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS	(1 << 11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR	(1 << 4)  /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_CSM	(3)       /* indicates which context switch mode is selected*/
+
+static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
+{
+	u32 chnenbl0 = (sdma->version == 2 ? SDMA_CHNENBL0_V2 : SDMA_CHNENBL0_V1);
+
+	return chnenbl0 + event * 4;
+}
+
+static int sdma_config_ownership(struct sdma_channel *sdmac,
+		bool event_override, bool mcu_override, bool dsp_override)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	u32 evt, mcu, dsp;
+
+	if (event_override && mcu_override && dsp_override)
+		return -EINVAL;
+
+	evt = __raw_readl(sdma->regs + SDMA_H_EVTOVR);
+	mcu = __raw_readl(sdma->regs + SDMA_H_HOSTOVR);
+	dsp = __raw_readl(sdma->regs + SDMA_H_DSPOVR);
+
+	if (dsp_override)
+		dsp &= ~(1 << channel);
+	else
+		dsp |= (1 << channel);
+
+	if (event_override)
+		evt &= ~(1 << channel);
+	else
+		evt |= (1 << channel);
+
+	if (mcu_override)
+		mcu &= ~(1 << channel);
+	else
+		mcu |= (1 << channel);
+
+	__raw_writel(evt, sdma->regs + SDMA_H_EVTOVR);
+	__raw_writel(mcu, sdma->regs + SDMA_H_HOSTOVR);
+	__raw_writel(dsp, sdma->regs + SDMA_H_DSPOVR);
+
+	return 0;
+}
+
+/*
+ * sdma_run_channel - run a channel and wait till it's done
+ */
+static int sdma_run_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int ret;
+
+	init_completion(&sdmac->done);
+
+	__raw_writel(1 << channel, sdma->regs + SDMA_H_START);
+
+	ret = wait_for_completion_timeout(&sdmac->done, HZ);
+
+	return ret ? 0 : -ETIMEDOUT;
+}
+
+static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
+		u32 address)
+{
+	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	void *buf_virt;
+	dma_addr_t buf_phys;
+	int ret;
+
+	buf_virt = dma_alloc_coherent(NULL,
+			size,
+			&buf_phys, GFP_KERNEL);
+	if (!buf_virt)
+		return -ENOMEM;
+
+	bd0->mode.command = C0_SETPM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = size / 2;
+	bd0->buffer_addr = buf_phys;
+	bd0->ext_buffer_addr = address;
+
+	memcpy(buf_virt, buf, size);
+
+	ret = sdma_run_channel(&sdma->channel[0]);
+
+	dma_free_coherent(NULL, size, buf_virt, buf_phys);
+
+	return ret;
+}
+
+static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	u32 val;
+	u32 chnenbl = chnenbl_ofs(sdma, event);
+
+	val = __raw_readl(sdma->regs + chnenbl);
+	val |= (1 << channel);
+	__raw_writel(val, sdma->regs + chnenbl);
+}
+
+static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	u32 chnenbl = chnenbl_ofs(sdma, event);
+	u32 val;
+
+	val = __raw_readl(sdma->regs + chnenbl);
+	val &= ~(1 << channel);
+	__raw_writel(val, sdma->regs + chnenbl);
+}
+
+static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+
+	/*
+	 * loop mode. Iterate over descriptors, re-setup them and
+	 * call callback function.
+	 */
+	while (1) {
+		bd = &sdmac->bd[sdmac->buf_tail];
+
+		if (bd->mode.status & BD_DONE)
+			break;
+
+		if (bd->mode.status & BD_RROR)
+			sdmac->status = DMA_ERROR;
+		else
+			sdmac->status = DMA_SUCCESS;
+
+		bd->mode.status |= BD_DONE;
+		sdmac->buf_tail++;
+		sdmac->buf_tail %= sdmac->num_bd;
+
+		if (sdmac->desc.callback)
+			sdmac->desc.callback(sdmac->desc.callback_param);
+	}
+}
+
+static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+	int i, error = 0;
+
+	/*
+	 * non loop mode. Iterate over all descriptors, collect
+	 * errors and call callback function
+	 */
+	for (i = 0; i < sdmac->num_bd; i++) {
+		bd = &sdmac->bd[i];
+
+		 if (bd->mode.status & (BD_DONE | BD_RROR))
+			error = -EIO;
+	}
+
+	if (error)
+		sdmac->status = DMA_ERROR;
+	else
+		sdmac->status = DMA_SUCCESS;
+
+	if (sdmac->desc.callback)
+		sdmac->desc.callback(sdmac->desc.callback_param);
+	sdmac->last_completed = sdmac->desc.cookie;
+}
+
+static void mxc_sdma_handle_channel(struct sdma_channel *sdmac)
+{
+	complete(&sdmac->done);
+
+	/* not interested in channel 0 interrupts */
+	if (sdmac->channel == 0)
+		return;
+
+	if (sdmac->flags & IMX_DMA_SG_LOOP)
+		sdma_handle_channel_loop(sdmac);
+	else
+		mxc_sdma_handle_channel_normal(sdmac);
+}
+
+static irqreturn_t sdma_int_handler(int irq, void *dev_id)
+{
+	struct sdma_engine *sdma = dev_id;
+	u32 stat;
+
+	stat = __raw_readl(sdma->regs + SDMA_H_INTR);
+	__raw_writel(stat, sdma->regs + SDMA_H_INTR);
+
+	while (stat) {
+		int channel = fls(stat) - 1;
+		struct sdma_channel *sdmac = &sdma->channel[channel];
+
+		mxc_sdma_handle_channel(sdmac);
+
+		stat &= ~(1 << channel);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * sets the pc of SDMA script according to the peripheral type
+ */
+static void sdma_get_pc(struct sdma_channel *sdmac,
+		enum sdma_peripheral_type peripheral_type)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int per_2_emi = 0, emi_2_per = 0;
+	/*
+	 * These are needed once we start to support transfers between
+	 * two peripherals or memory-to-memory transfers
+	 */
+	int per_2_per = 0, emi_2_emi = 0;
+
+	sdmac->pc_from_device = 0;
+	sdmac->pc_to_device = 0;
+
+	switch (peripheral_type) {
+	case IMX_DMATYPE_MEMORY:
+		emi_2_emi = sdma->script_addrs->ap_2_ap_addr;
+		break;
+	case IMX_DMATYPE_DSP:
+		emi_2_per = sdma->script_addrs->bp_2_ap_addr;
+		per_2_emi = sdma->script_addrs->ap_2_bp_addr;
+		break;
+	case IMX_DMATYPE_FIRI:
+		per_2_emi = sdma->script_addrs->firi_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_firi_addr;
+		break;
+	case IMX_DMATYPE_UART:
+		per_2_emi = sdma->script_addrs->uart_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_UART_SP:
+		per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ATA:
+		per_2_emi = sdma->script_addrs->ata_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_ata_addr;
+		break;
+	case IMX_DMATYPE_CSPI:
+	case IMX_DMATYPE_EXT:
+	case IMX_DMATYPE_SSI:
+		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_SSI_SP:
+	case IMX_DMATYPE_MMC:
+	case IMX_DMATYPE_SDHC:
+	case IMX_DMATYPE_CSPI_SP:
+	case IMX_DMATYPE_ESAI:
+	case IMX_DMATYPE_MSHC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ASRC:
+		per_2_emi = sdma->script_addrs->asrc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->asrc_2_mcu_addr;
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
+	case IMX_DMATYPE_MSHC:
+		per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
+		break;
+	case IMX_DMATYPE_CCM:
+		per_2_emi = sdma->script_addrs->dptc_dvfs_addr;
+		break;
+	case IMX_DMATYPE_SPDIF:
+		per_2_emi = sdma->script_addrs->spdif_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_spdif_addr;
+		break;
+	case IMX_DMATYPE_IPU_MEMORY:
+		emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr;
+		break;
+	default:
+		break;
+	}
+
+	sdmac->pc_from_device = per_2_emi;
+	sdmac->pc_to_device = emi_2_per;
+}
+
+static int sdma_load_context(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int load_address;
+	struct sdma_context_data *context = sdma->context;
+	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	int ret;
+
+	if (sdmac->direction == DMA_FROM_DEVICE) {
+		load_address = sdmac->pc_from_device;
+	} else {
+		load_address = sdmac->pc_to_device;
+	}
+
+	if (load_address < 0)
+		return load_address;
+
+	dev_dbg(sdma->dev, "load_address = %d\n", load_address);
+	dev_dbg(sdma->dev, "wml = 0x%08x\n", sdmac->watermark_level);
+	dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
+	dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
+	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", sdmac->event_mask0);
+	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", sdmac->event_mask1);
+
+	memset(context, 0, sizeof(*context));
+	context->channel_state.pc = load_address;
+
+	/* Send by context the event mask,base address for peripheral
+	 * and watermark level
+	 */
+	context->gReg[0] = sdmac->event_mask1;
+	context->gReg[1] = sdmac->event_mask0;
+	context->gReg[2] = sdmac->per_addr;
+	context->gReg[6] = sdmac->shp_addr;
+	context->gReg[7] = sdmac->watermark_level;
+
+	bd0->mode.command = C0_SETDM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = sizeof(*context) / 4;
+	bd0->buffer_addr = sdma->context_phys;
+	bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
+
+	ret = sdma_run_channel(&sdma->channel[0]);
+
+	return ret;
+}
+
+static void sdma_disable_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	__raw_writel(1 << channel, sdma->regs + SDMA_H_STATSTOP);
+	sdmac->status = DMA_ERROR;
+}
+
+static int sdma_config_channel(struct sdma_channel *sdmac)
+{
+	int ret;
+
+	sdma_disable_channel(sdmac);
+
+	sdmac->event_mask0 = 0;
+	sdmac->event_mask1 = 0;
+	sdmac->shp_addr = 0;
+	sdmac->per_addr = 0;
+
+	if (sdmac->event_id0) {
+		if (sdmac->event_id0 > 32)
+			return -EINVAL;
+		sdma_event_enable(sdmac, sdmac->event_id0);
+	}
+
+	switch (sdmac->peripheral_type) {
+	case IMX_DMATYPE_DSP:
+		sdma_config_ownership(sdmac, false, true, true);
+		break;
+	case IMX_DMATYPE_MEMORY:
+		sdma_config_ownership(sdmac, false, true, false);
+		break;
+	default:
+		sdma_config_ownership(sdmac, true, true, false);
+		break;
+	}
+
+	sdma_get_pc(sdmac, sdmac->peripheral_type);
+
+	if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) &&
+			(sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
+		/* Handle multiple event channels differently */
+		if (sdmac->event_id1) {
+			sdmac->event_mask1 = 1 << (sdmac->event_id1 % 32);
+			if (sdmac->event_id1 > 31)
+				sdmac->watermark_level |= 1 << 31;
+			sdmac->event_mask0 = 1 << (sdmac->event_id0 % 32);
+			if (sdmac->event_id0 > 31)
+				sdmac->watermark_level |= 1 << 30;
+		} else {
+			sdmac->event_mask0 = 1 << sdmac->event_id0;
+			sdmac->event_mask1 = 1 << (sdmac->event_id0 - 32);
+		}
+		/* Watermark Level */
+		sdmac->watermark_level |= sdmac->watermark_level;
+		/* Address */
+		sdmac->shp_addr = sdmac->per_address;
+	} else {
+		sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */
+	}
+
+	ret = sdma_load_context(sdmac);
+
+	return ret;
+}
+
+static int sdma_set_channel_priority(struct sdma_channel *sdmac,
+		unsigned int priority)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	if (priority < MXC_SDMA_MIN_PRIORITY
+	    || priority > MXC_SDMA_MAX_PRIORITY) {
+		return -EINVAL;
+	}
+
+	__raw_writel(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+
+	return 0;
+}
+
+static int sdma_request_channel(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int ret = -EBUSY;
+
+	sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, GFP_KERNEL);
+	if (!sdmac->bd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memset(sdmac->bd, 0, PAGE_SIZE);
+
+	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	clk_enable(sdma->clk);
+
+	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
+
+	init_completion(&sdmac->done);
+
+	sdmac->buf_tail = 0;
+
+	return 0;
+out:
+
+	return ret;
+}
+
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
+	__raw_writel(1 << channel, sdma->regs + SDMA_H_START);
+}
+
+static dma_cookie_t sdma_assign_cookie(struct sdma_channel *sdma)
+{
+	dma_cookie_t cookie = sdma->chan.cookie;
+
+	if (++cookie < 0)
+		cookie = 1;
+
+	sdma->chan.cookie = cookie;
+	sdma->desc.cookie = cookie;
+
+	return cookie;
+}
+
+static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sdma_channel, chan);
+}
+
+static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	dma_cookie_t cookie;
+
+	spin_lock_irq(&sdmac->lock);
+
+	cookie = sdma_assign_cookie(sdmac);
+
+	sdma_enable_channel(sdma, tx->chan->chan_id);
+
+	spin_unlock_irq(&sdmac->lock);
+
+	return cookie;
+}
+
+static int sdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+	int prio, ret;
+
+	/* No need to execute this for internal channel 0 */
+	if (chan->chan_id == 0)
+		return 0;
+
+	if (!data)
+		return -EINVAL;
+
+	switch (data->priority) {
+	case DMA_PRIO_HIGH:
+		prio = 3;
+		break;
+	case DMA_PRIO_MEDIUM:
+		prio = 2;
+		break;
+	case DMA_PRIO_LOW:
+	default:
+		prio = 1;
+		break;
+	}
+
+	sdmac->peripheral_type = data->peripheral_type;
+	sdmac->event_id0 = data->dma_request;
+	ret = sdma_set_channel_priority(sdmac, prio);
+	if (ret)
+		return ret;
+
+	ret = sdma_request_channel(sdmac);
+	if (ret)
+		return ret;
+
+	dma_async_tx_descriptor_init(&sdmac->desc, chan);
+	sdmac->desc.tx_submit = sdma_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	sdmac->desc.flags = DMA_CTRL_ACK;
+
+	return 0;
+}
+
+static void sdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	sdma_disable_channel(sdmac);
+
+	if (sdmac->event_id0)
+		sdma_event_disable(sdmac, sdmac->event_id0);
+	if (sdmac->event_id1)
+		sdma_event_disable(sdmac, sdmac->event_id1);
+
+	sdmac->event_id0 = 0;
+	sdmac->event_id1 = 0;
+
+	sdma_set_channel_priority(sdmac, 0);
+
+	dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys);
+
+	clk_disable(sdma->clk);
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int ret, i, count;
+	int channel = chan->chan_id;
+	struct scatterlist *sg;
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+	sdmac->status = DMA_IN_PROGRESS;
+
+	sdmac->flags = 0;
+
+	dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n",
+			sg_len, channel);
+
+	sdmac->direction = direction;
+	ret = sdma_load_context(sdmac);
+	if (ret)
+		goto err_out;
+
+	if (sg_len > NUM_BD) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
+				channel, sg_len, NUM_BD);
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		int param;
+
+		bd->buffer_addr = sgl->dma_address;
+
+		count = sg->length;
+
+		if (count > 0xffff) {
+			dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n",
+					channel, count, 0xffff);
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		bd->mode.count = count;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) {
+			ret =  -EINVAL;
+			goto err_out;
+		}
+		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			bd->mode.command = 0;
+		else
+			bd->mode.command = sdmac->word_size;
+
+		param = BD_DONE | BD_EXTD | BD_CONT;
+
+		if (sdmac->flags & IMX_DMA_SG_LOOP) {
+			param |= BD_INTR;
+			if (i + 1 == sg_len)
+				param |= BD_WRAP;
+		}
+
+		if (i + 1 == sg_len)
+			param |= BD_INTR;
+
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
+				i, count, sg->dma_address,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+	}
+
+	sdmac->num_bd = sg_len;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	return &sdmac->desc;
+err_out:
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int num_periods = buf_len / period_len;
+	int channel = chan->chan_id;
+	int ret, i = 0, buf = 0;
+
+	dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	sdmac->status = DMA_IN_PROGRESS;
+
+	sdmac->flags |= IMX_DMA_SG_LOOP;
+	sdmac->direction = direction;
+	ret = sdma_load_context(sdmac);
+	if (ret)
+		goto err_out;
+
+	if (num_periods > NUM_BD) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
+				channel, num_periods, NUM_BD);
+		goto err_out;
+	}
+
+	if (period_len > 0xffff) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %d > %d\n",
+				channel, period_len, 0xffff);
+		goto err_out;
+	}
+
+	while (buf < buf_len) {
+		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		int param;
+
+		bd->buffer_addr = dma_addr;
+
+		bd->mode.count = period_len;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+			goto err_out;
+		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			bd->mode.command = 0;
+		else
+			bd->mode.command = sdmac->word_size;
+
+		param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR;
+		if (i + 1 == num_periods)
+			param |= BD_WRAP;
+
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
+				i, period_len, dma_addr,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+
+		dma_addr += period_len;
+		buf += period_len;
+
+		i++;
+	}
+
+	sdmac->num_bd = num_periods;
+	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+
+	return &sdmac->desc;
+err_out:
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct dma_slave_config *dmaengine_cfg = (void *)arg;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		sdma_disable_channel(sdmac);
+		return 0;
+	case DMA_SLAVE_CONFIG:
+		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+			sdmac->per_address = dmaengine_cfg->src_addr;
+			sdmac->watermark_level = dmaengine_cfg->src_maxburst;
+			sdmac->word_size = dmaengine_cfg->src_addr_width;
+		} else {
+			sdmac->per_address = dmaengine_cfg->dst_addr;
+			sdmac->watermark_level = dmaengine_cfg->dst_maxburst;
+			sdmac->word_size = dmaengine_cfg->dst_addr_width;
+		}
+		return sdma_config_channel(sdmac);
+	default:
+		return -ENOSYS;
+	}
+
+	return -EINVAL;
+}
+
+static enum dma_status sdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	dma_cookie_t last_used;
+	enum dma_status ret;
+
+	last_used = chan->cookie;
+
+	ret = dma_async_is_complete(cookie, sdmac->last_completed, last_used);
+	dma_set_tx_state(txstate, sdmac->last_completed, last_used, 0);
+
+	return ret;
+}
+
+static void sdma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * Nothing to do. We only have a single descriptor
+	 */
+}
+
+static int __init sdma_init(struct sdma_engine *sdma,
+		void *ram_code, int ram_code_size)
+{
+	int i, ret;
+	dma_addr_t ccb_phys;
+
+	switch (sdma->version) {
+	case 1:
+		sdma->num_events = 32;
+		break;
+	case 2:
+		sdma->num_events = 48;
+		break;
+	default:
+		dev_err(sdma->dev, "Unknown version %d. aborting\n", sdma->version);
+		return -ENODEV;
+	}
+
+	clk_enable(sdma->clk);
+
+	/* Be sure SDMA has not started yet */
+	__raw_writel(0, sdma->regs + SDMA_H_C0PTR);
+
+	sdma->channel_control = dma_alloc_coherent(NULL,
+			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) +
+			sizeof(struct sdma_context_data),
+			&ccb_phys, GFP_KERNEL);
+
+	if (!sdma->channel_control) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	sdma->context = (void *)sdma->channel_control +
+		MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control);
+	sdma->context_phys = ccb_phys +
+		MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control);
+
+	/* Zero-out the CCB structures array just allocated */
+	memset(sdma->channel_control, 0,
+			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control));
+
+	/* disable all channels */
+	for (i = 0; i < sdma->num_events; i++)
+		__raw_writel(0, sdma->regs + chnenbl_ofs(sdma, i));
+
+	/* All channels have priority 0 */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++)
+		__raw_writel(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+
+	ret = sdma_request_channel(&sdma->channel[0]);
+	if (ret)
+		goto err_dma_alloc;
+
+	sdma_config_ownership(&sdma->channel[0], false, true, false);
+
+	/* Set Command Channel (Channel Zero) */
+	__raw_writel(0x4050, sdma->regs + SDMA_CHN0ADDR);
+
+	/* Set bits of CONFIG register but with static context switching */
+	/* FIXME: Check whether to set ACR bit depending on clock ratios */
+	__raw_writel(0, sdma->regs + SDMA_H_CONFIG);
+
+	__raw_writel(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+
+	/* download the RAM image for SDMA */
+	sdma_load_script(sdma, ram_code,
+			ram_code_size,
+			sdma->script_addrs->ram_code_start_addr);
+
+	/* Set bits of CONFIG register with given context switching mode */
+	__raw_writel(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+
+	/* Initializes channel's priorities */
+	sdma_set_channel_priority(&sdma->channel[0], 7);
+
+	clk_disable(sdma->clk);
+
+	return 0;
+
+err_dma_alloc:
+	clk_disable(sdma->clk);
+	dev_err(sdma->dev, "initialisation failed with %d\n", ret);
+	return ret;
+}
+
+static int __init sdma_probe(struct platform_device *pdev)
+{
+	int ret;
+	const struct firmware *fw;
+	const struct sdma_firmware_header *header;
+	const struct sdma_script_start_addrs *addr;
+	int irq;
+	unsigned short *ram_code;
+	struct resource *iores;
+	struct sdma_platform_data *pdata = pdev->dev.platform_data;
+	char *fwname;
+	int i;
+	dma_cap_mask_t mask;
+	struct sdma_engine *sdma;
+
+	sdma = kzalloc(sizeof(*sdma), GFP_KERNEL);
+	if (!sdma)
+		return -ENOMEM;
+
+	sdma->dev = &pdev->dev;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(pdev, 0);
+	if (!iores || irq < 0 || !pdata) {
+		ret = -EINVAL;
+		goto err_irq;
+	}
+
+	if (!request_mem_region(iores->start, resource_size(iores), pdev->name)) {
+		ret = -EBUSY;
+		goto err_request_region;
+	}
+
+	sdma->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sdma->clk)) {
+		ret = PTR_ERR(sdma->clk);
+		goto err_clk;
+	}
+
+	sdma->regs = ioremap(iores->start, resource_size(iores));
+	if (!sdma->regs) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	ret = request_irq(irq, sdma_int_handler, 0, "sdma", sdma);
+	if (ret)
+		goto err_request_irq;
+
+	fwname = kasprintf(GFP_KERNEL, "sdma-%s-to%d.bin",
+			pdata->cpu_name, pdata->to_version);
+	if (!fwname) {
+		ret = -ENOMEM;
+		goto err_cputype;
+	}
+
+	ret = request_firmware(&fw, fwname, &pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "request firmware \"%s\" failed with %d\n",
+				fwname, ret);
+		kfree(fwname);
+		goto err_cputype;
+	}
+	kfree(fwname);
+
+	if (fw->size < sizeof(*header))
+		goto err_firmware;
+
+	header = (struct sdma_firmware_header *)fw->data;
+
+	if (header->magic != SDMA_FIRMWARE_MAGIC)
+		goto err_firmware;
+	if (header->ram_code_start + header->ram_code_size > fw->size)
+		goto err_firmware;
+
+	addr = (void *)header + header->script_addrs_start;
+	ram_code = (void *)header + header->ram_code_start;
+	sdma->script_addrs = kmalloc(sizeof(*addr), GFP_KERNEL);
+	if (!sdma->script_addrs)
+		goto err_firmware;
+	memcpy(sdma->script_addrs, addr, sizeof(*addr));
+
+	sdma->version = pdata->sdma_version;
+
+	INIT_LIST_HEAD(&sdma->dma_device.channels);
+	/* Initialize channel parameters */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct sdma_channel *sdmac = &sdma->channel[i];
+
+		sdmac->sdma = sdma;
+		spin_lock_init(&sdmac->lock);
+
+		dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
+		dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
+
+		sdmac->chan.device = &sdma->dma_device;
+		sdmac->chan.chan_id = i;
+		sdmac->channel = i;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&sdmac->chan.device_node, &sdma->dma_device.channels);
+	}
+
+	ret = sdma_init(sdma, ram_code, header->ram_code_size);
+	if (ret)
+		goto err_init;
+
+	sdma->dma_device.dev = &pdev->dev;
+
+	sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
+	sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources;
+	sdma->dma_device.device_tx_status = sdma_tx_status;
+	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
+	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
+	sdma->dma_device.device_control = sdma_control;
+	sdma->dma_device.device_issue_pending = sdma_issue_pending;
+
+	ret = dma_async_device_register(&sdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	dev_info(&pdev->dev, "initialized (firmware %d.%d)\n",
+			header->version_major,
+			header->version_minor);
+
+	/* request channel 0. This is an internal control channel
+	 * to the SDMA engine and not available to clients.
+	 */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_request_channel(mask, NULL, NULL);
+
+	release_firmware(fw);
+
+	return 0;
+
+err_init:
+	kfree(sdma->script_addrs);
+err_firmware:
+	release_firmware(fw);
+err_cputype:
+	free_irq(irq, sdma);
+err_request_irq:
+	iounmap(sdma->regs);
+err_ioremap:
+	clk_put(sdma->clk);
+err_clk:
+	release_mem_region(iores->start, resource_size(iores));
+err_request_region:
+err_irq:
+	kfree(sdma);
+	return 0;
+}
+
+static int __exit sdma_remove(struct platform_device *pdev)
+{
+	return -EBUSY;
+}
+
+static struct platform_driver sdma_driver = {
+	.driver		= {
+		.name	= "imx-sdma",
+	},
+	.remove		= __exit_p(sdma_remove),
+};
+
+static int __init sdma_module_init(void)
+{
+	return platform_driver_probe(&sdma_driver, sdma_probe);
+}
+subsys_initcall(sdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX SDMA driver");
+MODULE_LICENSE("GPL");

From a86ee03ce6f279ebe581a7a8c0c4393eaeb789ee Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Thu, 30 Sep 2010 11:46:44 +0000
Subject: [PATCH 08/18] dma: add support for scatterlist to scatterlist copy

This adds support for scatterlist to scatterlist DMA transfers. A
similar interface is exposed by the fsldma driver (through the DMA_SLAVE
API) and by the ste_dma40 driver (through an exported function).

This patch paves the way for making this type of copy operation a part
of the generic DMAEngine API. Futher patches will add support in
individual drivers.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c   | 2 ++
 include/linux/dmaengine.h | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9d31d5eb95c1..db403b8ccabd 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -690,6 +690,8 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
 		!device->device_prep_dma_interrupt);
+	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
+		!device->device_prep_dma_sg);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_prep_slave_sg);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index e2106495cc11..2c9ee98f6c77 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -64,6 +64,7 @@ enum dma_transaction_type {
 	DMA_PQ_VAL,
 	DMA_MEMSET,
 	DMA_INTERRUPT,
+	DMA_SG,
 	DMA_PRIVATE,
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
@@ -473,6 +474,11 @@ struct dma_device {
 		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_sg)(
+		struct dma_chan *chan,
+		struct scatterlist *dst_sg, unsigned int dst_nents,
+		struct scatterlist *src_sg, unsigned int src_nents,
+		unsigned long flags);
 
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,

From c14330417ef2050f4bf38ac20e125785fea14351 Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Thu, 30 Sep 2010 11:46:45 +0000
Subject: [PATCH 09/18] fsldma: implement support for scatterlist to
 scatterlist copy

Now that the DMAEngine API has support for scatterlist to scatterlist
copy, implement support for the Freescale DMA controller.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/fsldma.c | 128 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 125 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index cea08bed9cf9..1ed29d10a5fa 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -38,6 +38,8 @@
 #include <asm/fsldma.h>
 #include "fsldma.h"
 
+static const char msg_ld_oom[] = "No free memory for link descriptor\n";
+
 static void dma_init(struct fsldma_chan *chan)
 {
 	/* Reset the channel */
@@ -499,7 +501,7 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
 
 	new = fsl_dma_alloc_descriptor(chan);
 	if (!new) {
-		dev_err(chan->dev, "No free memory for link descriptor\n");
+		dev_err(chan->dev, msg_ld_oom);
 		return NULL;
 	}
 
@@ -536,8 +538,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 		/* Allocate the link descriptor from DMA pool */
 		new = fsl_dma_alloc_descriptor(chan);
 		if (!new) {
-			dev_err(chan->dev,
-					"No free memory for link descriptor\n");
+			dev_err(chan->dev, msg_ld_oom);
 			goto fail;
 		}
 #ifdef FSL_DMA_LD_DEBUG
@@ -583,6 +584,125 @@ fail:
 	return NULL;
 }
 
+static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan,
+	struct scatterlist *dst_sg, unsigned int dst_nents,
+	struct scatterlist *src_sg, unsigned int src_nents,
+	unsigned long flags)
+{
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	size_t dst_avail, src_avail;
+	dma_addr_t dst, src;
+	size_t len;
+
+	/* basic sanity checks */
+	if (dst_nents == 0 || src_nents == 0)
+		return NULL;
+
+	if (dst_sg == NULL || src_sg == NULL)
+		return NULL;
+
+	/*
+	 * TODO: should we check that both scatterlists have the same
+	 * TODO: number of bytes in total? Is that really an error?
+	 */
+
+	/* get prepared for the loop */
+	dst_avail = sg_dma_len(dst_sg);
+	src_avail = sg_dma_len(src_sg);
+
+	/* run until we are out of scatterlist entries */
+	while (true) {
+
+		/* create the largest transaction possible */
+		len = min_t(size_t, src_avail, dst_avail);
+		len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT);
+		if (len == 0)
+			goto fetch;
+
+		dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;
+		src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
+
+		/* allocate and populate the descriptor */
+		new = fsl_dma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, msg_ld_oom);
+			goto fail;
+		}
+#ifdef FSL_DMA_LD_DEBUG
+		dev_dbg(chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+		set_desc_cnt(chan, &new->hw, len);
+		set_desc_src(chan, &new->hw, src);
+		set_desc_dst(chan, &new->hw, dst);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+		prev = new;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+
+		/* update metadata */
+		dst_avail -= len;
+		src_avail -= len;
+
+fetch:
+		/* fetch the next dst scatterlist entry */
+		if (dst_avail == 0) {
+
+			/* no more entries: we're done */
+			if (dst_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			dst_sg = sg_next(dst_sg);
+			if (dst_sg == NULL)
+				break;
+
+			dst_nents--;
+			dst_avail = sg_dma_len(dst_sg);
+		}
+
+		/* fetch the next src scatterlist entry */
+		if (src_avail == 0) {
+
+			/* no more entries: we're done */
+			if (src_nents == 0)
+				break;
+
+			/* fetch the next entry: if there are no more: done */
+			src_sg = sg_next(src_sg);
+			if (src_sg == NULL)
+				break;
+
+			src_nents--;
+			src_avail = sg_dma_len(src_sg);
+		}
+	}
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
+	return NULL;
+}
+
 /**
  * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
  * @chan: DMA channel
@@ -1327,11 +1447,13 @@ static int __devinit fsldma_of_probe(struct platform_device *op,
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+	dma_cap_set(DMA_SG, fdev->common.cap_mask);
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
 	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
 	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
 	fdev->common.device_tx_status = fsl_tx_status;
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;

From 968f19ae802fdc6b6b6b5af6fe79cf23d281be0f Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Thu, 30 Sep 2010 11:46:46 +0000
Subject: [PATCH 10/18] fsldma: improved DMA_SLAVE support

Now that the generic DMAEngine API has support for scatterlist to
scatterlist copying, the device_prep_slave_sg() portion of the
DMA_SLAVE API is no longer necessary and has been removed.

However, the device_control() portion of the DMA_SLAVE API is still
useful to control device specific parameters, such as externally
controlled DMA transfers and maximum burst length.

A special dma_ctrl_cmd has been added to enable externally controlled
DMA transfers. This is currently specific to the Freescale DMA
controller, but can easily be made generic when another user is found.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/include/asm/fsldma.h | 137 ------------------
 drivers/dma/fsldma.c              | 226 ++++++------------------------
 include/linux/dmaengine.h         |   3 +
 3 files changed, 47 insertions(+), 319 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/fsldma.h

diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
deleted file mode 100644
index debc5ed96d6e..000000000000
--- a/arch/powerpc/include/asm/fsldma.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Freescale MPC83XX / MPC85XX DMA Controller
- *
- * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
-#define __ARCH_POWERPC_ASM_FSLDMA_H__
-
-#include <linux/slab.h>
-#include <linux/dmaengine.h>
-
-/*
- * Definitions for the Freescale DMA controller's DMA_SLAVE implemention
- *
- * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
- * transfers. An example usage would be an accelerated copy between two
- * scatterlists. Another example use would be an accelerated copy from
- * multiple non-contiguous device buffers into a single scatterlist.
- *
- * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
- * structure contains a list of hardware addresses that should be copied
- * to/from the scatterlist passed into device_prep_slave_sg(). The structure
- * also has some fields to enable hardware-specific features.
- */
-
-/**
- * struct fsl_dma_hw_addr
- * @entry: linked list entry
- * @address: the hardware address
- * @length: length to transfer
- *
- * Holds a single physical hardware address / length pair for use
- * with the DMAEngine DMA_SLAVE API.
- */
-struct fsl_dma_hw_addr {
-	struct list_head entry;
-
-	dma_addr_t address;
-	size_t length;
-};
-
-/**
- * struct fsl_dma_slave
- * @addresses: a linked list of struct fsl_dma_hw_addr structures
- * @request_count: value for DMA request count
- * @src_loop_size: setup and enable constant source-address DMA transfers
- * @dst_loop_size: setup and enable constant destination address DMA transfers
- * @external_start: enable externally started DMA transfers
- * @external_pause: enable externally paused DMA transfers
- *
- * Holds a list of address / length pairs for use with the DMAEngine
- * DMA_SLAVE API implementation for the Freescale DMA controller.
- */
-struct fsl_dma_slave {
-
-	/* List of hardware address/length pairs */
-	struct list_head addresses;
-
-	/* Support for extra controller features */
-	unsigned int request_count;
-	unsigned int src_loop_size;
-	unsigned int dst_loop_size;
-	bool external_start;
-	bool external_pause;
-};
-
-/**
- * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
- * @slave: the &struct fsl_dma_slave to add to
- * @address: the hardware address to add
- * @length: the length of bytes to transfer from @address
- *
- * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
- * success, -ERRNO otherwise.
- */
-static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
-				       dma_addr_t address, size_t length)
-{
-	struct fsl_dma_hw_addr *addr;
-
-	addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
-	if (!addr)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&addr->entry);
-	addr->address = address;
-	addr->length = length;
-
-	list_add_tail(&addr->entry, &slave->addresses);
-	return 0;
-}
-
-/**
- * fsl_dma_slave_free - free a struct fsl_dma_slave
- * @slave: the struct fsl_dma_slave to free
- *
- * Free a struct fsl_dma_slave and all associated address/length pairs
- */
-static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
-{
-	struct fsl_dma_hw_addr *addr, *tmp;
-
-	if (slave) {
-		list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
-			list_del(&addr->entry);
-			kfree(addr);
-		}
-
-		kfree(slave);
-	}
-}
-
-/**
- * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
- * @gfp: the flags to pass to kmalloc when allocating this structure
- *
- * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
- * struct fsl_dma_slave on success, or NULL on failure.
- */
-static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
-{
-	struct fsl_dma_slave *slave;
-
-	slave = kzalloc(sizeof(*slave), gfp);
-	if (!slave)
-		return NULL;
-
-	INIT_LIST_HEAD(&slave->addresses);
-	return slave;
-}
-
-#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 1ed29d10a5fa..286c3ac6bdcc 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -35,7 +35,6 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
-#include <asm/fsldma.h>
 #include "fsldma.h"
 
 static const char msg_ld_oom[] = "No free memory for link descriptor\n";
@@ -719,207 +718,70 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
 	enum dma_data_direction direction, unsigned long flags)
 {
-	struct fsldma_chan *chan;
-	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
-	struct fsl_dma_slave *slave;
-	size_t copy;
-
-	int i;
-	struct scatterlist *sg;
-	size_t sg_used;
-	size_t hw_used;
-	struct fsl_dma_hw_addr *hw;
-	dma_addr_t dma_dst, dma_src;
-
-	if (!dchan)
-		return NULL;
-
-	if (!dchan->private)
-		return NULL;
-
-	chan = to_fsl_chan(dchan);
-	slave = dchan->private;
-
-	if (list_empty(&slave->addresses))
-		return NULL;
-
-	hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
-	hw_used = 0;
-
 	/*
-	 * Build the hardware transaction to copy from the scatterlist to
-	 * the hardware, or from the hardware to the scatterlist
+	 * This operation is not supported on the Freescale DMA controller
 	 *
-	 * If you are copying from the hardware to the scatterlist and it
-	 * takes two hardware entries to fill an entire page, then both
-	 * hardware entries will be coalesced into the same page
-	 *
-	 * If you are copying from the scatterlist to the hardware and a
-	 * single page can fill two hardware entries, then the data will
-	 * be read out of the page into the first hardware entry, and so on
+	 * However, we need to provide the function pointer to allow the
+	 * device_control() method to work.
 	 */
-	for_each_sg(sgl, sg, sg_len, i) {
-		sg_used = 0;
-
-		/* Loop until the entire scatterlist entry is used */
-		while (sg_used < sg_dma_len(sg)) {
-
-			/*
-			 * If we've used up the current hardware address/length
-			 * pair, we need to load a new one
-			 *
-			 * This is done in a while loop so that descriptors with
-			 * length == 0 will be skipped
-			 */
-			while (hw_used >= hw->length) {
-
-				/*
-				 * If the current hardware entry is the last
-				 * entry in the list, we're finished
-				 */
-				if (list_is_last(&hw->entry, &slave->addresses))
-					goto finished;
-
-				/* Get the next hardware address/length pair */
-				hw = list_entry(hw->entry.next,
-						struct fsl_dma_hw_addr, entry);
-				hw_used = 0;
-			}
-
-			/* Allocate the link descriptor from DMA pool */
-			new = fsl_dma_alloc_descriptor(chan);
-			if (!new) {
-				dev_err(chan->dev, "No free memory for "
-						       "link descriptor\n");
-				goto fail;
-			}
-#ifdef FSL_DMA_LD_DEBUG
-			dev_dbg(chan->dev, "new link desc alloc %p\n", new);
-#endif
-
-			/*
-			 * Calculate the maximum number of bytes to transfer,
-			 * making sure it is less than the DMA controller limit
-			 */
-			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
-					     hw->length - hw_used);
-			copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
-
-			/*
-			 * DMA_FROM_DEVICE
-			 * from the hardware to the scatterlist
-			 *
-			 * DMA_TO_DEVICE
-			 * from the scatterlist to the hardware
-			 */
-			if (direction == DMA_FROM_DEVICE) {
-				dma_src = hw->address + hw_used;
-				dma_dst = sg_dma_address(sg) + sg_used;
-			} else {
-				dma_src = sg_dma_address(sg) + sg_used;
-				dma_dst = hw->address + hw_used;
-			}
-
-			/* Fill in the descriptor */
-			set_desc_cnt(chan, &new->hw, copy);
-			set_desc_src(chan, &new->hw, dma_src);
-			set_desc_dst(chan, &new->hw, dma_dst);
-
-			/*
-			 * If this is not the first descriptor, chain the
-			 * current descriptor after the previous descriptor
-			 */
-			if (!first) {
-				first = new;
-			} else {
-				set_desc_next(chan, &prev->hw,
-					      new->async_tx.phys);
-			}
-
-			new->async_tx.cookie = 0;
-			async_tx_ack(&new->async_tx);
-
-			prev = new;
-			sg_used += copy;
-			hw_used += copy;
-
-			/* Insert the link descriptor into the LD ring */
-			list_add_tail(&new->node, &first->tx_list);
-		}
-	}
-
-finished:
-
-	/* All of the hardware address/length pairs had length == 0 */
-	if (!first || !new)
-		return NULL;
-
-	new->async_tx.flags = flags;
-	new->async_tx.cookie = -EBUSY;
-
-	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(chan, new);
-
-	/* Enable extra controller features */
-	if (chan->set_src_loop_size)
-		chan->set_src_loop_size(chan, slave->src_loop_size);
-
-	if (chan->set_dst_loop_size)
-		chan->set_dst_loop_size(chan, slave->dst_loop_size);
-
-	if (chan->toggle_ext_start)
-		chan->toggle_ext_start(chan, slave->external_start);
-
-	if (chan->toggle_ext_pause)
-		chan->toggle_ext_pause(chan, slave->external_pause);
-
-	if (chan->set_request_count)
-		chan->set_request_count(chan, slave->request_count);
-
-	return &first->async_tx;
-
-fail:
-	/* If first was not set, then we failed to allocate the very first
-	 * descriptor, and we're done */
-	if (!first)
-		return NULL;
-
-	/*
-	 * First is set, so all of the descriptors we allocated have been added
-	 * to first->tx_list, INCLUDING "first" itself. Therefore we
-	 * must traverse the list backwards freeing each descriptor in turn
-	 *
-	 * We're re-using variables for the loop, oh well
-	 */
-	fsldma_free_desc_list_reverse(chan, &first->tx_list);
 	return NULL;
 }
 
 static int fsl_dma_device_control(struct dma_chan *dchan,
 				  enum dma_ctrl_cmd cmd, unsigned long arg)
 {
+	struct dma_slave_config *config;
 	struct fsldma_chan *chan;
 	unsigned long flags;
-
-	/* Only supports DMA_TERMINATE_ALL */
-	if (cmd != DMA_TERMINATE_ALL)
-		return -ENXIO;
+	int size;
 
 	if (!dchan)
 		return -EINVAL;
 
 	chan = to_fsl_chan(dchan);
 
-	/* Halt the DMA engine */
-	dma_halt(chan);
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* Halt the DMA engine */
+		dma_halt(chan);
 
-	spin_lock_irqsave(&chan->desc_lock, flags);
+		spin_lock_irqsave(&chan->desc_lock, flags);
 
-	/* Remove and free all of the descriptors in the LD queue */
-	fsldma_free_desc_list(chan, &chan->ld_pending);
-	fsldma_free_desc_list(chan, &chan->ld_running);
+		/* Remove and free all of the descriptors in the LD queue */
+		fsldma_free_desc_list(chan, &chan->ld_pending);
+		fsldma_free_desc_list(chan, &chan->ld_running);
 
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		return 0;
+
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+
+		/* make sure the channel supports setting burst size */
+		if (!chan->set_request_count)
+			return -ENXIO;
+
+		/* we set the controller burst size depending on direction */
+		if (config->direction == DMA_TO_DEVICE)
+			size = config->dst_addr_width * config->dst_maxburst;
+		else
+			size = config->src_addr_width * config->src_maxburst;
+
+		chan->set_request_count(chan, size);
+		return 0;
+
+	case FSLDMA_EXTERNAL_START:
+
+		/* make sure the channel supports external start */
+		if (!chan->toggle_ext_start)
+			return -ENXIO;
+
+		chan->toggle_ext_start(chan, arg);
+		return 0;
+
+	default:
+		return -ENXIO;
+	}
 
 	return 0;
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 2c9ee98f6c77..885f35211675 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -120,12 +120,15 @@ enum dma_ctrl_flags {
  * configuration data in statically from the platform). An additional
  * argument of struct dma_slave_config must be passed in with this
  * command.
+ * @FSLDMA_EXTERNAL_START: this command will put the Freescale DMA controller
+ * into external start mode.
  */
 enum dma_ctrl_cmd {
 	DMA_TERMINATE_ALL,
 	DMA_PAUSE,
 	DMA_RESUME,
 	DMA_SLAVE_CONFIG,
+	FSLDMA_EXTERNAL_START,
 };
 
 /**

From 0d688662aab9d80078be82aa5aea561346643298 Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Thu, 30 Sep 2010 11:46:47 +0000
Subject: [PATCH 11/18] ste_dma40: implement support for scatterlist to
 scatterlist copy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that the DMAEngine API has support for scatterlist to scatterlist
copy, implement support for the STE DMA40 DMA controller.

Cc: Linus Walleij <linus.ml.walleij@gmail.com>
Acked-by: Per Fridén <per.friden@stericsson.com>
Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ste_dma40.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 17e2600a00cf..d5fd098e22e8 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -1857,6 +1857,18 @@ err:
 	return NULL;
 }
 
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *chan,
+	    struct scatterlist *dst_sg, unsigned int dst_nents,
+	    struct scatterlist *src_sg, unsigned int src_nents,
+	    unsigned long dma_flags)
+{
+	if (dst_nents != src_nents)
+		return NULL;
+
+	return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags);
+}
+
 static int d40_prep_slave_sg_log(struct d40_desc *d40d,
 				 struct d40_chan *d40c,
 				 struct scatterlist *sgl,
@@ -2281,6 +2293,7 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 	base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_slave.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_slave.device_tx_status = d40_tx_status;
 	base->dma_slave.device_issue_pending = d40_issue_pending;
@@ -2301,10 +2314,12 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 
 	dma_cap_zero(base->dma_memcpy.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
 
 	base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_memcpy.device_tx_status = d40_tx_status;
 	base->dma_memcpy.device_issue_pending = d40_issue_pending;
@@ -2331,10 +2346,12 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 	dma_cap_zero(base->dma_both.cap_mask);
 	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
+	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
 
 	base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources;
 	base->dma_both.device_free_chan_resources = d40_free_chan_resources;
 	base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy;
+	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
 	base->dma_both.device_prep_slave_sg = d40_prep_slave_sg;
 	base->dma_both.device_tx_status = d40_tx_status;
 	base->dma_both.device_issue_pending = d40_issue_pending;

From 53a61badf47e674fb43d73cd22f0f8065098ddf6 Mon Sep 17 00:00:00 2001
From: "Koul, Vinod" <vinod.koul@intel.com>
Date: Mon, 4 Oct 2010 10:42:40 +0000
Subject: [PATCH 12/18] intel_mid_dma: Add runtime PM support

This patch adds runtime PM support in this dma driver
for 4 PCI Controllers
Whenever the driver is idle (no channels grabbed), it
can go to low power state
It also adds the PCI suspend and resume support

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/intel_mid_dma.c      | 123 +++++++++++++++++++++++++++++--
 drivers/dma/intel_mid_dma_regs.h |  14 +++-
 2 files changed, 129 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index c2591e8d9b6e..373396c462a0 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -25,6 +25,7 @@
  */
 #include <linux/pci.h>
 #include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
 #include <linux/intel_mid_dma.h>
 
 #define MAX_CHAN	4 /*max ch across controllers*/
@@ -247,13 +248,13 @@ static void midc_dostart(struct intel_mid_dma_chan *midc,
 	struct middma_device *mid = to_middma_device(midc->chan.device);
 
 	/*  channel is idle */
-	if (midc->in_use && test_ch_en(midc->dma_base, midc->ch_id)) {
+	if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) {
 		/*error*/
 		pr_err("ERR_MDMA: channel is busy in start\n");
 		/* The tasklet will hopefully advance the queue... */
 		return;
 	}
-
+	midc->busy = true;
 	/*write registers and en*/
 	iowrite32(first->sar, midc->ch_regs + SAR);
 	iowrite32(first->dar, midc->ch_regs + DAR);
@@ -290,7 +291,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
 	param_txd = txd->callback_param;
 
 	list_move(&desc->desc_node, &midc->free_list);
-
+	midc->busy = false;
 	spin_unlock_bh(&midc->lock);
 	if (callback_txd) {
 		pr_debug("MDMA: TXD callback set ... calling\n");
@@ -434,7 +435,7 @@ static int intel_mid_dma_device_control(struct dma_chan *chan,
 		return -ENXIO;
 
 	spin_lock_bh(&midc->lock);
-	if (midc->in_use == false) {
+	if (midc->busy == false) {
 		spin_unlock_bh(&midc->lock);
 		return 0;
 	}
@@ -618,11 +619,11 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
 	struct middma_device	*mid = to_middma_device(chan->device);
 	struct intel_mid_dma_desc	*desc, *_desc;
 
-	if (true == midc->in_use) {
+	if (true == midc->busy) {
 		/*trying to free ch in use!!!!!*/
 		pr_err("ERR_MDMA: trying to free ch in use\n");
 	}
-
+	pm_runtime_put(&mid->pdev->dev);
 	spin_lock_bh(&midc->lock);
 	midc->descs_allocated = 0;
 	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
@@ -639,6 +640,7 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
 	}
 	spin_unlock_bh(&midc->lock);
 	midc->in_use = false;
+	midc->busy = false;
 	/* Disable CH interrupts */
 	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
 	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
@@ -659,11 +661,20 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
 	dma_addr_t		phys;
 	int	i = 0;
 
+	pm_runtime_get_sync(&mid->pdev->dev);
+
+	if (mid->state == SUSPENDED) {
+		if (dma_resume(mid->pdev)) {
+			pr_err("ERR_MDMA: resume failed");
+			return -EFAULT;
+		}
+	}
 
 	/* ASSERT:  channel is idle */
 	if (test_ch_en(mid->dma_base, midc->ch_id)) {
 		/*ch is not idle*/
 		pr_err("ERR_MDMA: ch not idle\n");
+		pm_runtime_put(&mid->pdev->dev);
 		return -EIO;
 	}
 	midc->completed = chan->cookie = 1;
@@ -674,6 +685,7 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
 		desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys);
 		if (!desc) {
 			pr_err("ERR_MDMA: desc failed\n");
+			pm_runtime_put(&mid->pdev->dev);
 			return -ENOMEM;
 			/*check*/
 		}
@@ -686,7 +698,8 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
 		list_add_tail(&desc->desc_node, &midc->free_list);
 	}
 	spin_unlock_bh(&midc->lock);
-	midc->in_use = false;
+	midc->in_use = true;
+	midc->busy = false;
 	pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i);
 	return i;
 }
@@ -884,6 +897,7 @@ static int mid_setup_dma(struct pci_dev *pdev)
 	pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan);
 	/*init CH structures*/
 	dma->intr_mask = 0;
+	dma->state = RUNNING;
 	for (i = 0; i < dma->max_chan; i++) {
 		struct intel_mid_dma_chan *midch = &dma->ch[i];
 
@@ -1070,6 +1084,9 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_dma;
 
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_allow(&pdev->dev);
 	return 0;
 
 err_dma:
@@ -1104,6 +1121,85 @@ static void __devexit intel_mid_dma_remove(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 }
 
+/* Power Management */
+/*
+* dma_suspend - PCI suspend function
+*
+* @pci: PCI device structure
+* @state: PM message
+*
+* This function is called by OS when a power event occurs
+*/
+int dma_suspend(struct pci_dev *pci, pm_message_t state)
+{
+	int i;
+	struct middma_device *device = pci_get_drvdata(pci);
+	pr_debug("MDMA: dma_suspend called\n");
+
+	for (i = 0; i < device->max_chan; i++) {
+		if (device->ch[i].in_use)
+			return -EAGAIN;
+	}
+	device->state = SUSPENDED;
+	pci_set_drvdata(pci, device);
+	pci_save_state(pci);
+	pci_disable_device(pci);
+	pci_set_power_state(pci, PCI_D3hot);
+	return 0;
+}
+
+/**
+* dma_resume - PCI resume function
+*
+* @pci:	PCI device structure
+*
+* This function is called by OS when a power event occurs
+*/
+int dma_resume(struct pci_dev *pci)
+{
+	int ret;
+	struct middma_device *device = pci_get_drvdata(pci);
+
+	pr_debug("MDMA: dma_resume called\n");
+	pci_set_power_state(pci, PCI_D0);
+	pci_restore_state(pci);
+	ret = pci_enable_device(pci);
+	if (ret) {
+		pr_err("MDMA: device cant be enabled for %x\n", pci->device);
+		return ret;
+	}
+	device->state = RUNNING;
+	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
+	pci_set_drvdata(pci, device);
+	return 0;
+}
+
+static int dma_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	return dma_suspend(pci_dev, PMSG_SUSPEND);
+}
+
+static int dma_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	return dma_resume(pci_dev);
+}
+
+static int dma_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct middma_device *device = pci_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < device->max_chan; i++) {
+		if (device->ch[i].in_use)
+			return -EAGAIN;
+	}
+
+	return pm_schedule_suspend(dev, 0);
+}
+
 /******************************************************************************
 * PCI stuff
 */
@@ -1116,11 +1212,24 @@ static struct pci_device_id intel_mid_dma_ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids);
 
+static const struct dev_pm_ops intel_mid_dma_pm = {
+	.runtime_suspend = dma_runtime_suspend,
+	.runtime_resume = dma_runtime_resume,
+	.runtime_idle = dma_runtime_idle,
+};
+
 static struct pci_driver intel_mid_dma_pci = {
 	.name		=	"Intel MID DMA",
 	.id_table	=	intel_mid_dma_ids,
 	.probe		=	intel_mid_dma_probe,
 	.remove		=	__devexit_p(intel_mid_dma_remove),
+#ifdef CONFIG_PM
+	.suspend = dma_suspend,
+	.resume = dma_resume,
+	.driver = {
+		.pm = &intel_mid_dma_pm,
+	},
+#endif
 };
 
 static int __init intel_mid_dma_init(void)
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index d81aa658ab09..a12dd2572dc3 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -29,7 +29,7 @@
 #include <linux/dmapool.h>
 #include <linux/pci_ids.h>
 
-#define INTEL_MID_DMA_DRIVER_VERSION "1.0.5"
+#define INTEL_MID_DMA_DRIVER_VERSION "1.0.6"
 
 #define	REG_BIT0		0x00000001
 #define	REG_BIT8		0x00000100
@@ -152,6 +152,7 @@ union intel_mid_dma_cfg_hi {
 	u32	cfg_hi;
 };
 
+
 /**
  * struct intel_mid_dma_chan - internal mid representation of a DMA channel
  * @chan: dma_chan strcture represetation for mid chan
@@ -166,6 +167,7 @@ union intel_mid_dma_cfg_hi {
  * @slave: dma slave struture
  * @descs_allocated: total number of decsiptors allocated
  * @dma: dma device struture pointer
+ * @busy: bool representing if ch is busy (active txn) or not
  * @in_use: bool representing if ch is in use or not
  */
 struct intel_mid_dma_chan {
@@ -181,6 +183,7 @@ struct intel_mid_dma_chan {
 	struct intel_mid_dma_slave	*slave;
 	unsigned int		descs_allocated;
 	struct middma_device	*dma;
+	bool			busy;
 	bool			in_use;
 };
 
@@ -190,6 +193,10 @@ static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
 	return container_of(chan, struct intel_mid_dma_chan, chan);
 }
 
+enum intel_mid_dma_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
 /**
  * struct middma_device - internal representation of a DMA device
  * @pdev: PCI device
@@ -205,6 +212,7 @@ static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
  * @max_chan: max number of chs supported (from drv_data)
  * @block_size: Block size of DMA transfer supported (from drv_data)
  * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data)
+ * @state: dma PM device state
  */
 struct middma_device {
 	struct pci_dev		*pdev;
@@ -220,6 +228,7 @@ struct middma_device {
 	int			max_chan;
 	int			block_size;
 	unsigned int		pimr_mask;
+	enum intel_mid_dma_state state;
 };
 
 static inline struct middma_device *to_middma_device(struct dma_device *common)
@@ -257,4 +266,7 @@ static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc
 {
 	return container_of(txd, struct intel_mid_dma_desc, txd);
 }
+
+int dma_resume(struct pci_dev *pci);
+
 #endif /*__INTEL_MID_DMAC_REGS_H__*/

From b306df5e925bb584b2157f11f97c5eb20a13de4d Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@intel.com>
Date: Mon, 4 Oct 2010 10:37:02 +0000
Subject: [PATCH 13/18] intel_mid_dma: Allow IRQ sharing

intel_mid_dma driver allows interrupt sharing. Thus it needs
to check whether IRQ source is the DMA controller and return
the appropriate IRQ return.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/intel_mid_dma.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 373396c462a0..c4b81387f0c6 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -817,9 +817,14 @@ static void dma_tasklet2(unsigned long data)
 static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
 {
 	struct middma_device *mid = data;
-	u32 status;
+	u32 tfr_status, err_status;
 	int call_tasklet = 0;
 
+	tfr_status = ioread32(mid->dma_base + RAW_TFR);
+	err_status = ioread32(mid->dma_base + RAW_ERR);
+	if (!tfr_status && !err_status)
+		return IRQ_NONE;
+
 	/*DMA Interrupt*/
 	pr_debug("MDMA:Got an interrupt on irq %d\n", irq);
 	if (!mid) {
@@ -827,19 +832,17 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
 		return -EINVAL;
 	}
 
-	status = ioread32(mid->dma_base + RAW_TFR);
-	pr_debug("MDMA: Status %x, Mask %x\n", status, mid->intr_mask);
-	status &= mid->intr_mask;
-	if (status) {
+	pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask);
+	tfr_status &= mid->intr_mask;
+	if (tfr_status) {
 		/*need to disable intr*/
-		iowrite32((status << 8), mid->dma_base + MASK_TFR);
-		pr_debug("MDMA: Calling tasklet %x\n", status);
+		iowrite32((tfr_status << 8), mid->dma_base + MASK_TFR);
+		pr_debug("MDMA: Calling tasklet %x\n", tfr_status);
 		call_tasklet = 1;
 	}
-	status = ioread32(mid->dma_base + RAW_ERR);
-	status &= mid->intr_mask;
-	if (status) {
-		iowrite32(MASK_INTR_REG(status), mid->dma_base + MASK_ERR);
+	err_status &= mid->intr_mask;
+	if (err_status) {
+		iowrite32(MASK_INTR_REG(err_status), mid->dma_base + MASK_ERR);
 		call_tasklet = 1;
 	}
 	if (call_tasklet)

From 03b96dca010145f3896abcd443b7fddb9813a0e6 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@intel.com>
Date: Mon, 4 Oct 2010 10:37:27 +0000
Subject: [PATCH 14/18] intel_mid_dma: Allow DMAC2 to share interrupt

Allow DMAC2 to share interrupt since exclusive interrupt line
for mrst DMAC2 is not provided on other platforms.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/intel_mid_dma.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index c4b81387f0c6..3c4333ee1fb7 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -872,7 +872,6 @@ static int mid_setup_dma(struct pci_dev *pdev)
 {
 	struct middma_device *dma = pci_get_drvdata(pdev);
 	int err, i;
-	unsigned int irq_level;
 
 	/* DMA coherent memory pool for DMA descriptor allocations */
 	dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev,
@@ -960,7 +959,6 @@ static int mid_setup_dma(struct pci_dev *pdev)
 
 	/*register irq */
 	if (dma->pimr_mask) {
-		irq_level = IRQF_SHARED;
 		pr_debug("MDMA:Requesting irq shared for DMAC1\n");
 		err = request_irq(pdev->irq, intel_mid_dma_interrupt1,
 			IRQF_SHARED, "INTEL_MID_DMAC1", dma);
@@ -968,10 +966,9 @@ static int mid_setup_dma(struct pci_dev *pdev)
 			goto err_irq;
 	} else {
 		dma->intr_mask = 0x03;
-		irq_level = 0;
 		pr_debug("MDMA:Requesting irq for DMAC2\n");
 		err = request_irq(pdev->irq, intel_mid_dma_interrupt2,
-			0, "INTEL_MID_DMAC2", dma);
+			IRQF_SHARED, "INTEL_MID_DMAC2", dma);
 		if (0 != err)
 			goto err_irq;
 	}

From 576e3c394a6c427c9a1378ec88ef7eb97e731992 Mon Sep 17 00:00:00 2001
From: Ramesh Babu K V <ramesh.b.k.v@intel.com>
Date: Mon, 4 Oct 2010 10:37:53 +0000
Subject: [PATCH 15/18] intel_mid_dma: Add sg list support to DMA driver

For a very high speed DMA various periphral devices need
scatter-gather list support. The DMA hardware support link list items.
This list can be circular also (adding new flag DMA_PREP_CIRCULAR_LIST)
Right now this flag is in driver header and should be moved to
dmaengine header file eventually

Signed-off-by: Ramesh Babu K V <ramesh.b.k.v@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/intel_mid_dma.c      | 267 +++++++++++++++++++++++++------
 drivers/dma/intel_mid_dma_regs.h |  30 +++-
 include/linux/intel_mid_dma.h    |   3 +
 3 files changed, 250 insertions(+), 50 deletions(-)

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 3c4333ee1fb7..2ae1086b9481 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -258,6 +258,7 @@ static void midc_dostart(struct intel_mid_dma_chan *midc,
 	/*write registers and en*/
 	iowrite32(first->sar, midc->ch_regs + SAR);
 	iowrite32(first->dar, midc->ch_regs + DAR);
+	iowrite32(first->lli_phys, midc->ch_regs + LLP);
 	iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH);
 	iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW);
 	iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW);
@@ -265,9 +266,9 @@ static void midc_dostart(struct intel_mid_dma_chan *midc,
 	pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n",
 		(int)first->sar, (int)first->dar, first->cfg_hi,
 		first->cfg_lo, first->ctl_hi, first->ctl_lo);
+	first->status = DMA_IN_PROGRESS;
 
 	iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-	first->status = DMA_IN_PROGRESS;
 }
 
 /**
@@ -284,20 +285,36 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
 {
 	struct dma_async_tx_descriptor	*txd = &desc->txd;
 	dma_async_tx_callback callback_txd = NULL;
+	struct intel_mid_dma_lli	*llitem;
 	void *param_txd = NULL;
 
 	midc->completed = txd->cookie;
 	callback_txd = txd->callback;
 	param_txd = txd->callback_param;
 
-	list_move(&desc->desc_node, &midc->free_list);
-	midc->busy = false;
+	if (desc->lli != NULL) {
+		/*clear the DONE bit of completed LLI in memory*/
+		llitem = desc->lli + desc->current_lli;
+		llitem->ctl_hi &= CLEAR_DONE;
+		if (desc->current_lli < desc->lli_length-1)
+			(desc->current_lli)++;
+		else
+			desc->current_lli = 0;
+	}
 	spin_unlock_bh(&midc->lock);
 	if (callback_txd) {
 		pr_debug("MDMA: TXD callback set ... calling\n");
 		callback_txd(param_txd);
-		spin_lock_bh(&midc->lock);
-		return;
+	}
+	if (midc->raw_tfr) {
+		desc->status = DMA_SUCCESS;
+		if (desc->lli != NULL) {
+			pci_pool_free(desc->lli_pool, desc->lli,
+						desc->lli_phys);
+			pci_pool_destroy(desc->lli_pool);
+		}
+		list_move(&desc->desc_node, &midc->free_list);
+		midc->busy = false;
 	}
 	spin_lock_bh(&midc->lock);
 
@@ -318,14 +335,89 @@ static void midc_scan_descriptors(struct middma_device *mid,
 
 	/*tx is complete*/
 	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		if (desc->status == DMA_IN_PROGRESS)  {
-			desc->status = DMA_SUCCESS;
+		if (desc->status == DMA_IN_PROGRESS)
 			midc_descriptor_complete(midc, desc);
-		}
 	}
 	return;
-}
+	}
+/**
+ * midc_lli_fill_sg -		Helper function to convert
+ *				SG list to Linked List Items.
+ *@midc: Channel
+ *@desc: DMA descriptor
+ *@sglist: Pointer to SG list
+ *@sglen: SG list length
+ *@flags: DMA transaction flags
+ *
+ * Walk through the SG list and convert the SG list into Linked
+ * List Items (LLI).
+ */
+static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
+				struct intel_mid_dma_desc *desc,
+				struct scatterlist *sglist,
+				unsigned int sglen,
+				unsigned int flags)
+{
+	struct intel_mid_dma_slave *mids;
+	struct scatterlist  *sg;
+	dma_addr_t lli_next, sg_phy_addr;
+	struct intel_mid_dma_lli *lli_bloc_desc;
+	union intel_mid_dma_ctl_lo ctl_lo;
+	union intel_mid_dma_ctl_hi ctl_hi;
+	int i;
 
+	pr_debug("MDMA: Entered midc_lli_fill_sg\n");
+	mids = midc->chan.private;
+
+	lli_bloc_desc = desc->lli;
+	lli_next = desc->lli_phys;
+
+	ctl_lo.ctl_lo = desc->ctl_lo;
+	ctl_hi.ctl_hi = desc->ctl_hi;
+	for_each_sg(sglist, sg, sglen, i) {
+		/*Populate CTL_LOW and LLI values*/
+		if (i != sglen - 1) {
+			lli_next = lli_next +
+				sizeof(struct intel_mid_dma_lli);
+		} else {
+		/*Check for circular list, otherwise terminate LLI to ZERO*/
+			if (flags & DMA_PREP_CIRCULAR_LIST) {
+				pr_debug("MDMA: LLI is configured in circular mode\n");
+				lli_next = desc->lli_phys;
+			} else {
+				lli_next = 0;
+				ctl_lo.ctlx.llp_dst_en = 0;
+				ctl_lo.ctlx.llp_src_en = 0;
+			}
+		}
+		/*Populate CTL_HI values*/
+		ctl_hi.ctlx.block_ts = get_block_ts(sg->length,
+							desc->width,
+							midc->dma->block_size);
+		/*Populate SAR and DAR values*/
+		sg_phy_addr = sg_phys(sg);
+		if (desc->dirn ==  DMA_TO_DEVICE) {
+			lli_bloc_desc->sar  = sg_phy_addr;
+			lli_bloc_desc->dar  = mids->per_addr;
+		} else if (desc->dirn ==  DMA_FROM_DEVICE) {
+			lli_bloc_desc->sar  = mids->per_addr;
+			lli_bloc_desc->dar  = sg_phy_addr;
+		}
+		/*Copy values into block descriptor in system memroy*/
+		lli_bloc_desc->llp = lli_next;
+		lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo;
+		lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi;
+
+		lli_bloc_desc++;
+	}
+	/*Copy very first LLI values to descriptor*/
+	desc->ctl_lo = desc->lli->ctl_lo;
+	desc->ctl_hi = desc->lli->ctl_hi;
+	desc->sar = desc->lli->sar;
+	desc->dar = desc->lli->dar;
+
+	return 0;
+}
 /*****************************************************************************
 DMA engine callback Functions*/
 /**
@@ -350,12 +442,12 @@ static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	desc->txd.cookie = cookie;
 
 
-	if (list_empty(&midc->active_list)) {
-		midc_dostart(midc, desc);
+	if (list_empty(&midc->active_list))
 		list_add_tail(&desc->desc_node, &midc->active_list);
-	} else {
+	else
 		list_add_tail(&desc->desc_node, &midc->queue);
-	}
+
+	midc_dostart(midc, desc);
 	spin_unlock_bh(&midc->lock);
 
 	return cookie;
@@ -429,7 +521,7 @@ static int intel_mid_dma_device_control(struct dma_chan *chan,
 	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
 	struct middma_device	*mid = to_middma_device(chan->device);
 	struct intel_mid_dma_desc	*desc, *_desc;
-	LIST_HEAD(list);
+	union intel_mid_dma_cfg_lo cfg_lo;
 
 	if (cmd != DMA_TERMINATE_ALL)
 		return -ENXIO;
@@ -439,39 +531,29 @@ static int intel_mid_dma_device_control(struct dma_chan *chan,
 		spin_unlock_bh(&midc->lock);
 		return 0;
 	}
-	list_splice_init(&midc->free_list, &list);
+	/*Suspend and disable the channel*/
+	cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW);
+	cfg_lo.cfgx.ch_susp = 1;
+	iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW);
+	iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
+	midc->busy = false;
+	/* Disable interrupts */
+	disable_dma_interrupt(midc);
 	midc->descs_allocated = 0;
 	midc->slave = NULL;
 
-	/* Disable interrupts */
-	disable_dma_interrupt(midc);
-
 	spin_unlock_bh(&midc->lock);
-	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
-		pr_debug("MDMA: freeing descriptor %p\n", desc);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
+	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
+		if (desc->lli != NULL) {
+			pci_pool_free(desc->lli_pool, desc->lli,
+						desc->lli_phys);
+			pci_pool_destroy(desc->lli_pool);
+		}
+		list_move(&desc->desc_node, &midc->free_list);
 	}
 	return 0;
 }
 
-/**
- * intel_mid_dma_prep_slave_sg -	Prep slave sg txn
- * @chan: chan for DMA transfer
- * @sgl: scatter gather list
- * @sg_len: length of sg txn
- * @direction: DMA transfer dirtn
- * @flags: DMA flags
- *
- * Do DMA sg txn: NOT supported now
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
-			struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
-			unsigned long flags)
-{
-	/*not supported now*/
-	return NULL;
-}
 
 /**
  * intel_mid_dma_prep_memcpy -	Prep memcpy txn
@@ -553,6 +635,7 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 
 	/*calculate CTL_HI*/
 	ctl_hi.ctlx.reser = 0;
+	ctl_hi.ctlx.done  = 0;
 	width = mids->src_width;
 
 	ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size);
@@ -599,6 +682,9 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	desc->ctl_hi = ctl_hi.ctl_hi;
 	desc->width = width;
 	desc->dirn = mids->dirn;
+	desc->lli_phys = 0;
+	desc->lli = NULL;
+	desc->lli_pool = NULL;
 	return &desc->txd;
 
 err_desc_get:
@@ -606,6 +692,85 @@ err_desc_get:
 	midc_desc_put(midc, desc);
 	return NULL;
 }
+/**
+ * intel_mid_dma_prep_slave_sg -	Prep slave sg txn
+ * @chan: chan for DMA transfer
+ * @sgl: scatter gather list
+ * @sg_len: length of sg txn
+ * @direction: DMA transfer dirtn
+ * @flags: DMA flags
+ *
+ * Prepares LLI based periphral transfer
+ */
+static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
+			struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned long flags)
+{
+	struct intel_mid_dma_chan *midc = NULL;
+	struct intel_mid_dma_slave *mids = NULL;
+	struct intel_mid_dma_desc *desc = NULL;
+	struct dma_async_tx_descriptor *txd = NULL;
+	union intel_mid_dma_ctl_lo ctl_lo;
+
+	pr_debug("MDMA: Prep for slave SG\n");
+
+	if (!sg_len) {
+		pr_err("MDMA: Invalid SG length\n");
+		return NULL;
+	}
+	midc = to_intel_mid_dma_chan(chan);
+	BUG_ON(!midc);
+
+	mids = chan->private;
+	BUG_ON(!mids);
+
+	if (!midc->dma->pimr_mask) {
+		pr_debug("MDMA: SG list is not supported by this controller\n");
+		return  NULL;
+	}
+
+	pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n",
+			sg_len, direction, flags);
+
+	txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sgl->length, flags);
+	if (NULL == txd) {
+		pr_err("MDMA: Prep memcpy failed\n");
+		return NULL;
+	}
+	desc = to_intel_mid_dma_desc(txd);
+	desc->dirn = direction;
+	ctl_lo.ctl_lo = desc->ctl_lo;
+	ctl_lo.ctlx.llp_dst_en = 1;
+	ctl_lo.ctlx.llp_src_en = 1;
+	desc->ctl_lo = ctl_lo.ctl_lo;
+	desc->lli_length = sg_len;
+	desc->current_lli = 0;
+	/* DMA coherent memory pool for LLI descriptors*/
+	desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool",
+				midc->dma->pdev,
+				(sizeof(struct intel_mid_dma_lli)*sg_len),
+				32, 0);
+	if (NULL == desc->lli_pool) {
+		pr_err("MID_DMA:LLI pool create failed\n");
+		return NULL;
+	}
+
+	desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys);
+	if (!desc->lli) {
+		pr_err("MID_DMA: LLI alloc failed\n");
+		pci_pool_destroy(desc->lli_pool);
+		return NULL;
+	}
+
+	midc_lli_fill_sg(midc, desc, sgl, sg_len, flags);
+	if (flags & DMA_PREP_INTERRUPT) {
+		iowrite32(UNMASK_INTR_REG(midc->ch_id),
+				midc->dma_base + MASK_BLOCK);
+		pr_debug("MDMA:Enabled Block interrupt\n");
+	}
+	return &desc->txd;
+}
 
 /**
  * intel_mid_dma_free_chan_resources -	Frees dma resources
@@ -728,7 +893,7 @@ static void dma_tasklet(unsigned long data)
 {
 	struct middma_device *mid = NULL;
 	struct intel_mid_dma_chan *midc = NULL;
-	u32 status;
+	u32 status, raw_tfr, raw_block;
 	int i;
 
 	mid = (struct middma_device *)data;
@@ -737,8 +902,9 @@ static void dma_tasklet(unsigned long data)
 		return;
 	}
 	pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id);
-	status = ioread32(mid->dma_base + RAW_TFR);
-	pr_debug("MDMA:RAW_TFR %x\n", status);
+	raw_tfr = ioread32(mid->dma_base + RAW_TFR);
+	raw_block = ioread32(mid->dma_base + RAW_BLOCK);
+	status = raw_tfr | raw_block;
 	status &= mid->intr_mask;
 	while (status) {
 		/*txn interrupt*/
@@ -754,15 +920,23 @@ static void dma_tasklet(unsigned long data)
 		}
 		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
 				status, midc->ch_id, i);
+		midc->raw_tfr = raw_tfr;
+		midc->raw_block = raw_block;
+		spin_lock_bh(&midc->lock);
 		/*clearing this interrupts first*/
 		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR);
-		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_BLOCK);
-
-		spin_lock_bh(&midc->lock);
+		if (raw_block) {
+			iowrite32((1 << midc->ch_id),
+				mid->dma_base + CLEAR_BLOCK);
+		}
 		midc_scan_descriptors(mid, midc);
 		pr_debug("MDMA:Scan of desc... complete, unmasking\n");
 		iowrite32(UNMASK_INTR_REG(midc->ch_id),
 				mid->dma_base + MASK_TFR);
+		if (raw_block) {
+			iowrite32(UNMASK_INTR_REG(midc->ch_id),
+				mid->dma_base + MASK_BLOCK);
+		}
 		spin_unlock_bh(&midc->lock);
 	}
 
@@ -836,7 +1010,8 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
 	tfr_status &= mid->intr_mask;
 	if (tfr_status) {
 		/*need to disable intr*/
-		iowrite32((tfr_status << 8), mid->dma_base + MASK_TFR);
+		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR);
+		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK);
 		pr_debug("MDMA: Calling tasklet %x\n", tfr_status);
 		call_tasklet = 1;
 	}
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index a12dd2572dc3..7a5ac56d1324 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -29,11 +29,12 @@
 #include <linux/dmapool.h>
 #include <linux/pci_ids.h>
 
-#define INTEL_MID_DMA_DRIVER_VERSION "1.0.6"
+#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0"
 
 #define	REG_BIT0		0x00000001
 #define	REG_BIT8		0x00000100
-
+#define INT_MASK_WE		0x8
+#define CLEAR_DONE		0xFFFFEFFF
 #define UNMASK_INTR_REG(chan_num) \
 	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
 #define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num)
@@ -41,6 +42,9 @@
 #define ENABLE_CHANNEL(chan_num) \
 	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
 
+#define DISABLE_CHANNEL(chan_num) \
+	(REG_BIT8 << chan_num)
+
 #define DESCS_PER_CHANNEL	16
 /*DMA Registers*/
 /*registers associated with channel programming*/
@@ -50,6 +54,7 @@
 /*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/
 #define SAR			0x00 /* Source Address Register*/
 #define DAR			0x08 /* Destination Address Register*/
+#define LLP			0x10 /* Linked List Pointer Register*/
 #define CTL_LOW			0x18 /* Control Register*/
 #define CTL_HIGH		0x1C /* Control Register*/
 #define CFG_LOW			0x40 /* Configuration Register Low*/
@@ -112,8 +117,8 @@ union intel_mid_dma_ctl_lo {
 union intel_mid_dma_ctl_hi {
 	struct {
 		u32	block_ts:12;	/*block transfer size*/
-					/*configured by DMAC*/
-		u32	reser:20;
+		u32	done:1;		/*Done - updated by DMAC*/
+		u32	reser:19;	/*configured by DMAC*/
 	} ctlx;
 	u32	ctl_hi;
 
@@ -169,6 +174,8 @@ union intel_mid_dma_cfg_hi {
  * @dma: dma device struture pointer
  * @busy: bool representing if ch is busy (active txn) or not
  * @in_use: bool representing if ch is in use or not
+ * @raw_tfr: raw trf interrupt recieved
+ * @raw_block: raw block interrupt recieved
  */
 struct intel_mid_dma_chan {
 	struct dma_chan		chan;
@@ -185,6 +192,8 @@ struct intel_mid_dma_chan {
 	struct middma_device	*dma;
 	bool			busy;
 	bool			in_use;
+	u32			raw_tfr;
+	u32			raw_block;
 };
 
 static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
@@ -247,6 +256,11 @@ struct intel_mid_dma_desc {
 	u32				cfg_lo;
 	u32				ctl_lo;
 	u32				ctl_hi;
+	struct pci_pool			*lli_pool;
+	struct intel_mid_dma_lli	*lli;
+	dma_addr_t			lli_phys;
+	unsigned int			lli_length;
+	unsigned int			current_lli;
 	dma_addr_t			next;
 	enum dma_data_direction		dirn;
 	enum dma_status			status;
@@ -255,6 +269,14 @@ struct intel_mid_dma_desc {
 
 };
 
+struct intel_mid_dma_lli {
+	dma_addr_t			sar;
+	dma_addr_t			dar;
+	dma_addr_t			llp;
+	u32				ctl_lo;
+	u32				ctl_hi;
+} __attribute__ ((packed));
+
 static inline int test_ch_en(void __iomem *dma, u32 ch_no)
 {
 	u32 en_reg = ioread32(dma + DMA_CHAN_EN);
diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h
index d9d08b6269b6..befe3fbd9e28 100644
--- a/include/linux/intel_mid_dma.h
+++ b/include/linux/intel_mid_dma.h
@@ -27,6 +27,7 @@
 
 #include <linux/dmaengine.h>
 
+#define DMA_PREP_CIRCULAR_LIST		(1 << 10)
 /*DMA transaction width, src and dstn width would be same
 The DMA length must be width aligned,
 for 32 bit width the length must be 32 bit (4bytes) aligned only*/
@@ -69,6 +70,7 @@ enum intel_mid_dma_msize {
  * @cfg_mode: DMA data transfer mode (per-per/mem-per/mem-mem)
  * @src_msize: Source DMA burst size
  * @dst_msize: Dst DMA burst size
+ * @per_addr: Periphral address
  * @device_instance: DMA peripheral device instance, we can have multiple
  *		peripheral device connected to single DMAC
  */
@@ -80,6 +82,7 @@ struct intel_mid_dma_slave {
 	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
 	enum intel_mid_dma_msize	src_msize; /*size if src burst*/
 	enum intel_mid_dma_msize	dst_msize; /*size of dst burst*/
+	dma_addr_t			per_addr; /*Peripheral address*/
 	unsigned int		device_instance; /*0, 1 for periphral instance*/
 };
 

From 8b6492231d2a92352a6371eebd622e3bc824a663 Mon Sep 17 00:00:00 2001
From: "Koul, Vinod" <vinod.koul@intel.com>
Date: Mon, 4 Oct 2010 10:38:25 +0000
Subject: [PATCH 16/18] intel_mid_dma: fix the WARN_ONs

Moved the WARN_ON to BUG_ON, as WARN_ON if hit,
can cause null pointer derefrences

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/intel_mid_dma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 2ae1086b9481..ef7ffb813fe9 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -581,15 +581,15 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	enum intel_mid_dma_width width = 0;
 
 	pr_debug("MDMA: Prep for memcpy\n");
-	WARN_ON(!chan);
+	BUG_ON(!chan);
 	if (!len)
 		return NULL;
 
 	mids = chan->private;
-	WARN_ON(!mids);
+	BUG_ON(!mids);
 
 	midc = to_intel_mid_dma_chan(chan);
-	WARN_ON(!midc);
+	BUG_ON(!midc);
 
 	pr_debug("MDMA:called for DMA %x CH %d Length %zu\n",
 				midc->dma->pci_id, midc->ch_id, len);

From 20dd63900d238e17b122fe0c7376ff090867f528 Mon Sep 17 00:00:00 2001
From: "Koul, Vinod" <vinod.koul@intel.com>
Date: Mon, 4 Oct 2010 10:38:43 +0000
Subject: [PATCH 17/18] intel_mid_dma: change the slave interface

In 2.6.36 kernel, dma slave control command was introduced,
this patch changes the intel-mid-dma driver to this
new kernel slave interface

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/intel_mid_dma.c      | 66 +++++++++++++++++++++-----------
 drivers/dma/intel_mid_dma_regs.h | 11 +++++-
 include/linux/intel_mid_dma.h    | 15 +-------
 3 files changed, 53 insertions(+), 39 deletions(-)

diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index ef7ffb813fe9..338bc4eed1f3 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -92,13 +92,13 @@ static int get_block_ts(int len, int tx_width, int block_size)
 	int byte_width = 0, block_ts = 0;
 
 	switch (tx_width) {
-	case LNW_DMA_WIDTH_8BIT:
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
 		byte_width = 1;
 		break;
-	case LNW_DMA_WIDTH_16BIT:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
 		byte_width = 2;
 		break;
-	case LNW_DMA_WIDTH_32BIT:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
 	default:
 		byte_width = 4;
 		break;
@@ -367,7 +367,7 @@ static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
 	int i;
 
 	pr_debug("MDMA: Entered midc_lli_fill_sg\n");
-	mids = midc->chan.private;
+	mids = midc->mid_slave;
 
 	lli_bloc_desc = desc->lli;
 	lli_next = desc->lli_phys;
@@ -398,9 +398,9 @@ static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
 		sg_phy_addr = sg_phys(sg);
 		if (desc->dirn ==  DMA_TO_DEVICE) {
 			lli_bloc_desc->sar  = sg_phy_addr;
-			lli_bloc_desc->dar  = mids->per_addr;
+			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
 		} else if (desc->dirn ==  DMA_FROM_DEVICE) {
-			lli_bloc_desc->sar  = mids->per_addr;
+			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
 			lli_bloc_desc->dar  = sg_phy_addr;
 		}
 		/*Copy values into block descriptor in system memroy*/
@@ -507,6 +507,23 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
 	return ret;
 }
 
+static int dma_slave_control(struct dma_chan *chan, unsigned long arg)
+{
+	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
+	struct dma_slave_config  *slave = (struct dma_slave_config *)arg;
+	struct intel_mid_dma_slave *mid_slave;
+
+	BUG_ON(!midc);
+	BUG_ON(!slave);
+	pr_debug("MDMA: slave control called\n");
+
+	mid_slave = to_intel_mid_dma_slave(slave);
+
+	BUG_ON(!mid_slave);
+
+	midc->mid_slave = mid_slave;
+	return 0;
+}
 /**
  * intel_mid_dma_device_control -	DMA device control
  * @chan: chan for DMA control
@@ -523,6 +540,9 @@ static int intel_mid_dma_device_control(struct dma_chan *chan,
 	struct intel_mid_dma_desc	*desc, *_desc;
 	union intel_mid_dma_cfg_lo cfg_lo;
 
+	if (cmd == DMA_SLAVE_CONFIG)
+		return dma_slave_control(chan, arg);
+
 	if (cmd != DMA_TERMINATE_ALL)
 		return -ENXIO;
 
@@ -540,7 +560,6 @@ static int intel_mid_dma_device_control(struct dma_chan *chan,
 	/* Disable interrupts */
 	disable_dma_interrupt(midc);
 	midc->descs_allocated = 0;
-	midc->slave = NULL;
 
 	spin_unlock_bh(&midc->lock);
 	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
@@ -578,23 +597,24 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	union intel_mid_dma_ctl_hi ctl_hi;
 	union intel_mid_dma_cfg_lo cfg_lo;
 	union intel_mid_dma_cfg_hi cfg_hi;
-	enum intel_mid_dma_width width = 0;
+	enum dma_slave_buswidth width;
 
 	pr_debug("MDMA: Prep for memcpy\n");
 	BUG_ON(!chan);
 	if (!len)
 		return NULL;
 
-	mids = chan->private;
-	BUG_ON(!mids);
-
 	midc = to_intel_mid_dma_chan(chan);
 	BUG_ON(!midc);
 
+	mids = midc->mid_slave;
+	BUG_ON(!mids);
+
 	pr_debug("MDMA:called for DMA %x CH %d Length %zu\n",
 				midc->dma->pci_id, midc->ch_id, len);
 	pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n",
-		mids->cfg_mode, mids->dirn, mids->hs_mode, mids->src_width);
+			mids->cfg_mode, mids->dma_slave.direction,
+			mids->hs_mode, mids->dma_slave.src_addr_width);
 
 	/*calculate CFG_LO*/
 	if (mids->hs_mode == LNW_DMA_SW_HS) {
@@ -613,13 +633,13 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		if (midc->dma->pimr_mask) {
 			cfg_hi.cfgx.protctl = 0x0; /*default value*/
 			cfg_hi.cfgx.fifo_mode = 1;
-			if (mids->dirn == DMA_TO_DEVICE) {
+			if (mids->dma_slave.direction == DMA_TO_DEVICE) {
 				cfg_hi.cfgx.src_per = 0;
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.dst_per = 3;
 				if (mids->device_instance == 1)
 					cfg_hi.cfgx.dst_per = 1;
-			} else if (mids->dirn == DMA_FROM_DEVICE) {
+			} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.src_per = 2;
 				if (mids->device_instance == 1)
@@ -636,7 +656,7 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	/*calculate CTL_HI*/
 	ctl_hi.ctlx.reser = 0;
 	ctl_hi.ctlx.done  = 0;
-	width = mids->src_width;
+	width = mids->dma_slave.src_addr_width;
 
 	ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size);
 	pr_debug("MDMA:calc len %d for block size %d\n",
@@ -644,21 +664,21 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	/*calculate CTL_LO*/
 	ctl_lo.ctl_lo = 0;
 	ctl_lo.ctlx.int_en = 1;
-	ctl_lo.ctlx.dst_tr_width = mids->dst_width;
-	ctl_lo.ctlx.src_tr_width = mids->src_width;
-	ctl_lo.ctlx.dst_msize = mids->src_msize;
-	ctl_lo.ctlx.src_msize = mids->dst_msize;
+	ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width;
+	ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width;
+	ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst;
+	ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst;
 
 	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
 		ctl_lo.ctlx.tt_fc = 0;
 		ctl_lo.ctlx.sinc = 0;
 		ctl_lo.ctlx.dinc = 0;
 	} else {
-		if (mids->dirn == DMA_TO_DEVICE) {
+		if (mids->dma_slave.direction == DMA_TO_DEVICE) {
 			ctl_lo.ctlx.sinc = 0;
 			ctl_lo.ctlx.dinc = 2;
 			ctl_lo.ctlx.tt_fc = 1;
-		} else if (mids->dirn == DMA_FROM_DEVICE) {
+		} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
 			ctl_lo.ctlx.sinc = 2;
 			ctl_lo.ctlx.dinc = 0;
 			ctl_lo.ctlx.tt_fc = 2;
@@ -681,7 +701,7 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	desc->ctl_lo = ctl_lo.ctl_lo;
 	desc->ctl_hi = ctl_hi.ctl_hi;
 	desc->width = width;
-	desc->dirn = mids->dirn;
+	desc->dirn = mids->dma_slave.direction;
 	desc->lli_phys = 0;
 	desc->lli = NULL;
 	desc->lli_pool = NULL;
@@ -722,7 +742,7 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 	midc = to_intel_mid_dma_chan(chan);
 	BUG_ON(!midc);
 
-	mids = chan->private;
+	mids = midc->mid_slave;
 	BUG_ON(!mids);
 
 	if (!midc->dma->pimr_mask) {
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index 7a5ac56d1324..709fecbdde79 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -187,13 +187,13 @@ struct intel_mid_dma_chan {
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
-	struct intel_mid_dma_slave	*slave;
 	unsigned int		descs_allocated;
 	struct middma_device	*dma;
 	bool			busy;
 	bool			in_use;
 	u32			raw_tfr;
 	u32			raw_block;
+	struct intel_mid_dma_slave *mid_slave;
 };
 
 static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
@@ -264,7 +264,7 @@ struct intel_mid_dma_desc {
 	dma_addr_t			next;
 	enum dma_data_direction		dirn;
 	enum dma_status			status;
-	enum intel_mid_dma_width	width; /*width of DMA txn*/
+	enum dma_slave_buswidth		width; /*width of DMA txn*/
 	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
 
 };
@@ -289,6 +289,13 @@ static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc
 	return container_of(txd, struct intel_mid_dma_desc, txd);
 }
 
+static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave
+		(struct dma_slave_config *slave)
+{
+	return container_of(slave, struct intel_mid_dma_slave, dma_slave);
+}
+
+
 int dma_resume(struct pci_dev *pci);
 
 #endif /*__INTEL_MID_DMAC_REGS_H__*/
diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h
index befe3fbd9e28..10496bd24c5c 100644
--- a/include/linux/intel_mid_dma.h
+++ b/include/linux/intel_mid_dma.h
@@ -28,14 +28,6 @@
 #include <linux/dmaengine.h>
 
 #define DMA_PREP_CIRCULAR_LIST		(1 << 10)
-/*DMA transaction width, src and dstn width would be same
-The DMA length must be width aligned,
-for 32 bit width the length must be 32 bit (4bytes) aligned only*/
-enum intel_mid_dma_width {
-	LNW_DMA_WIDTH_8BIT = 0x0,
-	LNW_DMA_WIDTH_16BIT = 0x1,
-	LNW_DMA_WIDTH_32BIT = 0x2,
-};
 
 /*DMA mode configurations*/
 enum intel_mid_dma_mode {
@@ -75,15 +67,10 @@ enum intel_mid_dma_msize {
  *		peripheral device connected to single DMAC
  */
 struct intel_mid_dma_slave {
-	enum dma_data_direction		dirn;
-	enum intel_mid_dma_width	src_width; /*width of DMA src txn*/
-	enum intel_mid_dma_width	dst_width; /*width of DMA dst txn*/
 	enum intel_mid_dma_hs_mode	hs_mode;  /*handshaking*/
 	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
-	enum intel_mid_dma_msize	src_msize; /*size if src burst*/
-	enum intel_mid_dma_msize	dst_msize; /*size of dst burst*/
-	dma_addr_t			per_addr; /*Peripheral address*/
 	unsigned int		device_instance; /*0, 1 for periphral instance*/
+	struct dma_slave_config		dma_slave;
 };
 
 #endif /*__INTEL_MID_DMA_H__*/

From 1f1846c6ceed07c03ef036a27864befe0f773997 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 6 Oct 2010 10:25:55 +0200
Subject: [PATCH 18/18] dmaengine: Add Freescale i.MX1/21/27 DMA driver

This driver is currently implemented as a user to the old i.MX
DMA API. This allows us to convert each user of the old API to
the dmaengine API one by one. Once this is done the old DMA
driver can be merged into the i.MX dmaengine driver.

V2: remove some debug leftovers and unused variables

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/Kconfig   |   8 +
 drivers/dma/Makefile  |   1 +
 drivers/dma/imx-dma.c | 422 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 431 insertions(+)
 create mode 100644 drivers/dma/imx-dma.c

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 3cf1d123f2d3..e0e1f40a9fc8 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -203,6 +203,14 @@ config IMX_SDMA
 	  Support the i.MX SDMA engine. This engine is integrated into
 	  Freescale i.MX25/31/35/51 chips.
 
+config IMX_DMA
+	tristate "i.MX DMA support"
+	depends on ARCH_MX1 || ARCH_MX21 || MACH_MX27
+	select DMA_ENGINE
+	help
+	  Support the i.MX DMA engine. This engine is integrated into
+	  Freescale i.MX1/21/27 chips.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 3ed7babd3a99..79ac75b51065 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
+obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
new file mode 100644
index 000000000000..346be6218058
--- /dev/null
+++ b/drivers/dma/imx-dma.c
@@ -0,0 +1,422 @@
+/*
+ * drivers/dma/imx-dma.c
+ *
+ * This file contains a driver for the Freescale i.MX DMA engine
+ * found on i.MX1/21/27
+ *
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+
+#include <asm/irq.h>
+#include <mach/dma-v1.h>
+#include <mach/hardware.h>
+
+struct imxdma_channel {
+	struct imxdma_engine		*imxdma;
+	unsigned int			channel;
+	unsigned int			imxdma_channel;
+
+	enum dma_slave_buswidth		word_size;
+	dma_addr_t			per_address;
+	u32				watermark_level;
+	struct dma_chan			chan;
+	spinlock_t			lock;
+	struct dma_async_tx_descriptor	desc;
+	dma_cookie_t			last_completed;
+	enum dma_status			status;
+	int				dma_request;
+	struct scatterlist		*sg_list;
+};
+
+#define MAX_DMA_CHANNELS 8
+
+struct imxdma_engine {
+	struct device			*dev;
+	struct dma_device		dma_device;
+	struct imxdma_channel		channel[MAX_DMA_CHANNELS];
+};
+
+static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct imxdma_channel, chan);
+}
+
+static void imxdma_handle(struct imxdma_channel *imxdmac)
+{
+	if (imxdmac->desc.callback)
+		imxdmac->desc.callback(imxdmac->desc.callback_param);
+	imxdmac->last_completed = imxdmac->desc.cookie;
+}
+
+static void imxdma_irq_handler(int channel, void *data)
+{
+	struct imxdma_channel *imxdmac = data;
+
+	imxdmac->status = DMA_SUCCESS;
+	imxdma_handle(imxdmac);
+}
+
+static void imxdma_err_handler(int channel, void *data, int error)
+{
+	struct imxdma_channel *imxdmac = data;
+
+	imxdmac->status = DMA_ERROR;
+	imxdma_handle(imxdmac);
+}
+
+static void imxdma_progression(int channel, void *data,
+		struct scatterlist *sg)
+{
+	struct imxdma_channel *imxdmac = data;
+
+	imxdmac->status = DMA_SUCCESS;
+	imxdma_handle(imxdmac);
+}
+
+static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct dma_slave_config *dmaengine_cfg = (void *)arg;
+	int ret;
+	unsigned int mode = 0;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		imxdmac->status = DMA_ERROR;
+		imx_dma_disable(imxdmac->imxdma_channel);
+		return 0;
+	case DMA_SLAVE_CONFIG:
+		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+			imxdmac->per_address = dmaengine_cfg->src_addr;
+			imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
+			imxdmac->word_size = dmaengine_cfg->src_addr_width;
+		} else {
+			imxdmac->per_address = dmaengine_cfg->dst_addr;
+			imxdmac->watermark_level = dmaengine_cfg->dst_maxburst;
+			imxdmac->word_size = dmaengine_cfg->dst_addr_width;
+		}
+
+		switch (imxdmac->word_size) {
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+			mode = IMX_DMA_MEMSIZE_8;
+			break;
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+			mode = IMX_DMA_MEMSIZE_16;
+			break;
+		default:
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+			mode = IMX_DMA_MEMSIZE_32;
+			break;
+		}
+		ret = imx_dma_config_channel(imxdmac->imxdma_channel,
+				mode | IMX_DMA_TYPE_FIFO,
+				IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR,
+				imxdmac->dma_request, 1);
+
+		if (ret)
+			return ret;
+
+		imx_dma_config_burstlen(imxdmac->imxdma_channel, imxdmac->watermark_level);
+
+		return 0;
+	default:
+		return -ENOSYS;
+	}
+
+	return -EINVAL;
+}
+
+static enum dma_status imxdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	dma_cookie_t last_used;
+	enum dma_status ret;
+
+	last_used = chan->cookie;
+
+	ret = dma_async_is_complete(cookie, imxdmac->last_completed, last_used);
+	dma_set_tx_state(txstate, imxdmac->last_completed, last_used, 0);
+
+	return ret;
+}
+
+static dma_cookie_t imxdma_assign_cookie(struct imxdma_channel *imxdma)
+{
+	dma_cookie_t cookie = imxdma->chan.cookie;
+
+	if (++cookie < 0)
+		cookie = 1;
+
+	imxdma->chan.cookie = cookie;
+	imxdma->desc.cookie = cookie;
+
+	return cookie;
+}
+
+static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	spin_lock_irq(&imxdmac->lock);
+
+	cookie = imxdma_assign_cookie(imxdmac);
+
+	imx_dma_enable(imxdmac->imxdma_channel);
+
+	spin_unlock_irq(&imxdmac->lock);
+
+	return cookie;
+}
+
+static int imxdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+
+	imxdmac->dma_request = data->dma_request;
+
+	dma_async_tx_descriptor_init(&imxdmac->desc, chan);
+	imxdmac->desc.tx_submit = imxdma_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	imxdmac->desc.flags = DMA_CTRL_ACK;
+
+	imxdmac->status = DMA_SUCCESS;
+
+	return 0;
+}
+
+static void imxdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+
+	imx_dma_disable(imxdmac->imxdma_channel);
+
+	if (imxdmac->sg_list) {
+		kfree(imxdmac->sg_list);
+		imxdmac->sg_list = NULL;
+	}
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct scatterlist *sg;
+	int i, ret, dma_length = 0;
+	unsigned int dmamode;
+
+	if (imxdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	imxdmac->status = DMA_IN_PROGRESS;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_length += sg->length;
+	}
+
+	if (direction == DMA_FROM_DEVICE)
+		dmamode = DMA_MODE_READ;
+	else
+		dmamode = DMA_MODE_WRITE;
+
+	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, sgl, sg_len,
+		 dma_length, imxdmac->per_address, dmamode);
+	if (ret)
+		return NULL;
+
+	return &imxdmac->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int i, ret;
+	unsigned int periods = buf_len / period_len;
+	unsigned int dmamode;
+
+	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
+			__func__, imxdmac->channel, buf_len, period_len);
+
+	if (imxdmac->status == DMA_IN_PROGRESS)
+		return NULL;
+	imxdmac->status = DMA_IN_PROGRESS;
+
+	ret = imx_dma_setup_progression_handler(imxdmac->imxdma_channel,
+			imxdma_progression);
+	if (ret) {
+		dev_err(imxdma->dev, "Failed to setup the DMA handler\n");
+		return NULL;
+	}
+
+	if (imxdmac->sg_list)
+		kfree(imxdmac->sg_list);
+
+	imxdmac->sg_list = kcalloc(periods + 1,
+			sizeof(struct scatterlist), GFP_KERNEL);
+	if (!imxdmac->sg_list)
+		return NULL;
+
+	sg_init_table(imxdmac->sg_list, periods);
+
+	for (i = 0; i < periods; i++) {
+		imxdmac->sg_list[i].page_link = 0;
+		imxdmac->sg_list[i].offset = 0;
+		imxdmac->sg_list[i].dma_address = dma_addr;
+		imxdmac->sg_list[i].length = period_len;
+		dma_addr += period_len;
+	}
+
+	/* close the loop */
+	imxdmac->sg_list[periods].offset = 0;
+	imxdmac->sg_list[periods].length = 0;
+	imxdmac->sg_list[periods].page_link =
+		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
+
+	if (direction == DMA_FROM_DEVICE)
+		dmamode = DMA_MODE_READ;
+	else
+		dmamode = DMA_MODE_WRITE;
+
+	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, imxdmac->sg_list, periods,
+		 IMX_DMA_LENGTH_LOOP, imxdmac->per_address, dmamode);
+	if (ret)
+		return NULL;
+
+	return &imxdmac->desc;
+}
+
+static void imxdma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * Nothing to do. We only have a single descriptor
+	 */
+}
+
+static int __init imxdma_probe(struct platform_device *pdev)
+{
+	struct imxdma_engine *imxdma;
+	int ret, i;
+
+	imxdma = kzalloc(sizeof(*imxdma), GFP_KERNEL);
+	if (!imxdma)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&imxdma->dma_device.channels);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+		imxdmac->imxdma_channel = imx_dma_request_by_prio("dmaengine",
+				DMA_PRIO_MEDIUM);
+		if (imxdmac->channel < 0)
+			goto err_init;
+
+		imx_dma_setup_handlers(imxdmac->imxdma_channel,
+		       imxdma_irq_handler, imxdma_err_handler, imxdmac);
+
+		imxdmac->imxdma = imxdma;
+		spin_lock_init(&imxdmac->lock);
+
+		dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
+		dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+
+		imxdmac->chan.device = &imxdma->dma_device;
+		imxdmac->chan.chan_id = i;
+		imxdmac->channel = i;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&imxdmac->chan.device_node, &imxdma->dma_device.channels);
+	}
+
+	imxdma->dev = &pdev->dev;
+	imxdma->dma_device.dev = &pdev->dev;
+
+	imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources;
+	imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources;
+	imxdma->dma_device.device_tx_status = imxdma_tx_status;
+	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
+	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+	imxdma->dma_device.device_control = imxdma_control;
+	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
+
+	platform_set_drvdata(pdev, imxdma);
+
+	ret = dma_async_device_register(&imxdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	return 0;
+
+err_init:
+	while (i-- >= 0) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+		imx_dma_free(imxdmac->imxdma_channel);
+	}
+
+	kfree(imxdma);
+	return ret;
+}
+
+static int __exit imxdma_remove(struct platform_device *pdev)
+{
+	struct imxdma_engine *imxdma = platform_get_drvdata(pdev);
+	int i;
+
+        dma_async_device_unregister(&imxdma->dma_device);
+
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+		 imx_dma_free(imxdmac->imxdma_channel);
+	}
+
+        kfree(imxdma);
+
+        return 0;
+}
+
+static struct platform_driver imxdma_driver = {
+	.driver		= {
+		.name	= "imx-dma",
+	},
+	.remove		= __exit_p(imxdma_remove),
+};
+
+static int __init imxdma_module_init(void)
+{
+	return platform_driver_probe(&imxdma_driver, imxdma_probe);
+}
+subsys_initcall(imxdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX dma driver");
+MODULE_LICENSE("GPL");