From dbde5c2934d10f87cc45ed9a9b95cac6f0c0fdd2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 24 Jul 2012 11:00:55 +0300
Subject: [PATCH 01/29] dw_dmac: use devm_* functions to simplify code

Use devm_kzalloc, devm_clk_get, devm_request_irq, and devm_request_and_ioremap
to reduce the code and to simplify the error path.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Viresh Kumar <viresh.linux@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac.c      | 53 +++++++-------------------------------
 drivers/dma/dw_dmac_regs.h |  2 --
 2 files changed, 10 insertions(+), 45 deletions(-)

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 57beb5c8e3fd..ed2c9499d3ea 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -1392,26 +1392,17 @@ static int __devinit dw_probe(struct platform_device *pdev)
 
 	size = sizeof(struct dw_dma);
 	size += pdata->nr_channels * sizeof(struct dw_dma_chan);
-	dw = kzalloc(size, GFP_KERNEL);
+	dw = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
 	if (!dw)
 		return -ENOMEM;
 
-	if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
-		err = -EBUSY;
-		goto err_kfree;
-	}
+	dw->regs = devm_request_and_ioremap(&pdev->dev, io);
+	if (!dw->regs)
+		return -EBUSY;
 
-	dw->regs = ioremap(io->start, DW_REGLEN);
-	if (!dw->regs) {
-		err = -ENOMEM;
-		goto err_release_r;
-	}
-
-	dw->clk = clk_get(&pdev->dev, "hclk");
-	if (IS_ERR(dw->clk)) {
-		err = PTR_ERR(dw->clk);
-		goto err_clk;
-	}
+	dw->clk = devm_clk_get(&pdev->dev, "hclk");
+	if (IS_ERR(dw->clk))
+		return PTR_ERR(dw->clk);
 	clk_prepare_enable(dw->clk);
 
 	/* Calculate all channel mask before DMA setup */
@@ -1423,9 +1414,10 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	/* disable BLOCK interrupts as well */
 	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 
-	err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+	err = devm_request_irq(&pdev->dev, irq, dw_dma_interrupt, 0,
+			       "dw_dmac", dw);
 	if (err)
-		goto err_irq;
+		return err;
 
 	platform_set_drvdata(pdev, dw);
 
@@ -1491,30 +1483,16 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	dma_async_device_register(&dw->dma);
 
 	return 0;
-
-err_irq:
-	clk_disable_unprepare(dw->clk);
-	clk_put(dw->clk);
-err_clk:
-	iounmap(dw->regs);
-	dw->regs = NULL;
-err_release_r:
-	release_resource(io);
-err_kfree:
-	kfree(dw);
-	return err;
 }
 
 static int __devexit dw_remove(struct platform_device *pdev)
 {
 	struct dw_dma		*dw = platform_get_drvdata(pdev);
 	struct dw_dma_chan	*dwc, *_dwc;
-	struct resource		*io;
 
 	dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
-	free_irq(platform_get_irq(pdev, 0), dw);
 	tasklet_kill(&dw->tasklet);
 
 	list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
@@ -1523,17 +1501,6 @@ static int __devexit dw_remove(struct platform_device *pdev)
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 	}
 
-	clk_disable_unprepare(dw->clk);
-	clk_put(dw->clk);
-
-	iounmap(dw->regs);
-	dw->regs = NULL;
-
-	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(io->start, DW_REGLEN);
-
-	kfree(dw);
-
 	return 0;
 }
 
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 50830bee087a..f6d92d72ae40 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -140,8 +140,6 @@ struct dw_dma_regs {
 /* Bitfields in CFG */
 #define DW_CFG_DMA_EN		(1 << 0)
 
-#define DW_REGLEN		0x400
-
 enum dw_dmac_flags {
 	DW_DMA_IS_CYCLIC = 0,
 };

From fed57cd375ece6d271e45d9ad630c7de632e74b6 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 20 Jul 2012 13:31:08 +0530
Subject: [PATCH 02/29] dma: tegra: enable/disable dma clock

Enable the DMA clock when allocating channel and
disable clock when freeing channels.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/tegra20-apb-dma.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index d52dbc6c54ab..24acd711e032 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -1119,15 +1119,21 @@ struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
 static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
 {
 	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma *tdma = tdc->tdma;
+	int ret;
 
 	dma_cookie_init(&tdc->dma_chan);
 	tdc->config_init = false;
-	return 0;
+	ret = clk_prepare_enable(tdma->dma_clk);
+	if (ret < 0)
+		dev_err(tdc2dev(tdc), "clk_prepare_enable failed: %d\n", ret);
+	return ret;
 }
 
 static void tegra_dma_free_chan_resources(struct dma_chan *dc)
 {
 	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma *tdma = tdc->tdma;
 
 	struct tegra_dma_desc *dma_desc;
 	struct tegra_dma_sg_req *sg_req;
@@ -1163,6 +1169,7 @@ static void tegra_dma_free_chan_resources(struct dma_chan *dc)
 		list_del(&sg_req->node);
 		kfree(sg_req);
 	}
+	clk_disable_unprepare(tdma->dma_clk);
 }
 
 /* Tegra20 specific DMA controller information */
@@ -1255,6 +1262,13 @@ static int __devinit tegra_dma_probe(struct platform_device *pdev)
 		}
 	}
 
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(tdma->dma_clk);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "clk_prepare_enable failed: %d\n", ret);
+		goto err_pm_disable;
+	}
+
 	/* Reset DMA controller */
 	tegra_periph_reset_assert(tdma->dma_clk);
 	udelay(2);
@@ -1265,6 +1279,8 @@ static int __devinit tegra_dma_probe(struct platform_device *pdev)
 	tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
 	tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
 
+	clk_disable_unprepare(tdma->dma_clk);
+
 	INIT_LIST_HEAD(&tdma->dma_dev.channels);
 	for (i = 0; i < cdata->nr_channels; i++) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];

From 94d3901c11ab4115740ec3891d07f086f18c8cd6 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sat, 4 Aug 2012 10:35:30 +0200
Subject: [PATCH 03/29] drivers/dma/sirf-dma.c: fix usage of devm functions

Fix some problems with the use of devm_ functions.

devm_kzalloc: devm_kfree is not needed

devm_ioremap: iounmap should not be used, no free is needed

devm_request_irq: the devm_free_irq is followed by irq_dispose_mapping.  I
don't know if it is safe to move the freeing of the irq in this case, so I
have just un-devm'd this function, since the implicit freeing is never
taken advantage of.

In the original code failure of of_address_to_resource jumped to free_mem,
but should have jumped to irq_dispose, since irq_of_parse_and_map has
completed at this point.

In the original code unmap_mem was after irq_dispose, but it should have
been before, again since irq_of_parse_and_map has completed at this point.

One of these problems was found using the following semantic match:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression x;
@@

*x = devm_ioremap(...)
...
iounmap(x);
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Acked-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/sirf-dma.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 434ad31174f2..1af9e4804a2c 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -570,21 +570,19 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 
 	if (of_property_read_u32(dn, "cell-index", &id)) {
 		dev_err(dev, "Fail to get DMAC index\n");
-		ret = -ENODEV;
-		goto free_mem;
+		return -ENODEV;
 	}
 
 	sdma->irq = irq_of_parse_and_map(dn, 0);
 	if (sdma->irq == NO_IRQ) {
 		dev_err(dev, "Error mapping IRQ!\n");
-		ret = -EINVAL;
-		goto free_mem;
+		return -EINVAL;
 	}
 
 	ret = of_address_to_resource(dn, 0, &res);
 	if (ret) {
 		dev_err(dev, "Error parsing memory region!\n");
-		goto free_mem;
+		goto irq_dispose;
 	}
 
 	regs_start = res.start;
@@ -597,12 +595,11 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 		goto irq_dispose;
 	}
 
-	ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
-		sdma);
+	ret = request_irq(sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME, sdma);
 	if (ret) {
 		dev_err(dev, "Error requesting IRQ!\n");
 		ret = -EINVAL;
-		goto unmap_mem;
+		goto irq_dispose;
 	}
 
 	dma = &sdma->dma;
@@ -652,13 +649,9 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 	return 0;
 
 free_irq:
-	devm_free_irq(dev, sdma->irq, sdma);
+	free_irq(sdma->irq, sdma);
 irq_dispose:
 	irq_dispose_mapping(sdma->irq);
-unmap_mem:
-	iounmap(sdma->base);
-free_mem:
-	devm_kfree(dev, sdma);
 	return ret;
 }
 
@@ -668,10 +661,8 @@ static int __devexit sirfsoc_dma_remove(struct platform_device *op)
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 
 	dma_async_device_unregister(&sdma->dma);
-	devm_free_irq(dev, sdma->irq, sdma);
+	free_irq(sdma->irq, sdma);
 	irq_dispose_mapping(sdma->irq);
-	iounmap(sdma->base);
-	devm_kfree(dev, sdma);
 	return 0;
 }
 

From 983d7beb7de4ba7c715ea62e982c9fe54fa554a1 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Tue, 14 Aug 2012 14:58:32 +0200
Subject: [PATCH 04/29] drivers/dma/amba-pl08x.c: fix error return code

Convert a 0 error return code to a negative one, as returned elsewhere in the
function.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier ret;
expression e,e1,e2,e3,e4,x;
@@

(
if (\(ret != 0\|ret < 0\) || ...) { ... return ...; }
|
ret = 0
)
... when != ret = e1
*x = \(kmalloc\|kzalloc\|kcalloc\|devm_kzalloc\|ioremap\|ioremap_nocache\|devm_ioremap\|devm_ioremap_nocache\)(...);
... when != x = e2
    when != ret = e3
*if (x == NULL || ...)
{
  ... when != ret = e4
*  return ret;
}
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/amba-pl08x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 6fbeebb9486f..d1cc5791476b 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1892,6 +1892,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->pd = dev_get_platdata(&adev->dev);
 	if (!pl08x->pd) {
 		dev_err(&adev->dev, "no platform data supplied\n");
+		ret = -EINVAL;
 		goto out_no_platdata;
 	}
 
@@ -1943,6 +1944,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 		dev_err(&adev->dev, "%s failed to allocate "
 			"physical channel holders\n",
 			__func__);
+		ret = -ENOMEM;
 		goto out_no_phychans;
 	}
 

From b707c65865e74b5cd98c66cb67fda8543248cb3b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 23 Aug 2012 13:41:58 +0200
Subject: [PATCH 05/29] dma/ste_dma40: Fixup clock usage during probe

Fixup some errorhandling for clocks during probe and make sure
to use clk_prepare as well as clk_enable.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/ste_dma40.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 000d309602b2..826d0d5d5b0b 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2920,19 +2920,23 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	struct d40_base *base = NULL;
 	int num_log_chans = 0;
 	int num_phy_chans;
+	int clk_ret = -EINVAL;
 	int i;
 	u32 pid;
 	u32 cid;
 	u8 rev;
 
 	clk = clk_get(&pdev->dev, NULL);
-
 	if (IS_ERR(clk)) {
 		d40_err(&pdev->dev, "No matching clock found\n");
 		goto failure;
 	}
 
-	clk_enable(clk);
+	clk_ret = clk_prepare_enable(clk);
+	if (clk_ret) {
+		d40_err(&pdev->dev, "Failed to prepare/enable clock\n");
+		goto failure;
+	}
 
 	/* Get IO for DMAC base address */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
@@ -3062,10 +3066,10 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	return base;
 
 failure:
-	if (!IS_ERR(clk)) {
-		clk_disable(clk);
+	if (!clk_ret)
+		clk_disable_unprepare(clk);
+	if (!IS_ERR(clk))
 		clk_put(clk);
-	}
 	if (virtbase)
 		iounmap(virtbase);
 	if (res)

From 8eb4da28b2544d0fed55a811515dfeb6c07a0447 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 24 Aug 2012 16:36:20 -0700
Subject: [PATCH 06/29] ioat: Adding Ivy Bridge IOATDMA PCI device IDs

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Dan Williams <djbw@db.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/ioat/pci.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 5e3a40f79945..c0573061b45d 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -40,6 +40,17 @@ MODULE_VERSION(IOAT_DMA_VERSION);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel Corporation");
 
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0	0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1	0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2	0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3	0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4	0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5	0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6	0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7	0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8	0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9	0x0e2f
+
 static struct pci_device_id ioat_pci_tbl[] = {
 	/* I/OAT v1 platforms */
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
@@ -83,6 +94,17 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
 
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
+
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);

From c2dde5f8f2095d7c623ff3565c1462e190272273 Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@ti.com>
Date: Wed, 22 Aug 2012 21:09:34 -0400
Subject: [PATCH 07/29] dmaengine: add TI EDMA DMA engine driver

Add a DMA engine driver for the TI EDMA controller. This driver
is implemented as a wrapper around the existing DaVinci private
DMA implementation. This approach allows for incremental conversion
of each peripheral driver to the DMA engine API. The EDMA driver
supports slave transfers but does not yet support cyclic transfers.

Signed-off-by: Matt Porter <mporter@ti.com>
Tested-by: Tom Rini <trini@ti.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/Kconfig  |  10 +
 drivers/dma/Makefile |   1 +
 drivers/dma/edma.c   | 671 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/edma.h |  29 ++
 4 files changed, 711 insertions(+)
 create mode 100644 drivers/dma/edma.c
 create mode 100644 include/linux/edma.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d06ea2950dd9..03517191cb13 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -208,6 +208,16 @@ config SIRF_DMA
 	help
 	  Enable support for the CSR SiRFprimaII DMA engine.
 
+config TI_EDMA
+	tristate "TI EDMA support"
+	depends on ARCH_DAVINCI
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	default n
+	help
+	  Enable support for the TI EDMA controller. This DMA
+	  engine is found on TI DaVinci and AM33xx parts.
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 4cf6b128ab9a..f5cf31063fb7 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
+obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
new file mode 100644
index 000000000000..05aea3ce8506
--- /dev/null
+++ b/drivers/dma/edma.c
@@ -0,0 +1,671 @@
+/*
+ * TI EDMA DMA engine driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <mach/edma.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/*
+ * This will go away when the private EDMA API is folded
+ * into this driver and the platform device(s) are
+ * instantiated in the arch code. We can only get away
+ * with this simplification because DA8XX may not be built
+ * in the same kernel image with other DaVinci parts. This
+ * avoids having to sprinkle dmaengine driver platform devices
+ * and data throughout all the existing board files.
+ */
+#ifdef CONFIG_ARCH_DAVINCI_DA8XX
+#define EDMA_CTLRS	2
+#define EDMA_CHANS	32
+#else
+#define EDMA_CTLRS	1
+#define EDMA_CHANS	64
+#endif /* CONFIG_ARCH_DAVINCI_DA8XX */
+
+/* Max of 16 segments per channel to conserve PaRAM slots */
+#define MAX_NR_SG		16
+#define EDMA_MAX_SLOTS		MAX_NR_SG
+#define EDMA_DESCRIPTORS	16
+
+struct edma_desc {
+	struct virt_dma_desc		vdesc;
+	struct list_head		node;
+	int				absync;
+	int				pset_nr;
+	struct edmacc_param		pset[0];
+};
+
+struct edma_cc;
+
+struct edma_chan {
+	struct virt_dma_chan		vchan;
+	struct list_head		node;
+	struct edma_desc		*edesc;
+	struct edma_cc			*ecc;
+	int				ch_num;
+	bool				alloced;
+	int				slot[EDMA_MAX_SLOTS];
+	dma_addr_t			addr;
+	int				addr_width;
+	int				maxburst;
+};
+
+struct edma_cc {
+	int				ctlr;
+	struct dma_device		dma_slave;
+	struct edma_chan		slave_chans[EDMA_CHANS];
+	int				num_slave_chans;
+	int				dummy_slot;
+};
+
+static inline struct edma_cc *to_edma_cc(struct dma_device *d)
+{
+	return container_of(d, struct edma_cc, dma_slave);
+}
+
+static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct edma_chan, vchan.chan);
+}
+
+static inline struct edma_desc
+*to_edma_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct edma_desc, vdesc.tx);
+}
+
+static void edma_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct edma_desc, vdesc));
+}
+
+/* Dispatch a queued descriptor to the controller (caller holds lock) */
+static void edma_execute(struct edma_chan *echan)
+{
+	struct virt_dma_desc *vdesc = vchan_next_desc(&echan->vchan);
+	struct edma_desc *edesc;
+	int i;
+
+	if (!vdesc) {
+		echan->edesc = NULL;
+		return;
+	}
+
+	list_del(&vdesc->node);
+
+	echan->edesc = edesc = to_edma_desc(&vdesc->tx);
+
+	/* Write descriptor PaRAM set(s) */
+	for (i = 0; i < edesc->pset_nr; i++) {
+		edma_write_slot(echan->slot[i], &edesc->pset[i]);
+		dev_dbg(echan->vchan.chan.device->dev,
+			"\n pset[%d]:\n"
+			"  chnum\t%d\n"
+			"  slot\t%d\n"
+			"  opt\t%08x\n"
+			"  src\t%08x\n"
+			"  dst\t%08x\n"
+			"  abcnt\t%08x\n"
+			"  ccnt\t%08x\n"
+			"  bidx\t%08x\n"
+			"  cidx\t%08x\n"
+			"  lkrld\t%08x\n",
+			i, echan->ch_num, echan->slot[i],
+			edesc->pset[i].opt,
+			edesc->pset[i].src,
+			edesc->pset[i].dst,
+			edesc->pset[i].a_b_cnt,
+			edesc->pset[i].ccnt,
+			edesc->pset[i].src_dst_bidx,
+			edesc->pset[i].src_dst_cidx,
+			edesc->pset[i].link_bcntrld);
+		/* Link to the previous slot if not the last set */
+		if (i != (edesc->pset_nr - 1))
+			edma_link(echan->slot[i], echan->slot[i+1]);
+		/* Final pset links to the dummy pset */
+		else
+			edma_link(echan->slot[i], echan->ecc->dummy_slot);
+	}
+
+	edma_start(echan->ch_num);
+}
+
+static int edma_terminate_all(struct edma_chan *echan)
+{
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+
+	/*
+	 * Stop DMA activity: we assume the callback will not be called
+	 * after edma_dma() returns (even if it does, it will see
+	 * echan->edesc is NULL and exit.)
+	 */
+	if (echan->edesc) {
+		echan->edesc = NULL;
+		edma_stop(echan->ch_num);
+	}
+
+	vchan_get_all_descriptors(&echan->vchan, &head);
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&echan->vchan, &head);
+
+	return 0;
+}
+
+
+static int edma_slave_config(struct edma_chan *echan,
+	struct dma_slave_config *config)
+{
+	if ((config->src_addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+	    (config->dst_addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	if (config->direction == DMA_MEM_TO_DEV) {
+		if (config->dst_addr)
+			echan->addr = config->dst_addr;
+		if (config->dst_addr_width)
+			echan->addr_width = config->dst_addr_width;
+		if (config->dst_maxburst)
+			echan->maxburst = config->dst_maxburst;
+	} else if (config->direction == DMA_DEV_TO_MEM) {
+		if (config->src_addr)
+			echan->addr = config->src_addr;
+		if (config->src_addr_width)
+			echan->addr_width = config->src_addr_width;
+		if (config->src_maxburst)
+			echan->maxburst = config->src_maxburst;
+	}
+
+	return 0;
+}
+
+static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			unsigned long arg)
+{
+	int ret = 0;
+	struct dma_slave_config *config;
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		edma_terminate_all(echan);
+		break;
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		ret = edma_slave_config(echan, config);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static struct dma_async_tx_descriptor *edma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction direction,
+	unsigned long tx_flags, void *context)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edma_desc *edesc;
+	struct scatterlist *sg;
+	int i;
+	int acnt, bcnt, ccnt, src, dst, cidx;
+	int src_bidx, dst_bidx, src_cidx, dst_cidx;
+
+	if (unlikely(!echan || !sgl || !sg_len))
+		return NULL;
+
+	if (echan->addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(dev, "Undefined slave buswidth\n");
+		return NULL;
+	}
+
+	if (sg_len > MAX_NR_SG) {
+		dev_err(dev, "Exceeded max SG segments %d > %d\n",
+			sg_len, MAX_NR_SG);
+		return NULL;
+	}
+
+	edesc = kzalloc(sizeof(*edesc) + sg_len *
+		sizeof(edesc->pset[0]), GFP_ATOMIC);
+	if (!edesc) {
+		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		return NULL;
+	}
+
+	edesc->pset_nr = sg_len;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		/* Allocate a PaRAM slot, if needed */
+		if (echan->slot[i] < 0) {
+			echan->slot[i] =
+				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+						EDMA_SLOT_ANY);
+			if (echan->slot[i] < 0) {
+				dev_err(dev, "Failed to allocate slot\n");
+				return NULL;
+			}
+		}
+
+		acnt = echan->addr_width;
+
+		/*
+		 * If the maxburst is equal to the fifo width, use
+		 * A-synced transfers. This allows for large contiguous
+		 * buffer transfers using only one PaRAM set.
+		 */
+		if (echan->maxburst == 1) {
+			edesc->absync = false;
+			ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
+			bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
+			if (bcnt)
+				ccnt++;
+			else
+				bcnt = SZ_64K - 1;
+			cidx = acnt;
+		/*
+		 * If maxburst is greater than the fifo address_width,
+		 * use AB-synced transfers where A count is the fifo
+		 * address_width and B count is the maxburst. In this
+		 * case, we are limited to transfers of C count frames
+		 * of (address_width * maxburst) where C count is limited
+		 * to SZ_64K-1. This places an upper bound on the length
+		 * of an SG segment that can be handled.
+		 */
+		} else {
+			edesc->absync = true;
+			bcnt = echan->maxburst;
+			ccnt = sg_dma_len(sg) / (acnt * bcnt);
+			if (ccnt > (SZ_64K - 1)) {
+				dev_err(dev, "Exceeded max SG segment size\n");
+				return NULL;
+			}
+			cidx = acnt * bcnt;
+		}
+
+		if (direction == DMA_MEM_TO_DEV) {
+			src = sg_dma_address(sg);
+			dst = echan->addr;
+			src_bidx = acnt;
+			src_cidx = cidx;
+			dst_bidx = 0;
+			dst_cidx = 0;
+		} else {
+			src = echan->addr;
+			dst = sg_dma_address(sg);
+			src_bidx = 0;
+			src_cidx = 0;
+			dst_bidx = acnt;
+			dst_cidx = cidx;
+		}
+
+		edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+		/* Configure A or AB synchronized transfers */
+		if (edesc->absync)
+			edesc->pset[i].opt |= SYNCDIM;
+		/* If this is the last set, enable completion interrupt flag */
+		if (i == sg_len - 1)
+			edesc->pset[i].opt |= TCINTEN;
+
+		edesc->pset[i].src = src;
+		edesc->pset[i].dst = dst;
+
+		edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
+		edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+		edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
+		edesc->pset[i].ccnt = ccnt;
+		edesc->pset[i].link_bcntrld = 0xffffffff;
+
+	}
+
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
+{
+	struct edma_chan *echan = data;
+	struct device *dev = echan->vchan.chan.device->dev;
+	struct edma_desc *edesc;
+	unsigned long flags;
+
+	/* Stop the channel */
+	edma_stop(echan->ch_num);
+
+	switch (ch_status) {
+	case DMA_COMPLETE:
+		dev_dbg(dev, "transfer complete on channel %d\n", ch_num);
+
+		spin_lock_irqsave(&echan->vchan.lock, flags);
+
+		edesc = echan->edesc;
+		if (edesc) {
+			edma_execute(echan);
+			vchan_cookie_complete(&edesc->vdesc);
+		}
+
+		spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
+		break;
+	case DMA_CC_ERROR:
+		dev_dbg(dev, "transfer error on channel %d\n", ch_num);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Alloc channel resources */
+static int edma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	int ret;
+	int a_ch_num;
+	LIST_HEAD(descs);
+
+	a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback,
+					chan, EVENTQ_DEFAULT);
+
+	if (a_ch_num < 0) {
+		ret = -ENODEV;
+		goto err_no_chan;
+	}
+
+	if (a_ch_num != echan->ch_num) {
+		dev_err(dev, "failed to allocate requested channel %u:%u\n",
+			EDMA_CTLR(echan->ch_num),
+			EDMA_CHAN_SLOT(echan->ch_num));
+		ret = -ENODEV;
+		goto err_wrong_chan;
+	}
+
+	echan->alloced = true;
+	echan->slot[0] = echan->ch_num;
+
+	dev_info(dev, "allocated channel for %u:%u\n",
+		 EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num));
+
+	return 0;
+
+err_wrong_chan:
+	edma_free_channel(a_ch_num);
+err_no_chan:
+	return ret;
+}
+
+/* Free channel resources */
+static void edma_free_chan_resources(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	int i;
+
+	/* Terminate transfers */
+	edma_stop(echan->ch_num);
+
+	vchan_free_chan_resources(&echan->vchan);
+
+	/* Free EDMA PaRAM slots */
+	for (i = 1; i < EDMA_MAX_SLOTS; i++) {
+		if (echan->slot[i] >= 0) {
+			edma_free_slot(echan->slot[i]);
+			echan->slot[i] = -1;
+		}
+	}
+
+	/* Free EDMA channel */
+	if (echan->alloced) {
+		edma_free_channel(echan->ch_num);
+		echan->alloced = false;
+	}
+
+	dev_info(dev, "freeing channel for %u\n", echan->ch_num);
+}
+
+/* Send pending descriptor to hardware */
+static void edma_issue_pending(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+	if (vchan_issue_pending(&echan->vchan) && !echan->edesc)
+		edma_execute(echan);
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+}
+
+static size_t edma_desc_size(struct edma_desc *edesc)
+{
+	int i;
+	size_t size;
+
+	if (edesc->absync)
+		for (size = i = 0; i < edesc->pset_nr; i++)
+			size += (edesc->pset[i].a_b_cnt & 0xffff) *
+				(edesc->pset[i].a_b_cnt >> 16) *
+				 edesc->pset[i].ccnt;
+	else
+		size = (edesc->pset[0].a_b_cnt & 0xffff) *
+			(edesc->pset[0].a_b_cnt >> 16) +
+			(edesc->pset[0].a_b_cnt & 0xffff) *
+			(SZ_64K - 1) * edesc->pset[0].ccnt;
+
+	return size;
+}
+
+/* Check request completion status */
+static enum dma_status edma_tx_status(struct dma_chan *chan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+	vdesc = vchan_find_desc(&echan->vchan, cookie);
+	if (vdesc) {
+		txstate->residue = edma_desc_size(to_edma_desc(&vdesc->tx));
+	} else if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) {
+		struct edma_desc *edesc = echan->edesc;
+		txstate->residue = edma_desc_size(edesc);
+	} else {
+		txstate->residue = 0;
+	}
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
+	return ret;
+}
+
+static void __init edma_chan_init(struct edma_cc *ecc,
+				  struct dma_device *dma,
+				  struct edma_chan *echans)
+{
+	int i, j;
+
+	for (i = 0; i < EDMA_CHANS; i++) {
+		struct edma_chan *echan = &echans[i];
+		echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i);
+		echan->ecc = ecc;
+		echan->vchan.desc_free = edma_desc_free;
+
+		vchan_init(&echan->vchan, dma);
+
+		INIT_LIST_HEAD(&echan->node);
+		for (j = 0; j < EDMA_MAX_SLOTS; j++)
+			echan->slot[j] = -1;
+	}
+}
+
+static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
+			  struct device *dev)
+{
+	dma->device_prep_slave_sg = edma_prep_slave_sg;
+	dma->device_alloc_chan_resources = edma_alloc_chan_resources;
+	dma->device_free_chan_resources = edma_free_chan_resources;
+	dma->device_issue_pending = edma_issue_pending;
+	dma->device_tx_status = edma_tx_status;
+	dma->device_control = edma_control;
+	dma->dev = dev;
+
+	INIT_LIST_HEAD(&dma->channels);
+}
+
+static int __devinit edma_probe(struct platform_device *pdev)
+{
+	struct edma_cc *ecc;
+	int ret;
+
+	ecc = devm_kzalloc(&pdev->dev, sizeof(*ecc), GFP_KERNEL);
+	if (!ecc) {
+		dev_err(&pdev->dev, "Can't allocate controller\n");
+		return -ENOMEM;
+	}
+
+	ecc->ctlr = pdev->id;
+	ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY);
+	if (ecc->dummy_slot < 0) {
+		dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n");
+		return -EIO;
+	}
+
+	dma_cap_zero(ecc->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, ecc->dma_slave.cap_mask);
+
+	edma_dma_init(ecc, &ecc->dma_slave, &pdev->dev);
+
+	edma_chan_init(ecc, &ecc->dma_slave, ecc->slave_chans);
+
+	ret = dma_async_device_register(&ecc->dma_slave);
+	if (ret)
+		goto err_reg1;
+
+	platform_set_drvdata(pdev, ecc);
+
+	dev_info(&pdev->dev, "TI EDMA DMA engine driver\n");
+
+	return 0;
+
+err_reg1:
+	edma_free_slot(ecc->dummy_slot);
+	return ret;
+}
+
+static int __devexit edma_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&ecc->dma_slave);
+	edma_free_slot(ecc->dummy_slot);
+
+	return 0;
+}
+
+static struct platform_driver edma_driver = {
+	.probe		= edma_probe,
+	.remove		= __devexit_p(edma_remove),
+	.driver = {
+		.name = "edma-dma-engine",
+		.owner = THIS_MODULE,
+	},
+};
+
+bool edma_filter_fn(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->driver == &edma_driver.driver) {
+		struct edma_chan *echan = to_edma_chan(chan);
+		unsigned ch_req = *(unsigned *)param;
+		return ch_req == echan->ch_num;
+	}
+	return false;
+}
+EXPORT_SYMBOL(edma_filter_fn);
+
+static struct platform_device *pdev0, *pdev1;
+
+static const struct platform_device_info edma_dev_info0 = {
+	.name = "edma-dma-engine",
+	.id = 0,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
+static const struct platform_device_info edma_dev_info1 = {
+	.name = "edma-dma-engine",
+	.id = 1,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
+static int edma_init(void)
+{
+	int ret = platform_driver_register(&edma_driver);
+
+	if (ret == 0) {
+		pdev0 = platform_device_register_full(&edma_dev_info0);
+		if (IS_ERR(pdev0)) {
+			platform_driver_unregister(&edma_driver);
+			ret = PTR_ERR(pdev0);
+			goto out;
+		}
+	}
+
+	if (EDMA_CTLRS == 2) {
+		pdev1 = platform_device_register_full(&edma_dev_info1);
+		if (IS_ERR(pdev1)) {
+			platform_driver_unregister(&edma_driver);
+			platform_device_unregister(pdev0);
+			ret = PTR_ERR(pdev1);
+		}
+	}
+
+out:
+	return ret;
+}
+subsys_initcall(edma_init);
+
+static void __exit edma_exit(void)
+{
+	platform_device_unregister(pdev0);
+	if (pdev1)
+		platform_device_unregister(pdev1);
+	platform_driver_unregister(&edma_driver);
+}
+module_exit(edma_exit);
+
+MODULE_AUTHOR("Matt Porter <mporter@ti.com>");
+MODULE_DESCRIPTION("TI EDMA DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/edma.h b/include/linux/edma.h
new file mode 100644
index 000000000000..a1307e7827e8
--- /dev/null
+++ b/include/linux/edma.h
@@ -0,0 +1,29 @@
+/*
+ * TI EDMA DMA engine driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __LINUX_EDMA_H
+#define __LINUX_EDMA_H
+
+struct dma_chan;
+
+#if defined(CONFIG_TI_EDMA) || defined(CONFIG_TI_EDMA_MODULE)
+bool edma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool edma_filter_fn(struct dma_chan *chan, void *param)
+{
+	return false;
+}
+#endif
+
+#endif

From b5daabbd3d1803cbb4ba89bde3968891dd4e1d1c Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@ti.com>
Date: Wed, 22 Aug 2012 21:09:35 -0400
Subject: [PATCH 08/29] mmc: davinci_mmc: convert to DMA engine API

Removes use of the DaVinci EDMA private DMA API and replaces
it with use of the DMA engine API.

Signed-off-by: Matt Porter <mporter@ti.com>
Tested-by: Tom Rini <trini@ti.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/mmc/host/davinci_mmc.c | 267 ++++++++++-----------------------
 1 file changed, 80 insertions(+), 187 deletions(-)

diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 7cf6c624bf73..c5e1eebcd588 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -30,11 +30,12 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
+#include <linux/edma.h>
 #include <linux/mmc/mmc.h>
 
 #include <mach/mmc.h>
-#include <mach/edma.h>
 
 /*
  * Register Definitions
@@ -200,21 +201,13 @@ struct mmc_davinci_host {
 	u32 bytes_left;
 
 	u32 rxdma, txdma;
+	struct dma_chan *dma_tx;
+	struct dma_chan *dma_rx;
 	bool use_dma;
 	bool do_dma;
 	bool sdio_int;
 	bool active_request;
 
-	/* Scatterlist DMA uses one or more parameter RAM entries:
-	 * the main one (associated with rxdma or txdma) plus zero or
-	 * more links.  The entries for a given transfer differ only
-	 * by memory buffer (address, length) and link field.
-	 */
-	struct edmacc_param	tx_template;
-	struct edmacc_param	rx_template;
-	unsigned		n_link;
-	u32			links[MAX_NR_SG - 1];
-
 	/* For PIO we walk scatterlists one segment at a time. */
 	unsigned int		sg_len;
 	struct scatterlist *sg;
@@ -410,153 +403,74 @@ static void mmc_davinci_start_command(struct mmc_davinci_host *host,
 
 static void davinci_abort_dma(struct mmc_davinci_host *host)
 {
-	int sync_dev;
+	struct dma_chan *sync_dev;
 
 	if (host->data_dir == DAVINCI_MMC_DATADIR_READ)
-		sync_dev = host->rxdma;
+		sync_dev = host->dma_rx;
 	else
-		sync_dev = host->txdma;
+		sync_dev = host->dma_tx;
 
-	edma_stop(sync_dev);
-	edma_clean_channel(sync_dev);
+	dmaengine_terminate_all(sync_dev);
 }
 
-static void
-mmc_davinci_xfer_done(struct mmc_davinci_host *host, struct mmc_data *data);
-
-static void mmc_davinci_dma_cb(unsigned channel, u16 ch_status, void *data)
-{
-	if (DMA_COMPLETE != ch_status) {
-		struct mmc_davinci_host *host = data;
-
-		/* Currently means:  DMA Event Missed, or "null" transfer
-		 * request was seen.  In the future, TC errors (like bad
-		 * addresses) might be presented too.
-		 */
-		dev_warn(mmc_dev(host->mmc), "DMA %s error\n",
-			(host->data->flags & MMC_DATA_WRITE)
-				? "write" : "read");
-		host->data->error = -EIO;
-		mmc_davinci_xfer_done(host, host->data);
-	}
-}
-
-/* Set up tx or rx template, to be modified and updated later */
-static void __init mmc_davinci_dma_setup(struct mmc_davinci_host *host,
-		bool tx, struct edmacc_param *template)
-{
-	unsigned	sync_dev;
-	const u16	acnt = 4;
-	const u16	bcnt = rw_threshold >> 2;
-	const u16	ccnt = 0;
-	u32		src_port = 0;
-	u32		dst_port = 0;
-	s16		src_bidx, dst_bidx;
-	s16		src_cidx, dst_cidx;
-
-	/*
-	 * A-B Sync transfer:  each DMA request is for one "frame" of
-	 * rw_threshold bytes, broken into "acnt"-size chunks repeated
-	 * "bcnt" times.  Each segment needs "ccnt" such frames; since
-	 * we tell the block layer our mmc->max_seg_size limit, we can
-	 * trust (later) that it's within bounds.
-	 *
-	 * The FIFOs are read/written in 4-byte chunks (acnt == 4) and
-	 * EDMA will optimize memory operations to use larger bursts.
-	 */
-	if (tx) {
-		sync_dev = host->txdma;
-
-		/* src_prt, ccnt, and link to be set up later */
-		src_bidx = acnt;
-		src_cidx = acnt * bcnt;
-
-		dst_port = host->mem_res->start + DAVINCI_MMCDXR;
-		dst_bidx = 0;
-		dst_cidx = 0;
-	} else {
-		sync_dev = host->rxdma;
-
-		src_port = host->mem_res->start + DAVINCI_MMCDRR;
-		src_bidx = 0;
-		src_cidx = 0;
-
-		/* dst_prt, ccnt, and link to be set up later */
-		dst_bidx = acnt;
-		dst_cidx = acnt * bcnt;
-	}
-
-	/*
-	 * We can't use FIFO mode for the FIFOs because MMC FIFO addresses
-	 * are not 256-bit (32-byte) aligned.  So we use INCR, and the W8BIT
-	 * parameter is ignored.
-	 */
-	edma_set_src(sync_dev, src_port, INCR, W8BIT);
-	edma_set_dest(sync_dev, dst_port, INCR, W8BIT);
-
-	edma_set_src_index(sync_dev, src_bidx, src_cidx);
-	edma_set_dest_index(sync_dev, dst_bidx, dst_cidx);
-
-	edma_set_transfer_params(sync_dev, acnt, bcnt, ccnt, 8, ABSYNC);
-
-	edma_read_slot(sync_dev, template);
-
-	/* don't bother with irqs or chaining */
-	template->opt |= EDMA_CHAN_SLOT(sync_dev) << 12;
-}
-
-static void mmc_davinci_send_dma_request(struct mmc_davinci_host *host,
+static int mmc_davinci_send_dma_request(struct mmc_davinci_host *host,
 		struct mmc_data *data)
 {
-	struct edmacc_param	*template;
-	int			channel, slot;
-	unsigned		link;
-	struct scatterlist	*sg;
-	unsigned		sg_len;
-	unsigned		bytes_left = host->bytes_left;
-	const unsigned		shift = ffs(rw_threshold) - 1;
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *desc;
+	int ret = 0;
 
 	if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE) {
-		template = &host->tx_template;
-		channel = host->txdma;
+		struct dma_slave_config dma_tx_conf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = host->mem_res->start + DAVINCI_MMCDXR,
+			.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+			.dst_maxburst =
+				rw_threshold / DMA_SLAVE_BUSWIDTH_4_BYTES,
+		};
+		chan = host->dma_tx;
+		dmaengine_slave_config(host->dma_tx, &dma_tx_conf);
+
+		desc = dmaengine_prep_slave_sg(host->dma_tx,
+				data->sg,
+				host->sg_len,
+				DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!desc) {
+			dev_dbg(mmc_dev(host->mmc),
+				"failed to allocate DMA TX descriptor");
+			ret = -1;
+			goto out;
+		}
 	} else {
-		template = &host->rx_template;
-		channel = host->rxdma;
+		struct dma_slave_config dma_rx_conf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = host->mem_res->start + DAVINCI_MMCDRR,
+			.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+			.src_maxburst =
+				rw_threshold / DMA_SLAVE_BUSWIDTH_4_BYTES,
+		};
+		chan = host->dma_rx;
+		dmaengine_slave_config(host->dma_rx, &dma_rx_conf);
+
+		desc = dmaengine_prep_slave_sg(host->dma_rx,
+				data->sg,
+				host->sg_len,
+				DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!desc) {
+			dev_dbg(mmc_dev(host->mmc),
+				"failed to allocate DMA RX descriptor");
+			ret = -1;
+			goto out;
+		}
 	}
 
-	/* We know sg_len and ccnt will never be out of range because
-	 * we told the mmc layer which in turn tells the block layer
-	 * to ensure that it only hands us one scatterlist segment
-	 * per EDMA PARAM entry.  Update the PARAM
-	 * entries needed for each segment of this scatterlist.
-	 */
-	for (slot = channel, link = 0, sg = data->sg, sg_len = host->sg_len;
-			sg_len-- != 0 && bytes_left;
-			sg = sg_next(sg), slot = host->links[link++]) {
-		u32		buf = sg_dma_address(sg);
-		unsigned	count = sg_dma_len(sg);
+	dmaengine_submit(desc);
+	dma_async_issue_pending(chan);
 
-		template->link_bcntrld = sg_len
-				? (EDMA_CHAN_SLOT(host->links[link]) << 5)
-				: 0xffff;
-
-		if (count > bytes_left)
-			count = bytes_left;
-		bytes_left -= count;
-
-		if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE)
-			template->src = buf;
-		else
-			template->dst = buf;
-		template->ccnt = count >> shift;
-
-		edma_write_slot(slot, template);
-	}
-
-	if (host->version == MMC_CTLR_VERSION_2)
-		edma_clear_event(channel);
-
-	edma_start(channel);
+out:
+	return ret;
 }
 
 static int mmc_davinci_start_dma_transfer(struct mmc_davinci_host *host,
@@ -564,6 +478,7 @@ static int mmc_davinci_start_dma_transfer(struct mmc_davinci_host *host,
 {
 	int i;
 	int mask = rw_threshold - 1;
+	int ret = 0;
 
 	host->sg_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
 				((data->flags & MMC_DATA_WRITE)
@@ -583,70 +498,48 @@ static int mmc_davinci_start_dma_transfer(struct mmc_davinci_host *host,
 	}
 
 	host->do_dma = 1;
-	mmc_davinci_send_dma_request(host, data);
+	ret = mmc_davinci_send_dma_request(host, data);
 
-	return 0;
+	return ret;
 }
 
 static void __init_or_module
 davinci_release_dma_channels(struct mmc_davinci_host *host)
 {
-	unsigned	i;
-
 	if (!host->use_dma)
 		return;
 
-	for (i = 0; i < host->n_link; i++)
-		edma_free_slot(host->links[i]);
-
-	edma_free_channel(host->txdma);
-	edma_free_channel(host->rxdma);
+	dma_release_channel(host->dma_tx);
+	dma_release_channel(host->dma_rx);
 }
 
 static int __init davinci_acquire_dma_channels(struct mmc_davinci_host *host)
 {
-	u32 link_size;
-	int r, i;
+	int r;
+	dma_cap_mask_t mask;
 
-	/* Acquire master DMA write channel */
-	r = edma_alloc_channel(host->txdma, mmc_davinci_dma_cb, host,
-			EVENTQ_DEFAULT);
-	if (r < 0) {
-		dev_warn(mmc_dev(host->mmc), "alloc %s channel err %d\n",
-				"tx", r);
-		return r;
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	host->dma_tx =
+		dma_request_channel(mask, edma_filter_fn, &host->txdma);
+	if (!host->dma_tx) {
+		dev_err(mmc_dev(host->mmc), "Can't get dma_tx channel\n");
+		return -ENODEV;
 	}
-	mmc_davinci_dma_setup(host, true, &host->tx_template);
 
-	/* Acquire master DMA read channel */
-	r = edma_alloc_channel(host->rxdma, mmc_davinci_dma_cb, host,
-			EVENTQ_DEFAULT);
-	if (r < 0) {
-		dev_warn(mmc_dev(host->mmc), "alloc %s channel err %d\n",
-				"rx", r);
+	host->dma_rx =
+		dma_request_channel(mask, edma_filter_fn, &host->rxdma);
+	if (!host->dma_rx) {
+		dev_err(mmc_dev(host->mmc), "Can't get dma_rx channel\n");
+		r = -ENODEV;
 		goto free_master_write;
 	}
-	mmc_davinci_dma_setup(host, false, &host->rx_template);
-
-	/* Allocate parameter RAM slots, which will later be bound to a
-	 * channel as needed to handle a scatterlist.
-	 */
-	link_size = min_t(unsigned, host->nr_sg, ARRAY_SIZE(host->links));
-	for (i = 0; i < link_size; i++) {
-		r = edma_alloc_slot(EDMA_CTLR(host->txdma), EDMA_SLOT_ANY);
-		if (r < 0) {
-			dev_dbg(mmc_dev(host->mmc), "dma PaRAM alloc --> %d\n",
-				r);
-			break;
-		}
-		host->links[i] = r;
-	}
-	host->n_link = i;
 
 	return 0;
 
 free_master_write:
-	edma_free_channel(host->txdma);
+	dma_release_channel(host->dma_tx);
 
 	return r;
 }
@@ -1359,7 +1252,7 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev)
 	 * Each hw_seg uses one EDMA parameter RAM slot, always one
 	 * channel and then usually some linked slots.
 	 */
-	mmc->max_segs		= 1 + host->n_link;
+	mmc->max_segs		= MAX_NR_SG;
 
 	/* EDMA limit per hw segment (one or two MBytes) */
 	mmc->max_seg_size	= MAX_CCNT * rw_threshold;

From 048177ce3b3962852fd34a7e04938959271c7e70 Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@ti.com>
Date: Wed, 22 Aug 2012 21:09:36 -0400
Subject: [PATCH 09/29] spi: spi-davinci: convert to DMA engine API

Removes use of the DaVinci EDMA private DMA API and replaces
it with use of the DMA engine API.

Signed-off-by: Matt Porter <mporter@ti.com>
Tested-by: Tom Rini <trini@ti.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/spi/spi-davinci.c | 284 +++++++++++++++++---------------------
 1 file changed, 126 insertions(+), 158 deletions(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 9b2901feaf78..c1ec52d46714 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -25,13 +25,14 @@
 #include <linux/platform_device.h>
 #include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
+#include <linux/edma.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/slab.h>
 
 #include <mach/spi.h>
-#include <mach/edma.h>
 
 #define SPI_NO_RESOURCE		((resource_size_t)-1)
 
@@ -113,14 +114,6 @@
 #define SPIDEF		0x4c
 #define SPIFMT0		0x50
 
-/* We have 2 DMA channels per CS, one for RX and one for TX */
-struct davinci_spi_dma {
-	int			tx_channel;
-	int			rx_channel;
-	int			dummy_param_slot;
-	enum dma_event_q	eventq;
-};
-
 /* SPI Controller driver's private data. */
 struct davinci_spi {
 	struct spi_bitbang	bitbang;
@@ -134,11 +127,14 @@ struct davinci_spi {
 
 	const void		*tx;
 	void			*rx;
-#define SPI_TMP_BUFSZ	(SMP_CACHE_BYTES + 1)
-	u8			rx_tmp_buf[SPI_TMP_BUFSZ];
 	int			rcount;
 	int			wcount;
-	struct davinci_spi_dma	dma;
+
+	struct dma_chan		*dma_rx;
+	struct dma_chan		*dma_tx;
+	int			dma_rx_chnum;
+	int			dma_tx_chnum;
+
 	struct davinci_spi_platform_data *pdata;
 
 	void			(*get_rx)(u32 rx_data, struct davinci_spi *);
@@ -496,21 +492,23 @@ out:
 	return errors;
 }
 
-static void davinci_spi_dma_callback(unsigned lch, u16 status, void *data)
+static void davinci_spi_dma_rx_callback(void *data)
 {
-	struct davinci_spi *dspi = data;
-	struct davinci_spi_dma *dma = &dspi->dma;
+	struct davinci_spi *dspi = (struct davinci_spi *)data;
 
-	edma_stop(lch);
+	dspi->rcount = 0;
 
-	if (status == DMA_COMPLETE) {
-		if (lch == dma->rx_channel)
-			dspi->rcount = 0;
-		if (lch == dma->tx_channel)
-			dspi->wcount = 0;
-	}
+	if (!dspi->wcount && !dspi->rcount)
+		complete(&dspi->done);
+}
 
-	if ((!dspi->wcount && !dspi->rcount) || (status != DMA_COMPLETE))
+static void davinci_spi_dma_tx_callback(void *data)
+{
+	struct davinci_spi *dspi = (struct davinci_spi *)data;
+
+	dspi->wcount = 0;
+
+	if (!dspi->wcount && !dspi->rcount)
 		complete(&dspi->done);
 }
 
@@ -526,20 +524,20 @@ static void davinci_spi_dma_callback(unsigned lch, u16 status, void *data)
 static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct davinci_spi *dspi;
-	int data_type, ret;
+	int data_type, ret = -ENOMEM;
 	u32 tx_data, spidat1;
 	u32 errors = 0;
 	struct davinci_spi_config *spicfg;
 	struct davinci_spi_platform_data *pdata;
 	unsigned uninitialized_var(rx_buf_count);
-	struct device *sdev;
+	void *dummy_buf = NULL;
+	struct scatterlist sg_rx, sg_tx;
 
 	dspi = spi_master_get_devdata(spi->master);
 	pdata = dspi->pdata;
 	spicfg = (struct davinci_spi_config *)spi->controller_data;
 	if (!spicfg)
 		spicfg = &davinci_spi_default_cfg;
-	sdev = dspi->bitbang.master->dev.parent;
 
 	/* convert len to words based on bits_per_word */
 	data_type = dspi->bytes_per_word[spi->chip_select];
@@ -567,112 +565,83 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 		spidat1 |= tx_data & 0xFFFF;
 		iowrite32(spidat1, dspi->base + SPIDAT1);
 	} else {
-		struct davinci_spi_dma *dma;
-		unsigned long tx_reg, rx_reg;
-		struct edmacc_param param;
-		void *rx_buf;
-		int b, c;
+		struct dma_slave_config dma_rx_conf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = (unsigned long)dspi->pbase + SPIBUF,
+			.src_addr_width = data_type,
+			.src_maxburst = 1,
+		};
+		struct dma_slave_config dma_tx_conf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = (unsigned long)dspi->pbase + SPIDAT1,
+			.dst_addr_width = data_type,
+			.dst_maxburst = 1,
+		};
+		struct dma_async_tx_descriptor *rxdesc;
+		struct dma_async_tx_descriptor *txdesc;
+		void *buf;
 
-		dma = &dspi->dma;
+		dummy_buf = kzalloc(t->len, GFP_KERNEL);
+		if (!dummy_buf)
+			goto err_alloc_dummy_buf;
 
-		tx_reg = (unsigned long)dspi->pbase + SPIDAT1;
-		rx_reg = (unsigned long)dspi->pbase + SPIBUF;
+		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
+		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
 
-		/*
-		 * Transmit DMA setup
-		 *
-		 * If there is transmit data, map the transmit buffer, set it
-		 * as the source of data and set the source B index to data
-		 * size. If there is no transmit data, set the transmit register
-		 * as the source of data, and set the source B index to zero.
-		 *
-		 * The destination is always the transmit register itself. And
-		 * the destination never increments.
-		 */
-
-		if (t->tx_buf) {
-			t->tx_dma = dma_map_single(&spi->dev, (void *)t->tx_buf,
-						t->len, DMA_TO_DEVICE);
-			if (dma_mapping_error(&spi->dev, t->tx_dma)) {
-				dev_dbg(sdev, "Unable to DMA map %d bytes"
-						"TX buffer\n", t->len);
-				return -ENOMEM;
-			}
-		}
-
-		/*
-		 * If number of words is greater than 65535, then we need
-		 * to configure a 3 dimension transfer.  Use the BCNTRLD
-		 * feature to allow for transfers that aren't even multiples
-		 * of 65535 (or any other possible b size) by first transferring
-		 * the remainder amount then grabbing the next N blocks of
-		 * 65535 words.
-		 */
-
-		c = dspi->wcount / (SZ_64K - 1);	/* N 65535 Blocks */
-		b = dspi->wcount - c * (SZ_64K - 1);	/* Remainder */
-		if (b)
-			c++;
+		sg_init_table(&sg_rx, 1);
+		if (!t->rx_buf)
+			buf = dummy_buf;
 		else
-			b = SZ_64K - 1;
-
-		param.opt = TCINTEN | EDMA_TCC(dma->tx_channel);
-		param.src = t->tx_buf ? t->tx_dma : tx_reg;
-		param.a_b_cnt = b << 16 | data_type;
-		param.dst = tx_reg;
-		param.src_dst_bidx = t->tx_buf ? data_type : 0;
-		param.link_bcntrld = 0xffffffff;
-		param.src_dst_cidx = t->tx_buf ? data_type : 0;
-		param.ccnt = c;
-		edma_write_slot(dma->tx_channel, &param);
-		edma_link(dma->tx_channel, dma->dummy_param_slot);
-
-		/*
-		 * Receive DMA setup
-		 *
-		 * If there is receive buffer, use it to receive data. If there
-		 * is none provided, use a temporary receive buffer. Set the
-		 * destination B index to 0 so effectively only one byte is used
-		 * in the temporary buffer (address does not increment).
-		 *
-		 * The source of receive data is the receive data register. The
-		 * source address never increments.
-		 */
-
-		if (t->rx_buf) {
-			rx_buf = t->rx_buf;
-			rx_buf_count = t->len;
-		} else {
-			rx_buf = dspi->rx_tmp_buf;
-			rx_buf_count = sizeof(dspi->rx_tmp_buf);
+			buf = t->rx_buf;
+		t->rx_dma = dma_map_single(&spi->dev, buf,
+				t->len, DMA_FROM_DEVICE);
+		if (!t->rx_dma) {
+			ret = -EFAULT;
+			goto err_rx_map;
 		}
+		sg_dma_address(&sg_rx) = t->rx_dma;
+		sg_dma_len(&sg_rx) = t->len;
 
-		t->rx_dma = dma_map_single(&spi->dev, rx_buf, rx_buf_count,
-							DMA_FROM_DEVICE);
-		if (dma_mapping_error(&spi->dev, t->rx_dma)) {
-			dev_dbg(sdev, "Couldn't DMA map a %d bytes RX buffer\n",
-								rx_buf_count);
-			if (t->tx_buf)
-				dma_unmap_single(&spi->dev, t->tx_dma, t->len,
-								DMA_TO_DEVICE);
-			return -ENOMEM;
+		sg_init_table(&sg_tx, 1);
+		if (!t->tx_buf)
+			buf = dummy_buf;
+		else
+			buf = (void *)t->tx_buf;
+		t->tx_dma = dma_map_single(&spi->dev, buf,
+				t->len, DMA_FROM_DEVICE);
+		if (!t->tx_dma) {
+			ret = -EFAULT;
+			goto err_tx_map;
 		}
+		sg_dma_address(&sg_tx) = t->tx_dma;
+		sg_dma_len(&sg_tx) = t->len;
 
-		param.opt = TCINTEN | EDMA_TCC(dma->rx_channel);
-		param.src = rx_reg;
-		param.a_b_cnt = b << 16 | data_type;
-		param.dst = t->rx_dma;
-		param.src_dst_bidx = (t->rx_buf ? data_type : 0) << 16;
-		param.link_bcntrld = 0xffffffff;
-		param.src_dst_cidx = (t->rx_buf ? data_type : 0) << 16;
-		param.ccnt = c;
-		edma_write_slot(dma->rx_channel, &param);
+		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
+				&sg_rx, 1, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!rxdesc)
+			goto err_desc;
+
+		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
+				&sg_tx, 1, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!txdesc)
+			goto err_desc;
+
+		rxdesc->callback = davinci_spi_dma_rx_callback;
+		rxdesc->callback_param = (void *)dspi;
+		txdesc->callback = davinci_spi_dma_tx_callback;
+		txdesc->callback_param = (void *)dspi;
 
 		if (pdata->cshold_bug)
 			iowrite16(spidat1 >> 16, dspi->base + SPIDAT1 + 2);
 
-		edma_start(dma->rx_channel);
-		edma_start(dma->tx_channel);
+		dmaengine_submit(rxdesc);
+		dmaengine_submit(txdesc);
+
+		dma_async_issue_pending(dspi->dma_rx);
+		dma_async_issue_pending(dspi->dma_tx);
+
 		set_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
 	}
 
@@ -690,15 +659,13 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
 	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
-
-		if (t->tx_buf)
-			dma_unmap_single(&spi->dev, t->tx_dma, t->len,
-								DMA_TO_DEVICE);
-
-		dma_unmap_single(&spi->dev, t->rx_dma, rx_buf_count,
-							DMA_FROM_DEVICE);
-
 		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
+
+		dma_unmap_single(&spi->dev, t->rx_dma,
+				t->len, DMA_FROM_DEVICE);
+		dma_unmap_single(&spi->dev, t->tx_dma,
+				t->len, DMA_TO_DEVICE);
+		kfree(dummy_buf);
 	}
 
 	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
@@ -716,11 +683,20 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	}
 
 	if (dspi->rcount != 0 || dspi->wcount != 0) {
-		dev_err(sdev, "SPI data transfer error\n");
+		dev_err(&spi->dev, "SPI data transfer error\n");
 		return -EIO;
 	}
 
 	return t->len;
+
+err_desc:
+	dma_unmap_single(&spi->dev, t->tx_dma, t->len, DMA_TO_DEVICE);
+err_tx_map:
+	dma_unmap_single(&spi->dev, t->rx_dma, t->len, DMA_FROM_DEVICE);
+err_rx_map:
+	kfree(dummy_buf);
+err_alloc_dummy_buf:
+	return ret;
 }
 
 /**
@@ -751,39 +727,33 @@ static irqreturn_t davinci_spi_irq(s32 irq, void *data)
 
 static int davinci_spi_request_dma(struct davinci_spi *dspi)
 {
+	dma_cap_mask_t mask;
+	struct device *sdev = dspi->bitbang.master->dev.parent;
 	int r;
-	struct davinci_spi_dma *dma = &dspi->dma;
 
-	r = edma_alloc_channel(dma->rx_channel, davinci_spi_dma_callback, dspi,
-								dma->eventq);
-	if (r < 0) {
-		pr_err("Unable to request DMA channel for SPI RX\n");
-		r = -EAGAIN;
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	dspi->dma_rx = dma_request_channel(mask, edma_filter_fn,
+					   &dspi->dma_rx_chnum);
+	if (!dspi->dma_rx) {
+		dev_err(sdev, "request RX DMA channel failed\n");
+		r = -ENODEV;
 		goto rx_dma_failed;
 	}
 
-	r = edma_alloc_channel(dma->tx_channel, davinci_spi_dma_callback, dspi,
-								dma->eventq);
-	if (r < 0) {
-		pr_err("Unable to request DMA channel for SPI TX\n");
-		r = -EAGAIN;
+	dspi->dma_tx = dma_request_channel(mask, edma_filter_fn,
+					   &dspi->dma_tx_chnum);
+	if (!dspi->dma_tx) {
+		dev_err(sdev, "request TX DMA channel failed\n");
+		r = -ENODEV;
 		goto tx_dma_failed;
 	}
 
-	r = edma_alloc_slot(EDMA_CTLR(dma->tx_channel), EDMA_SLOT_ANY);
-	if (r < 0) {
-		pr_err("Unable to request SPI TX DMA param slot\n");
-		r = -EAGAIN;
-		goto param_failed;
-	}
-	dma->dummy_param_slot = r;
-	edma_link(dma->dummy_param_slot, dma->dummy_param_slot);
-
 	return 0;
-param_failed:
-	edma_free_channel(dma->tx_channel);
+
 tx_dma_failed:
-	edma_free_channel(dma->rx_channel);
+	dma_release_channel(dspi->dma_rx);
 rx_dma_failed:
 	return r;
 }
@@ -898,9 +868,8 @@ static int __devinit davinci_spi_probe(struct platform_device *pdev)
 	dspi->bitbang.txrx_bufs = davinci_spi_bufs;
 	if (dma_rx_chan != SPI_NO_RESOURCE &&
 	    dma_tx_chan != SPI_NO_RESOURCE) {
-		dspi->dma.rx_channel = dma_rx_chan;
-		dspi->dma.tx_channel = dma_tx_chan;
-		dspi->dma.eventq = pdata->dma_event_q;
+		dspi->dma_rx_chnum = dma_rx_chan;
+		dspi->dma_tx_chnum = dma_tx_chan;
 
 		ret = davinci_spi_request_dma(dspi);
 		if (ret)
@@ -955,9 +924,8 @@ static int __devinit davinci_spi_probe(struct platform_device *pdev)
 	return ret;
 
 free_dma:
-	edma_free_channel(dspi->dma.tx_channel);
-	edma_free_channel(dspi->dma.rx_channel);
-	edma_free_slot(dspi->dma.dummy_param_slot);
+	dma_release_channel(dspi->dma_rx);
+	dma_release_channel(dspi->dma_tx);
 free_clk:
 	clk_disable(dspi->clk);
 	clk_put(dspi->clk);

From b5f14330590118e6a0659255476c0f24ab681e05 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Sat, 1 Sep 2012 06:29:22 +0530
Subject: [PATCH 10/29] spi: davici - make davinci select edma

Reported-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/spi/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 5f84b5563c2d..b9fb9a137fff 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -134,6 +134,7 @@ config SPI_DAVINCI
 	tristate "Texas Instruments DaVinci/DA8x/OMAP-L/AM1x SoC SPI controller"
 	depends on ARCH_DAVINCI
 	select SPI_BITBANG
+	select TI_EDMA
 	help
 	  SPI master controller for DaVinci/DA8x/OMAP-L/AM1x SPI modules.
 

From c8acd6aa6bed3c0fd7898202f4ebc534db9085f2 Mon Sep 17 00:00:00 2001
From: Zhangfei Gao <zhangfei.gao@marvell.com>
Date: Mon, 3 Sep 2012 11:03:45 +0800
Subject: [PATCH 11/29] dmaengine: mmp-pdma support

1. virtual channel vs. physical channel
Virtual channel is managed by dmaengine
Physical channel handling resource, such as irq
Physical channel is alloced dynamically as descending priority,
freed immediately when irq done.
The availble highest priority physically channel will alwayes be alloced

Issue pending list -> alloc highest dma physically channel available -> dma done -> free physically channel

2. list: running list & pending list
submit: desc list -> pending list
issue_pending_list: if (IDLE) pending list -> running list; free pending list (RUN)
irq: free running list (IDLE)
     check pendlist -> pending list -> running list; free pending list (RUN)

3. irq:
Each list generate one irq, calling callback
One list may contain several desc chain, in such case, make sure only the last desc list generate irq.

4. async
Submit will add desc chain to pending list, which can be multi-called
If multi desc chain is submitted, only the last desc would generate irq -> call back
If IDLE, issue_pending_list start pending_list, transforming pendlist to running list
If RUN, irq will start pending list

5. test
5.1 pxa3xx_nand on pxa910
5.2 insmod dmatest.ko (threads_per_chan=y)
By default drivers/dma/dmatest.c test every channel and test memcpy with 1 threads per channel

Signed-off-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/Kconfig                   |   7 +
 drivers/dma/Makefile                  |   1 +
 drivers/dma/mmp_pdma.c                | 875 ++++++++++++++++++++++++++
 include/linux/platform_data/mmp_dma.h |  19 +
 4 files changed, 902 insertions(+)
 create mode 100644 drivers/dma/mmp_pdma.c
 create mode 100644 include/linux/platform_data/mmp_dma.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 03517191cb13..677cd6e4e1a1 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -302,6 +302,13 @@ config DMA_OMAP
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 
+config MMP_PDMA
+	bool "MMP PDMA support"
+	depends on (ARCH_MMP || ARCH_PXA)
+	select DMA_ENGINE
+	help
+	  Support the MMP PDMA engine for PXA and MMP platfrom.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index f5cf31063fb7..7428feaa8705 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -33,3 +33,4 @@ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
new file mode 100644
index 000000000000..14da1f403edf
--- /dev/null
+++ b/drivers/dma/mmp_pdma.c
@@ -0,0 +1,875 @@
+/*
+ * Copyright 2012 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+
+#include "dmaengine.h"
+
+#define DCSR		0x0000
+#define DALGN		0x00a0
+#define DINT		0x00f0
+#define DDADR		0x0200
+#define DSADR		0x0204
+#define DTADR		0x0208
+#define DCMD		0x020c
+
+#define DCSR_RUN	(1 << 31)	/* Run Bit (read / write) */
+#define DCSR_NODESC	(1 << 30)	/* No-Descriptor Fetch (read / write) */
+#define DCSR_STOPIRQEN	(1 << 29)	/* Stop Interrupt Enable (read / write) */
+#define DCSR_REQPEND	(1 << 8)	/* Request Pending (read-only) */
+#define DCSR_STOPSTATE	(1 << 3)	/* Stop State (read-only) */
+#define DCSR_ENDINTR	(1 << 2)	/* End Interrupt (read / write) */
+#define DCSR_STARTINTR	(1 << 1)	/* Start Interrupt (read / write) */
+#define DCSR_BUSERR	(1 << 0)	/* Bus Error Interrupt (read / write) */
+
+#define DCSR_EORIRQEN	(1 << 28)       /* End of Receive Interrupt Enable (R/W) */
+#define DCSR_EORJMPEN	(1 << 27)       /* Jump to next descriptor on EOR */
+#define DCSR_EORSTOPEN	(1 << 26)       /* STOP on an EOR */
+#define DCSR_SETCMPST	(1 << 25)       /* Set Descriptor Compare Status */
+#define DCSR_CLRCMPST	(1 << 24)       /* Clear Descriptor Compare Status */
+#define DCSR_CMPST	(1 << 10)       /* The Descriptor Compare Status */
+#define DCSR_EORINTR	(1 << 9)        /* The end of Receive */
+
+#define DRCMR_MAPVLD	(1 << 7)	/* Map Valid (read / write) */
+#define DRCMR_CHLNUM	0x1f		/* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
+#define DDADR_STOP	(1 << 0)	/* Stop (read / write) */
+
+#define DCMD_INCSRCADDR	(1 << 31)	/* Source Address Increment Setting. */
+#define DCMD_INCTRGADDR	(1 << 30)	/* Target Address Increment Setting. */
+#define DCMD_FLOWSRC	(1 << 29)	/* Flow Control by the source. */
+#define DCMD_FLOWTRG	(1 << 28)	/* Flow Control by the target. */
+#define DCMD_STARTIRQEN	(1 << 22)	/* Start Interrupt Enable */
+#define DCMD_ENDIRQEN	(1 << 21)	/* End Interrupt Enable */
+#define DCMD_ENDIAN	(1 << 18)	/* Device Endian-ness. */
+#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
+#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
+#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
+#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
+#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
+#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
+#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
+
+#define PDMA_ALIGNMENT		3
+#define PDMA_MAX_DESC_BYTES	0x1000
+
+struct mmp_pdma_desc_hw {
+	u32 ddadr;	/* Points to the next descriptor + flags */
+	u32 dsadr;	/* DSADR value for the current transfer */
+	u32 dtadr;	/* DTADR value for the current transfer */
+	u32 dcmd;	/* DCMD value for the current transfer */
+} __aligned(32);
+
+struct mmp_pdma_desc_sw {
+	struct mmp_pdma_desc_hw desc;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+};
+
+struct mmp_pdma_phy;
+
+struct mmp_pdma_chan {
+	struct device *dev;
+	struct dma_chan chan;
+	struct dma_async_tx_descriptor desc;
+	struct mmp_pdma_phy *phy;
+	enum dma_transfer_direction dir;
+
+	/* channel's basic info */
+	struct tasklet_struct tasklet;
+	u32 dcmd;
+	u32 drcmr;
+	u32 dev_addr;
+
+	/* list for desc */
+	spinlock_t desc_lock;		/* Descriptor list lock */
+	struct list_head chain_pending;	/* Link descriptors queue for pending */
+	struct list_head chain_running;	/* Link descriptors queue for running */
+	bool idle;			/* channel statue machine */
+
+	struct dma_pool *desc_pool;	/* Descriptors pool */
+};
+
+struct mmp_pdma_phy {
+	int idx;
+	void __iomem *base;
+	struct mmp_pdma_chan *vchan;
+};
+
+struct mmp_pdma_device {
+	int				dma_channels;
+	void __iomem			*base;
+	struct device			*dev;
+	struct dma_device		device;
+	struct mmp_pdma_phy		*phy;
+};
+
+#define tx_to_mmp_pdma_desc(tx) container_of(tx, struct mmp_pdma_desc_sw, async_tx)
+#define to_mmp_pdma_desc(lh) container_of(lh, struct mmp_pdma_desc_sw, node)
+#define to_mmp_pdma_chan(dchan) container_of(dchan, struct mmp_pdma_chan, chan)
+#define to_mmp_pdma_dev(dmadev) container_of(dmadev, struct mmp_pdma_device, device)
+
+static void set_desc(struct mmp_pdma_phy *phy, dma_addr_t addr)
+{
+	u32 reg = (phy->idx << 4) + DDADR;
+
+	writel(addr, phy->base + reg);
+}
+
+static void enable_chan(struct mmp_pdma_phy *phy)
+{
+	u32 reg;
+
+	if (!phy->vchan)
+		return;
+
+	reg = phy->vchan->drcmr;
+	reg = (((reg) < 64) ? 0x0100 : 0x1100) + (((reg) & 0x3f) << 2);
+	writel(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+
+	reg = (phy->idx << 2) + DCSR;
+	writel(readl(phy->base + reg) | DCSR_RUN,
+					phy->base + reg);
+}
+
+static void disable_chan(struct mmp_pdma_phy *phy)
+{
+	u32 reg;
+
+	if (phy) {
+		reg = (phy->idx << 2) + DCSR;
+		writel(readl(phy->base + reg) & ~DCSR_RUN,
+						phy->base + reg);
+	}
+}
+
+static int clear_chan_irq(struct mmp_pdma_phy *phy)
+{
+	u32 dcsr;
+	u32 dint = readl(phy->base + DINT);
+	u32 reg = (phy->idx << 2) + DCSR;
+
+	if (dint & BIT(phy->idx)) {
+		/* clear irq */
+		dcsr = readl(phy->base + reg);
+		writel(dcsr, phy->base + reg);
+		if ((dcsr & DCSR_BUSERR) && (phy->vchan))
+			dev_warn(phy->vchan->dev, "DCSR_BUSERR\n");
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+static irqreturn_t mmp_pdma_chan_handler(int irq, void *dev_id)
+{
+	struct mmp_pdma_phy *phy = dev_id;
+
+	if (clear_chan_irq(phy) == 0) {
+		tasklet_schedule(&phy->vchan->tasklet);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+static irqreturn_t mmp_pdma_int_handler(int irq, void *dev_id)
+{
+	struct mmp_pdma_device *pdev = dev_id;
+	struct mmp_pdma_phy *phy;
+	u32 dint = readl(pdev->base + DINT);
+	int i, ret;
+	int irq_num = 0;
+
+	while (dint) {
+		i = __ffs(dint);
+		dint &= (dint - 1);
+		phy = &pdev->phy[i];
+		ret = mmp_pdma_chan_handler(irq, phy);
+		if (ret == IRQ_HANDLED)
+			irq_num++;
+	}
+
+	if (irq_num)
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+/* lookup free phy channel as descending priority */
+static struct mmp_pdma_phy *lookup_phy(struct mmp_pdma_chan *pchan)
+{
+	int prio, i;
+	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+	struct mmp_pdma_phy *phy;
+
+	/*
+	 * dma channel priorities
+	 * ch 0 - 3,  16 - 19  <--> (0)
+	 * ch 4 - 7,  20 - 23  <--> (1)
+	 * ch 8 - 11, 24 - 27  <--> (2)
+	 * ch 12 - 15, 28 - 31  <--> (3)
+	 */
+	for (prio = 0; prio <= (((pdev->dma_channels - 1) & 0xf) >> 2); prio++) {
+		for (i = 0; i < pdev->dma_channels; i++) {
+			if (prio != ((i & 0xf) >> 2))
+				continue;
+			phy = &pdev->phy[i];
+			if (!phy->vchan) {
+				phy->vchan = pchan;
+				return phy;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/* desc->tx_list ==> pending list */
+static void append_pending_queue(struct mmp_pdma_chan *chan,
+					struct mmp_pdma_desc_sw *desc)
+{
+	struct mmp_pdma_desc_sw *tail =
+				to_mmp_pdma_desc(chan->chain_pending.prev);
+
+	if (list_empty(&chan->chain_pending))
+		goto out_splice;
+
+	/* one irq per queue, even appended */
+	tail->desc.ddadr = desc->async_tx.phys;
+	tail->desc.dcmd &= ~DCMD_ENDIRQEN;
+
+	/* softly link to pending list */
+out_splice:
+	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
+}
+
+/**
+ * start_pending_queue - transfer any pending transactions
+ * pending list ==> running list
+ */
+static void start_pending_queue(struct mmp_pdma_chan *chan)
+{
+	struct mmp_pdma_desc_sw *desc;
+
+	/* still in running, irq will start the pending list */
+	if (!chan->idle) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		return;
+	}
+
+	if (list_empty(&chan->chain_pending)) {
+		/* chance to re-fetch phy channel with higher prio */
+		if (chan->phy) {
+			chan->phy->vchan = NULL;
+			chan->phy = NULL;
+		}
+		dev_dbg(chan->dev, "no pending list\n");
+		return;
+	}
+
+	if (!chan->phy) {
+		chan->phy = lookup_phy(chan);
+		if (!chan->phy) {
+			dev_dbg(chan->dev, "no free dma channel\n");
+			return;
+		}
+	}
+
+	/*
+	 * pending -> running
+	 * reintilize pending list
+	 */
+	desc = list_first_entry(&chan->chain_pending,
+				struct mmp_pdma_desc_sw, node);
+	list_splice_tail_init(&chan->chain_pending, &chan->chain_running);
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_desc(chan->phy, desc->async_tx.phys);
+	enable_chan(chan->phy);
+	chan->idle = false;
+}
+
+
+/* desc->tx_list ==> pending list */
+static dma_cookie_t mmp_pdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(tx->chan);
+	struct mmp_pdma_desc_sw *desc = tx_to_mmp_pdma_desc(tx);
+	struct mmp_pdma_desc_sw *child;
+	unsigned long flags;
+	dma_cookie_t cookie = -EBUSY;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	list_for_each_entry(child, &desc->tx_list, node) {
+		cookie = dma_cookie_assign(&child->async_tx);
+	}
+
+	append_pending_queue(chan, desc);
+
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return cookie;
+}
+
+struct mmp_pdma_desc_sw *mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
+{
+	struct mmp_pdma_desc_sw *desc;
+	dma_addr_t pdesc;
+
+	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (!desc) {
+		dev_err(chan->dev, "out of memory for link descriptor\n");
+		return NULL;
+	}
+
+	memset(desc, 0, sizeof(*desc));
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+	/* each desc has submit */
+	desc->async_tx.tx_submit = mmp_pdma_tx_submit;
+	desc->async_tx.phys = pdesc;
+
+	return desc;
+}
+
+/**
+ * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel.
+ *
+ * This function will create a dma pool for descriptor allocation.
+ * Request irq only when channel is requested
+ * Return - The number of allocated descriptors.
+ */
+
+static int mmp_pdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+
+	if (chan->desc_pool)
+		return 1;
+
+	chan->desc_pool =
+		dma_pool_create(dev_name(&dchan->dev->device), chan->dev,
+				  sizeof(struct mmp_pdma_desc_sw),
+				  __alignof__(struct mmp_pdma_desc_sw), 0);
+	if (!chan->desc_pool) {
+		dev_err(chan->dev, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+	if (chan->phy) {
+		chan->phy->vchan = NULL;
+		chan->phy = NULL;
+	}
+	chan->idle = true;
+	chan->dev_addr = 0;
+	return 1;
+}
+
+static void mmp_pdma_free_desc_list(struct mmp_pdma_chan *chan,
+				  struct list_head *list)
+{
+	struct mmp_pdma_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node) {
+		list_del(&desc->node);
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+static void mmp_pdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+	mmp_pdma_free_desc_list(chan, &chan->chain_running);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+	chan->idle = true;
+	chan->dev_addr = 0;
+	if (chan->phy) {
+		chan->phy->vchan = NULL;
+		chan->phy = NULL;
+	}
+	return;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_memcpy(struct dma_chan *dchan,
+	dma_addr_t dma_dst, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct mmp_pdma_chan *chan;
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+	size_t copy = 0;
+
+	if (!dchan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	chan = to_mmp_pdma_chan(dchan);
+
+	if (!chan->dir) {
+		chan->dir = DMA_MEM_TO_MEM;
+		chan->dcmd = DCMD_INCTRGADDR | DCMD_INCSRCADDR;
+		chan->dcmd |= DCMD_BURST32;
+	}
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = mmp_pdma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, "no memory for desc\n");
+			goto fail;
+		}
+
+		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+
+		new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy);
+		new->desc.dsadr = dma_src;
+		new->desc.dtadr = dma_dst;
+
+		if (!first)
+			first = new;
+		else
+			prev->desc.ddadr = new->async_tx.phys;
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= copy;
+
+		if (chan->dir == DMA_MEM_TO_DEV) {
+			dma_src += copy;
+		} else if (chan->dir == DMA_DEV_TO_MEM) {
+			dma_dst += copy;
+		} else if (chan->dir == DMA_MEM_TO_MEM) {
+			dma_src += copy;
+			dma_dst += copy;
+		}
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	first->async_tx.flags = flags; /* client is in control of this ack */
+	first->async_tx.cookie = -EBUSY;
+
+	/* last desc and fire IRQ */
+	new->desc.ddadr = DDADR_STOP;
+	new->desc.dcmd |= DCMD_ENDIRQEN;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
+			 unsigned long flags, void *context)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	size_t len, avail;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	int i;
+
+	if ((sgl == NULL) || (sg_len == 0))
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sgl);
+
+		do {
+			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+
+			/* allocate and populate the descriptor */
+			new = mmp_pdma_alloc_descriptor(chan);
+			if (!new) {
+				dev_err(chan->dev, "no memory for desc\n");
+				goto fail;
+			}
+
+			new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & len);
+			if (dir == DMA_MEM_TO_DEV) {
+				new->desc.dsadr = addr;
+				new->desc.dtadr = chan->dev_addr;
+			} else {
+				new->desc.dsadr = chan->dev_addr;
+				new->desc.dtadr = addr;
+			}
+
+			if (!first)
+				first = new;
+			else
+				prev->desc.ddadr = new->async_tx.phys;
+
+			new->async_tx.cookie = 0;
+			async_tx_ack(&new->async_tx);
+			prev = new;
+
+			/* Insert the link descriptor to the LD ring */
+			list_add_tail(&new->node, &first->tx_list);
+
+			/* update metadata */
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	first->async_tx.cookie = -EBUSY;
+	first->async_tx.flags = flags;
+
+	/* last desc and fire IRQ */
+	new->desc.ddadr = DDADR_STOP;
+	new->desc.dcmd |= DCMD_ENDIRQEN;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static int mmp_pdma_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	struct dma_slave_config *cfg = (void *)arg;
+	unsigned long flags;
+	int ret = 0;
+	u32 maxburst = 0, addr = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+	if (!dchan)
+		return -EINVAL;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		disable_chan(chan->phy);
+		if (chan->phy) {
+			chan->phy->vchan = NULL;
+			chan->phy = NULL;
+		}
+		spin_lock_irqsave(&chan->desc_lock, flags);
+		mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+		mmp_pdma_free_desc_list(chan, &chan->chain_running);
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		chan->idle = true;
+		break;
+	case DMA_SLAVE_CONFIG:
+		if (cfg->direction == DMA_DEV_TO_MEM) {
+			chan->dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC;
+			maxburst = cfg->src_maxburst;
+			width = cfg->src_addr_width;
+			addr = cfg->src_addr;
+		} else if (cfg->direction == DMA_MEM_TO_DEV) {
+			chan->dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG;
+			maxburst = cfg->dst_maxburst;
+			width = cfg->dst_addr_width;
+			addr = cfg->dst_addr;
+		}
+
+		if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+			chan->dcmd |= DCMD_WIDTH1;
+		else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+			chan->dcmd |= DCMD_WIDTH2;
+		else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			chan->dcmd |= DCMD_WIDTH4;
+
+		if (maxburst == 8)
+			chan->dcmd |= DCMD_BURST8;
+		else if (maxburst == 16)
+			chan->dcmd |= DCMD_BURST16;
+		else if (maxburst == 32)
+			chan->dcmd |= DCMD_BURST32;
+
+		if (cfg) {
+			chan->dir = cfg->direction;
+			chan->drcmr = cfg->slave_id;
+		}
+		chan->dev_addr = addr;
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	return ret;
+}
+
+static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return ret;
+}
+
+/**
+ * mmp_pdma_issue_pending - Issue the DMA start command
+ * pending list ==> running list
+ */
+static void mmp_pdma_issue_pending(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	start_pending_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+}
+
+/*
+ * dma_do_tasklet
+ * Do call back
+ * Start pending list
+ */
+static void dma_do_tasklet(unsigned long data)
+{
+	struct mmp_pdma_chan *chan = (struct mmp_pdma_chan *)data;
+	struct mmp_pdma_desc_sw *desc, *_desc;
+	LIST_HEAD(chain_cleanup);
+	unsigned long flags;
+
+	/* submit pending list; callback for each desc; free desc */
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	/* update the cookie if we have some descriptors to cleanup */
+	if (!list_empty(&chan->chain_running)) {
+		dma_cookie_t cookie;
+
+		desc = to_mmp_pdma_desc(chan->chain_running.prev);
+		cookie = desc->async_tx.cookie;
+		dma_cookie_complete(&desc->async_tx);
+
+		dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+	}
+
+	/*
+	 * move the descriptors to a temporary list so we can drop the lock
+	 * during the entire cleanup operation
+	 */
+	list_splice_tail_init(&chan->chain_running, &chain_cleanup);
+
+	/* the hardware is now idle and ready for more */
+	chan->idle = true;
+
+	/* Start any pending transactions automatically */
+	start_pending_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chain_cleanup, node) {
+		struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+		/* Remove from the list of transactions */
+		list_del(&desc->node);
+		/* Run the link descriptor callback function */
+		if (txd->callback)
+			txd->callback(txd->callback_param);
+
+		dma_pool_free(chan->desc_pool, desc, txd->phys);
+	}
+}
+
+static int __devexit mmp_pdma_remove(struct platform_device *op)
+{
+	struct mmp_pdma_device *pdev = platform_get_drvdata(op);
+
+	dma_async_device_unregister(&pdev->device);
+	return 0;
+}
+
+static int __devinit mmp_pdma_chan_init(struct mmp_pdma_device *pdev,
+							int idx, int irq)
+{
+	struct mmp_pdma_phy *phy  = &pdev->phy[idx];
+	struct mmp_pdma_chan *chan;
+	int ret;
+
+	chan = devm_kzalloc(pdev->dev,
+			sizeof(struct mmp_pdma_chan), GFP_KERNEL);
+	if (chan == NULL)
+		return -ENOMEM;
+
+	phy->idx = idx;
+	phy->base = pdev->base;
+
+	if (irq) {
+		ret = devm_request_irq(pdev->dev, irq,
+			mmp_pdma_chan_handler, IRQF_DISABLED, "pdma", phy);
+		if (ret) {
+			dev_err(pdev->dev, "channel request irq fail!\n");
+			return ret;
+		}
+	}
+
+	spin_lock_init(&chan->desc_lock);
+	chan->dev = pdev->dev;
+	chan->chan.device = &pdev->device;
+	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+	INIT_LIST_HEAD(&chan->chain_pending);
+	INIT_LIST_HEAD(&chan->chain_running);
+
+	/* register virt channel to dma engine */
+	list_add_tail(&chan->chan.device_node,
+			&pdev->device.channels);
+
+	return 0;
+}
+
+static struct of_device_id mmp_pdma_dt_ids[] = {
+	{ .compatible = "marvell,pdma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids);
+
+static int __devinit mmp_pdma_probe(struct platform_device *op)
+{
+	struct mmp_pdma_device *pdev;
+	const struct of_device_id *of_id;
+	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+	struct resource *iores;
+	int i, ret, irq = 0;
+	int dma_channels = 0, irq_num = 0;
+
+	pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+	if (!pdev)
+		return -ENOMEM;
+	pdev->dev = &op->dev;
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	pdev->base = devm_request_and_ioremap(pdev->dev, iores);
+	if (!pdev->base)
+		return -EADDRNOTAVAIL;
+
+	of_id = of_match_device(mmp_pdma_dt_ids, pdev->dev);
+	if (of_id)
+		of_property_read_u32(pdev->dev->of_node,
+				"#dma-channels", &dma_channels);
+	else if (pdata && pdata->dma_channels)
+		dma_channels = pdata->dma_channels;
+	else
+		dma_channels = 32;	/* default 32 channel */
+	pdev->dma_channels = dma_channels;
+
+	for (i = 0; i < dma_channels; i++) {
+		if (platform_get_irq(op, i) > 0)
+			irq_num++;
+	}
+
+	pdev->phy = devm_kzalloc(pdev->dev,
+		dma_channels * sizeof(struct mmp_pdma_chan), GFP_KERNEL);
+	if (pdev->phy == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pdev->device.channels);
+
+	if (irq_num != dma_channels) {
+		/* all chan share one irq, demux inside */
+		irq = platform_get_irq(op, 0);
+		ret = devm_request_irq(pdev->dev, irq,
+			mmp_pdma_int_handler, IRQF_DISABLED, "pdma", pdev);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < dma_channels; i++) {
+		irq = (irq_num != dma_channels) ? 0 : platform_get_irq(op, i);
+		ret = mmp_pdma_chan_init(pdev, i, irq);
+		if (ret)
+			return ret;
+	}
+
+	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask);
+	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	pdev->device.dev = &op->dev;
+	pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources;
+	pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources;
+	pdev->device.device_tx_status = mmp_pdma_tx_status;
+	pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy;
+	pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg;
+	pdev->device.device_issue_pending = mmp_pdma_issue_pending;
+	pdev->device.device_control = mmp_pdma_control;
+	pdev->device.copy_align = PDMA_ALIGNMENT;
+
+	if (pdev->dev->coherent_dma_mask)
+		dma_set_mask(pdev->dev, pdev->dev->coherent_dma_mask);
+	else
+		dma_set_mask(pdev->dev, DMA_BIT_MASK(64));
+
+	ret = dma_async_device_register(&pdev->device);
+	if (ret) {
+		dev_err(pdev->device.dev, "unable to register\n");
+		return ret;
+	}
+
+	dev_info(pdev->device.dev, "initialized\n");
+	return 0;
+}
+
+static const struct platform_device_id mmp_pdma_id_table[] = {
+	{ "mmp-pdma", },
+	{ },
+};
+
+static struct platform_driver mmp_pdma_driver = {
+	.driver		= {
+		.name	= "mmp-pdma",
+		.owner  = THIS_MODULE,
+		.of_match_table = mmp_pdma_dt_ids,
+	},
+	.id_table	= mmp_pdma_id_table,
+	.probe		= mmp_pdma_probe,
+	.remove		= __devexit_p(mmp_pdma_remove),
+};
+
+module_platform_driver(mmp_pdma_driver);
+
+MODULE_DESCRIPTION("MARVELL MMP Periphera DMA Driver");
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h
new file mode 100644
index 000000000000..2a330ec9e2af
--- /dev/null
+++ b/include/linux/platform_data/mmp_dma.h
@@ -0,0 +1,19 @@
+/*
+ *  MMP Platform DMA Management
+ *
+ *  Copyright (c) 2011 Marvell Semiconductors Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+
+#ifndef MMP_DMA_H
+#define MMP_DMA_H
+
+struct mmp_dma_platdata {
+	int dma_channels;
+};
+
+#endif /* MMP_DMA_H */

From f1a7757008b810217b2380d01b740244f21c09bd Mon Sep 17 00:00:00 2001
From: Zhangfei Gao <zhangfei.gao@marvell.com>
Date: Mon, 3 Sep 2012 11:03:46 +0800
Subject: [PATCH 12/29] dmaengine: mmp_tdma: add dt support

Signed-off-by: Zhangfei Gao <zhangfei.gao@marvell.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/mmp_tdma.c | 61 +++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index 8a15cf2163dc..b93d73ca84ff 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <mach/regs-icu.h>
 #include <mach/sram.h>
+#include <linux/of_device.h>
 
 #include "dmaengine.h"
 
@@ -127,7 +128,6 @@ struct mmp_tdma_device {
 	void __iomem			*base;
 	struct dma_device		device;
 	struct mmp_tdma_chan		*tdmac[TDMA_CHANNEL_NUM];
-	int				irq;
 };
 
 #define to_mmp_tdma_chan(dchan) container_of(dchan, struct mmp_tdma_chan, chan)
@@ -492,7 +492,7 @@ static int __devinit mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
 		return -ENOMEM;
 	}
 	if (irq)
-		tdmac->irq = irq + idx;
+		tdmac->irq = irq;
 	tdmac->dev	   = tdev->dev;
 	tdmac->chan.device = &tdev->device;
 	tdmac->idx	   = idx;
@@ -505,34 +505,43 @@ static int __devinit mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
 	/* add the channel to tdma_chan list */
 	list_add_tail(&tdmac->chan.device_node,
 			&tdev->device.channels);
-
 	return 0;
 }
 
+static struct of_device_id mmp_tdma_dt_ids[] = {
+	{ .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA},
+	{ .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mmp_tdma_dt_ids);
+
 static int __devinit mmp_tdma_probe(struct platform_device *pdev)
 {
-	const struct platform_device_id *id = platform_get_device_id(pdev);
-	enum mmp_tdma_type type = id->driver_data;
+	enum mmp_tdma_type type;
+	const struct of_device_id *of_id;
 	struct mmp_tdma_device *tdev;
 	struct resource *iores;
 	int i, ret;
-	int irq = 0;
+	int irq = 0, irq_num = 0;
 	int chan_num = TDMA_CHANNEL_NUM;
 
+	of_id = of_match_device(mmp_tdma_dt_ids, &pdev->dev);
+	if (of_id)
+		type = (enum mmp_tdma_type) of_id->data;
+	else
+		type = platform_get_device_id(pdev)->driver_data;
+
 	/* always have couple channels */
 	tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL);
 	if (!tdev)
 		return -ENOMEM;
 
 	tdev->dev = &pdev->dev;
-	iores = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!iores)
-		return -EINVAL;
 
-	if (resource_size(iores) != chan_num)
-		tdev->irq = iores->start;
-	else
-		irq = iores->start;
+	for (i = 0; i < chan_num; i++) {
+		if (platform_get_irq(pdev, i) > 0)
+			irq_num++;
+	}
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iores)
@@ -542,25 +551,26 @@ static int __devinit mmp_tdma_probe(struct platform_device *pdev)
 	if (!tdev->base)
 		return -EADDRNOTAVAIL;
 
-	if (tdev->irq) {
-		ret = devm_request_irq(&pdev->dev, tdev->irq,
+	INIT_LIST_HEAD(&tdev->device.channels);
+
+	if (irq_num != chan_num) {
+		irq = platform_get_irq(pdev, 0);
+		ret = devm_request_irq(&pdev->dev, irq,
 			mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
 		if (ret)
 			return ret;
 	}
 
+	/* initialize channel parameters */
+	for (i = 0; i < chan_num; i++) {
+		irq = (irq_num != chan_num) ? 0 : platform_get_irq(pdev, i);
+		ret = mmp_tdma_chan_init(tdev, i, irq, type);
+		if (ret)
+			return ret;
+	}
+
 	dma_cap_set(DMA_SLAVE, tdev->device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask);
-
-	INIT_LIST_HEAD(&tdev->device.channels);
-
-	/* initialize channel parameters */
-	for (i = 0; i < chan_num; i++) {
-		ret = mmp_tdma_chan_init(tdev, i, irq, type);
-		if (ret)
-			return ret;
-	}
-
 	tdev->device.dev = &pdev->dev;
 	tdev->device.device_alloc_chan_resources =
 					mmp_tdma_alloc_chan_resources;
@@ -595,6 +605,7 @@ static struct platform_driver mmp_tdma_driver = {
 	.driver		= {
 		.name	= "mmp-tdma",
 		.owner  = THIS_MODULE,
+		.of_match_table = mmp_tdma_dt_ids,
 	},
 	.id_table	= mmp_tdma_id_table,
 	.probe		= mmp_tdma_probe,

From 8fccc5bfd7f83fd321db42bcad36e2d9fe13d65b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 3 Sep 2012 13:46:19 +0300
Subject: [PATCH 13/29] dw_dmac: utilize slave_id to pass request line

There is slave_id field in the generic slave config structure that is dedicated
for the uniq slave number. In our case we have the request lines wired to the
certain hardware. Therefore the number of the request line is uniq and could be
used as slave_id. It allows us in some cases to drop out the usage of the
custom slave config structure.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 4f4ff1337cac..efbd16a296f1 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -177,6 +177,11 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 		cfghi = dws->cfg_hi;
 		cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
+	} else {
+		if (dwc->dma_sconfig.direction == DMA_MEM_TO_DEV)
+			cfghi = DWC_CFGH_DST_PER(dwc->dma_sconfig.slave_id);
+		else if (dwc->dma_sconfig.direction == DMA_DEV_TO_MEM)
+			cfghi = DWC_CFGH_SRC_PER(dwc->dma_sconfig.slave_id);
 	}
 
 	channel_writel(dwc, CFG_LO, cfglo);

From 5e97fa91492b4943f25228e08d24abb31e5bce50 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Tue, 4 Sep 2012 06:04:25 +0200
Subject: [PATCH 14/29] mxs/dma: Enlarge the CCW descriptor area to 4 pages

In case of a large SPI flash, the amount of DMA descriptors
available to the DMA driver is not large enough anymore. For
example 8MB SPI flash now needs 129 descriptors to be transfered
in one long read. There are currently 53 descriptors available in
one PAGE_SIZE-big block. Enlarge the allocated descriptor area to
four PAGE_SIZE blocks to fulfill such requirements.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Dan Williams <djbw@fb.com>
Cc: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/mxs-dma.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 7f41b25805fa..e269325d0f00 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -101,7 +101,8 @@ struct mxs_dma_ccw {
 	u32		pio_words[MXS_PIO_WORDS];
 };
 
-#define NUM_CCW	(int)(PAGE_SIZE / sizeof(struct mxs_dma_ccw))
+#define CCW_BLOCK_SIZE	(4 * PAGE_SIZE)
+#define NUM_CCW	(int)(CCW_BLOCK_SIZE / sizeof(struct mxs_dma_ccw))
 
 struct mxs_dma_chan {
 	struct mxs_dma_engine		*mxs_dma;
@@ -354,14 +355,15 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 
 	mxs_chan->chan_irq = data->chan_irq;
 
-	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
-				&mxs_chan->ccw_phys, GFP_KERNEL);
+	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev,
+				CCW_BLOCK_SIZE, &mxs_chan->ccw_phys,
+				GFP_KERNEL);
 	if (!mxs_chan->ccw) {
 		ret = -ENOMEM;
 		goto err_alloc;
 	}
 
-	memset(mxs_chan->ccw, 0, PAGE_SIZE);
+	memset(mxs_chan->ccw, 0, CCW_BLOCK_SIZE);
 
 	if (mxs_chan->chan_irq != NO_IRQ) {
 		ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
@@ -387,7 +389,7 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 err_clk:
 	free_irq(mxs_chan->chan_irq, mxs_dma);
 err_irq:
-	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+	dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
 			mxs_chan->ccw, mxs_chan->ccw_phys);
 err_alloc:
 	return ret;
@@ -402,7 +404,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 
 	free_irq(mxs_chan->chan_irq, mxs_dma);
 
-	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+	dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
 			mxs_chan->ccw, mxs_chan->ccw_phys);
 
 	clk_disable_unprepare(mxs_dma->clk);

From 2cc44e63184245c2b977ca8739b2e8aab63e3d38 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Wed, 5 Sep 2012 15:08:56 +0800
Subject: [PATCH 15/29] dma: tegra: use list_move_tail instead of
 list_del/list_add_tail

Using list_move_tail() instead of list_del() + list_add_tail().

spatch with a semantic match is used to found this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/tegra20-apb-dma.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 24acd711e032..6ed3f43be41e 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -475,8 +475,7 @@ static void tegra_dma_abort_all(struct tegra_dma_channel *tdc)
 	while (!list_empty(&tdc->pending_sg_req)) {
 		sgreq = list_first_entry(&tdc->pending_sg_req,
 						typeof(*sgreq), node);
-		list_del(&sgreq->node);
-		list_add_tail(&sgreq->node, &tdc->free_sg_req);
+		list_move_tail(&sgreq->node, &tdc->free_sg_req);
 		if (sgreq->last_sg) {
 			dma_desc = sgreq->dma_desc;
 			dma_desc->dma_status = DMA_ERROR;
@@ -570,8 +569,7 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
 
 	/* If not last req then put at end of pending list */
 	if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) {
-		list_del(&sgreq->node);
-		list_add_tail(&sgreq->node, &tdc->pending_sg_req);
+		list_move_tail(&sgreq->node, &tdc->pending_sg_req);
 		sgreq->configured = false;
 		st = handle_continuous_head_request(tdc, sgreq, to_terminate);
 		if (!st)

From 7fc7bf084bba30c66e751dae47b87bc00891f288 Mon Sep 17 00:00:00 2001
From: Tushar Behera <tushar.behera@linaro.org>
Date: Wed, 29 Aug 2012 10:16:24 +0530
Subject: [PATCH 16/29] ARM: EXYNOS: Set the capability of pdm0 and pdm1 as
 DMA_PRIVATE

DMA clients pdma0 and pdma1 are internal to the SoC and are used only
by dedicated peripherals. Since they cannot be used for generic
purpose, their capability should be set as DMA_PRIVATE.

CC: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Tushar Behera <tushar.behera@linaro.org>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-exynos/dma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-exynos/dma.c b/arch/arm/mach-exynos/dma.c
index f60b66dbcf84..21d568b3b149 100644
--- a/arch/arm/mach-exynos/dma.c
+++ b/arch/arm/mach-exynos/dma.c
@@ -303,10 +303,12 @@ static int __init exynos_dma_init(void)
 
 	dma_cap_set(DMA_SLAVE, exynos_pdma0_pdata.cap_mask);
 	dma_cap_set(DMA_CYCLIC, exynos_pdma0_pdata.cap_mask);
+	dma_cap_set(DMA_PRIVATE, exynos_pdma0_pdata.cap_mask);
 	amba_device_register(&exynos_pdma0_device, &iomem_resource);
 
 	dma_cap_set(DMA_SLAVE, exynos_pdma1_pdata.cap_mask);
 	dma_cap_set(DMA_CYCLIC, exynos_pdma1_pdata.cap_mask);
+	dma_cap_set(DMA_PRIVATE, exynos_pdma1_pdata.cap_mask);
 	amba_device_register(&exynos_pdma1_device, &iomem_resource);
 
 	dma_cap_set(DMA_MEMCPY, exynos_mdma1_pdata.cap_mask);

From 5557a419d44d063be665100086adf4721d41ea49 Mon Sep 17 00:00:00 2001
From: Tushar Behera <tushar.behera@linaro.org>
Date: Wed, 29 Aug 2012 10:16:25 +0530
Subject: [PATCH 17/29] DMA: PL330: Set the capability of pdm0 and pdm1 as
 DMA_PRIVATE

DMA clients pdma0 and pdma1 are internal to the SoC and are used only
by dedicated peripherals. Since they cannot be used for generic
purpose, their capability should be set as DMA_PRIVATE.

CC: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Tushar Behera <tushar.behera@linaro.org>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/pl330.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index e4feba6b03c0..600ea5ea9827 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2954,6 +2954,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		if (pi->pcfg.num_peri) {
 			dma_cap_set(DMA_SLAVE, pd->cap_mask);
 			dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+			dma_cap_set(DMA_PRIVATE, pd->cap_mask);
 		}
 	}
 

From 921eeadbbdd7ffba16937a3b63e408c980c39db6 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Mon, 27 Aug 2012 12:40:42 +0800
Subject: [PATCH 18/29] dmaengine: use kmem_cache_zalloc instead of
 kmem_cache_alloc/memset

Using kmem_cache_zalloc() instead of kmem_cache_alloc() and memset().

spatch with a semantic match is used to found this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/ioat/dma_v2.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 86895760b598..b9d667851445 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -434,12 +434,11 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t f
 		return NULL;
 	memset(hw, 0, sizeof(*hw));
 
-	desc = kmem_cache_alloc(ioat2_cache, flags);
+	desc = kmem_cache_zalloc(ioat2_cache, flags);
 	if (!desc) {
 		pci_pool_free(dma->dma_pool, hw, phys);
 		return NULL;
 	}
-	memset(desc, 0, sizeof(*desc));
 
 	dma_async_tx_descriptor_init(&desc->txd, chan);
 	desc->txd.tx_submit = ioat2_tx_submit_unlock;

From faf6fbc6f2ca3b34bf464a8bb079a998e571957c Mon Sep 17 00:00:00 2001
From: Inderpal Singh <inderpal.singh@linaro.org>
Date: Fri, 7 Sep 2012 12:14:47 +0530
Subject: [PATCH 19/29] DMA: PL330: Remove controller clock enable/disable

The controller clock is being enabled/disabled in AMBA bus
infrastructre in probe/remove functions. Hence, its not required
at driver level probe/remove.

Signed-off-by: Inderpal Singh <inderpal.singh@linaro.org>
Tested-by: Chander Kashyap <chander.kashyap@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/pl330.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 600ea5ea9827..3a434c4b63e6 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2896,11 +2896,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	amba_set_drvdata(adev, pdmac);
 
-#ifndef CONFIG_PM_RUNTIME
-	/* enable dma clk */
-	clk_enable(pdmac->clk);
-#endif
-
 	irq = adev->irq[0];
 	ret = request_irq(irq, pl330_irq_handler, 0,
 			dev_name(&adev->dev), pi);
@@ -2988,9 +2983,6 @@ probe_err5:
 probe_err4:
 	free_irq(irq, pi);
 probe_err3:
-#ifndef CONFIG_PM_RUNTIME
-	clk_disable(pdmac->clk);
-#endif
 	clk_put(pdmac->clk);
 probe_err2:
 	iounmap(pi->base);
@@ -3038,10 +3030,6 @@ static int __devexit pl330_remove(struct amba_device *adev)
 	res = &adev->res;
 	release_mem_region(res->start, resource_size(res));
 
-#ifndef CONFIG_PM_RUNTIME
-	clk_disable(pdmac->clk);
-#endif
-
 	kfree(pdmac);
 
 	return 0;

From 7c71b8eb268ee38235f7e924d943ea9d90e59469 Mon Sep 17 00:00:00 2001
From: Inderpal Singh <inderpal.singh@linaro.org>
Date: Fri, 7 Sep 2012 12:14:48 +0530
Subject: [PATCH 20/29] DMA: PL330: Remove redundant runtime_suspend/resume
 functions

The driver's  runtime_suspend/resume functions just disable/enable
the clock which is already being managed at AMBA bus level
runtime_suspend/resume functions.

Hence, remove the driver's runtime_suspend/resume functions.

Signed-off-by: Inderpal Singh <inderpal.singh@linaro.org>
Tested-by: Chander Kashyap <chander.kashyap@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/pl330.c | 61 ++++-----------------------------------------
 1 file changed, 5 insertions(+), 56 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 3a434c4b63e6..7e543698fa23 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -23,7 +23,6 @@
 #include <linux/dmaengine.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl330.h>
-#include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 
@@ -586,8 +585,6 @@ struct dma_pl330_dmac {
 
 	/* Peripheral channels connected to this DMAC */
 	struct dma_pl330_chan *peripherals; /* keep at end */
-
-	struct clk *clk;
 };
 
 struct dma_pl330_desc {
@@ -2887,24 +2884,17 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		goto probe_err1;
 	}
 
-	pdmac->clk = clk_get(&adev->dev, "dma");
-	if (IS_ERR(pdmac->clk)) {
-		dev_err(&adev->dev, "Cannot get operation clock.\n");
-		ret = -EINVAL;
-		goto probe_err2;
-	}
-
 	amba_set_drvdata(adev, pdmac);
 
 	irq = adev->irq[0];
 	ret = request_irq(irq, pl330_irq_handler, 0,
 			dev_name(&adev->dev), pi);
 	if (ret)
-		goto probe_err3;
+		goto probe_err2;
 
 	ret = pl330_add(pi);
 	if (ret)
-		goto probe_err4;
+		goto probe_err3;
 
 	INIT_LIST_HEAD(&pdmac->desc_pool);
 	spin_lock_init(&pdmac->pool_lock);
@@ -2965,7 +2955,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	ret = dma_async_device_register(pd);
 	if (ret) {
 		dev_err(&adev->dev, "unable to register DMAC\n");
-		goto probe_err5;
+		goto probe_err4;
 	}
 
 	dev_info(&adev->dev,
@@ -2978,12 +2968,10 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	return 0;
 
-probe_err5:
-	pl330_del(pi);
 probe_err4:
-	free_irq(irq, pi);
+	pl330_del(pi);
 probe_err3:
-	clk_put(pdmac->clk);
+	free_irq(irq, pi);
 probe_err2:
 	iounmap(pi->base);
 probe_err1:
@@ -3045,49 +3033,10 @@ static struct amba_id pl330_ids[] = {
 
 MODULE_DEVICE_TABLE(amba, pl330_ids);
 
-#ifdef CONFIG_PM_RUNTIME
-static int pl330_runtime_suspend(struct device *dev)
-{
-	struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev);
-
-	if (!pdmac) {
-		dev_err(dev, "failed to get dmac\n");
-		return -ENODEV;
-	}
-
-	clk_disable(pdmac->clk);
-
-	return 0;
-}
-
-static int pl330_runtime_resume(struct device *dev)
-{
-	struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev);
-
-	if (!pdmac) {
-		dev_err(dev, "failed to get dmac\n");
-		return -ENODEV;
-	}
-
-	clk_enable(pdmac->clk);
-
-	return 0;
-}
-#else
-#define pl330_runtime_suspend	NULL
-#define pl330_runtime_resume	NULL
-#endif /* CONFIG_PM_RUNTIME */
-
-static const struct dev_pm_ops pl330_pm_ops = {
-	.runtime_suspend = pl330_runtime_suspend,
-	.runtime_resume = pl330_runtime_resume,
-};
-
 static struct amba_driver pl330_driver = {
 	.drv = {
 		.owner = THIS_MODULE,
 		.name = "dma-pl330",
-		.pm = &pl330_pm_ops,
 	},
 	.id_table = pl330_ids,
 	.probe = pl330_probe,

From 027478851791df751176398be02a3b1c5f6aa824 Mon Sep 17 00:00:00 2001
From: Inderpal Singh <inderpal.singh@linaro.org>
Date: Mon, 17 Sep 2012 09:57:45 +0530
Subject: [PATCH 21/29] DMA: PL330: return ENOMEM instead of 0 from
 pl330_alloc_chan_resources

Since 0 is not considered as error at dmaengine level, return ENOMEM
from pl330_alloc_chan_resources in case of failure.

Signed-off-by: Inderpal Singh <inderpal.singh@linaro.org>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/pl330.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 7e543698fa23..28322dced886 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2390,7 +2390,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 	pch->pl330_chid = pl330_request_channel(&pdmac->pif);
 	if (!pch->pl330_chid) {
 		spin_unlock_irqrestore(&pch->lock, flags);
-		return 0;
+		return -ENOMEM;
 	}
 
 	tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);

From f52b36d27b96cba186328f5e77b12ee39376db13 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 21 Sep 2012 15:05:44 +0300
Subject: [PATCH 22/29] dw_dmac: mark dwc_dump_chan_regs as inline

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index efbd16a296f1..e817610d97c9 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -211,7 +211,7 @@ static inline unsigned int dwc_fast_fls(unsigned long long v)
 	return 0;
 }
 
-static void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
 {
 	dev_err(chan2dev(&dwc->chan),
 		"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",

From 2a9fe9ae3a7b3986a4229900e900bfaa72c7a152 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 21 Sep 2012 15:05:45 +0300
Subject: [PATCH 23/29] dw_dmac: fill optional encoded parameters in register
 structure

There is a block of the registers that are optional. However, if enabled they
contain useful information about the controller hardware configuration. We will
use this piece of data to autoconfigure the driver.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac_regs.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index f6d92d72ae40..8a3a81adbf78 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -82,9 +82,39 @@ struct dw_dma_regs {
 	DW_REG(ID);
 	DW_REG(TEST);
 
+	/* reserved */
+	DW_REG(__reserved0);
+	DW_REG(__reserved1);
+
 	/* optional encoded params, 0x3c8..0x3f7 */
+	u32	__reserved;
+
+	/* per-channel configuration registers */
+	u32	DWC_PARAMS[DW_DMA_MAX_NR_CHANNELS];
+	u32	MULTI_BLK_TYPE;
+	u32	MAX_BLK_SIZE;
+
+	/* top-level parameters */
+	u32	DW_PARAMS;
 };
 
+/* To access the registers in early stage of probe */
+#define dma_read_byaddr(addr, name) \
+	readl((addr) + offsetof(struct dw_dma_regs, name))
+
+/* Bitfields in DW_PARAMS */
+#define DW_PARAMS_NR_CHAN	8		/* number of channels */
+#define DW_PARAMS_NR_MASTER	11		/* number of AHB masters */
+#define DW_PARAMS_DATA_WIDTH(n)	(15 + 2 * (n))
+#define DW_PARAMS_DATA_WIDTH1	15		/* master 1 data width */
+#define DW_PARAMS_DATA_WIDTH2	17		/* master 2 data width */
+#define DW_PARAMS_DATA_WIDTH3	19		/* master 3 data width */
+#define DW_PARAMS_DATA_WIDTH4	21		/* master 4 data width */
+#define DW_PARAMS_EN		28		/* encoded parameters */
+
+/* Bitfields in DWC_PARAMS */
+#define DWC_PARAMS_MBLK_EN	11		/* multi block transfer */
+
 /* Bitfields in CTL_LO */
 #define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
 #define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */

From 482c67ea7bab80b956185a3e7553151820bc5876 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 21 Sep 2012 15:05:46 +0300
Subject: [PATCH 24/29] dw_dmac: get number of channels from hardware if
 possible

In case the controller has the encoded parameters feature enabled the driver
will use it to get the number of channels. In the future it will be used for
the other important parameters as well.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index e817610d97c9..d71bc7167891 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -1379,6 +1379,10 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	struct resource		*io;
 	struct dw_dma		*dw;
 	size_t			size;
+	void __iomem		*regs;
+	bool			autocfg;
+	unsigned int		dw_params;
+	unsigned int		nr_channels;
 	int			irq;
 	int			err;
 	int			i;
@@ -1395,23 +1399,32 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	size = sizeof(struct dw_dma);
-	size += pdata->nr_channels * sizeof(struct dw_dma_chan);
+	regs = devm_request_and_ioremap(&pdev->dev, io);
+	if (!regs)
+		return -EBUSY;
+
+	dw_params = dma_read_byaddr(regs, DW_PARAMS);
+	autocfg = dw_params >> DW_PARAMS_EN & 0x1;
+
+	if (autocfg)
+		nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 0x7) + 1;
+	else
+		nr_channels = pdata->nr_channels;
+
+	size = sizeof(struct dw_dma) + nr_channels * sizeof(struct dw_dma_chan);
 	dw = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
 	if (!dw)
 		return -ENOMEM;
 
-	dw->regs = devm_request_and_ioremap(&pdev->dev, io);
-	if (!dw->regs)
-		return -EBUSY;
-
 	dw->clk = devm_clk_get(&pdev->dev, "hclk");
 	if (IS_ERR(dw->clk))
 		return PTR_ERR(dw->clk);
 	clk_prepare_enable(dw->clk);
 
+	dw->regs = regs;
+
 	/* Calculate all channel mask before DMA setup */
-	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+	dw->all_chan_mask = (1 << nr_channels) - 1;
 
 	/* force dma off, just in case */
 	dw_dma_off(dw);
@@ -1429,7 +1442,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
 
 	INIT_LIST_HEAD(&dw->dma.channels);
-	for (i = 0; i < pdata->nr_channels; i++) {
+	for (i = 0; i < nr_channels; i++) {
 		struct dw_dma_chan	*dwc = &dw->chan[i];
 
 		dwc->chan.device = &dw->dma;
@@ -1442,7 +1455,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 
 		/* 7 is highest priority & 0 is lowest. */
 		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
-			dwc->priority = pdata->nr_channels - i - 1;
+			dwc->priority = nr_channels - i - 1;
 		else
 			dwc->priority = i;
 
@@ -1483,7 +1496,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	dma_writel(dw, CFG, DW_CFG_DMA_EN);
 
 	printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
-			dev_name(&pdev->dev), pdata->nr_channels);
+			dev_name(&pdev->dev), nr_channels);
 
 	dma_async_device_register(&dw->dma);
 

From 4a63a8b3e8d2e4f56174deb728085010aa3ac2a1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 21 Sep 2012 15:05:47 +0300
Subject: [PATCH 25/29] dw_dmac: autoconfigure block_size or use platform data

The maximum block size is a configurable parameter for the chip. So, driver
will try to get it from the encoded component parameters. Otherwise it will
come from the platform data.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-spear13xx/spear13xx.c |  1 +
 arch/avr32/mach-at32ap/at32ap700x.c |  1 +
 drivers/dma/dw_dmac.c               | 37 ++++++++++++++++-------------
 drivers/dma/dw_dmac_regs.h          |  3 +++
 include/linux/dw_dmac.h             |  2 ++
 5 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/arch/arm/mach-spear13xx/spear13xx.c b/arch/arm/mach-spear13xx/spear13xx.c
index cf936b106e27..c64d8123518f 100644
--- a/arch/arm/mach-spear13xx/spear13xx.c
+++ b/arch/arm/mach-spear13xx/spear13xx.c
@@ -78,6 +78,7 @@ struct dw_dma_platform_data dmac_plat_data = {
 	.nr_channels = 8,
 	.chan_allocation_order = CHAN_ALLOCATION_DESCENDING,
 	.chan_priority = CHAN_PRIORITY_DESCENDING,
+	.block_size = 4095U,
 };
 
 void __init spear13xx_l2x0_init(void)
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 0445c4fd67e3..2c4aefeb86b5 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -605,6 +605,7 @@ static void __init genclk_init_parent(struct clk *clk)
 
 static struct dw_dma_platform_data dw_dmac0_data = {
 	.nr_channels	= 3,
+	.block_size	= 4095U,
 };
 
 static struct resource dw_dmac0_resource[] = {
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index d71bc7167891..c143b7e40716 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -55,16 +55,6 @@
 		 | DWC_CTLL_SMS(_sms));				\
 	})
 
-/*
- * This is configuration-dependent and usually a funny size like 4095.
- *
- * Note that this is a transfer count, i.e. if we transfer 32-bit
- * words, we can do 16380 bytes per descriptor.
- *
- * This parameter is also system-specific.
- */
-#define DWC_MAX_COUNT	4095U
-
 /*
  * Number of descriptors to allocate for each channel. This should be
  * made configurable somehow; preferably, the clients (at least the
@@ -672,7 +662,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 	for (offset = 0; offset < len; offset += xfer_count << src_width) {
 		xfer_count = min_t(size_t, (len - offset) >> src_width,
-				DWC_MAX_COUNT);
+					   dwc->block_size);
 
 		desc = dwc_desc_get(dwc);
 		if (!desc)
@@ -773,8 +763,8 @@ slave_sg_todev_fill_desc:
 			desc->lli.sar = mem;
 			desc->lli.dar = reg;
 			desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
-			if ((len >> mem_width) > DWC_MAX_COUNT) {
-				dlen = DWC_MAX_COUNT << mem_width;
+			if ((len >> mem_width) > dwc->block_size) {
+				dlen = dwc->block_size << mem_width;
 				mem += dlen;
 				len -= dlen;
 			} else {
@@ -833,8 +823,8 @@ slave_sg_fromdev_fill_desc:
 			desc->lli.sar = reg;
 			desc->lli.dar = mem;
 			desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
-			if ((len >> reg_width) > DWC_MAX_COUNT) {
-				dlen = DWC_MAX_COUNT << reg_width;
+			if ((len >> reg_width) > dwc->block_size) {
+				dlen = dwc->block_size << reg_width;
 				mem += dlen;
 				len -= dlen;
 			} else {
@@ -1217,7 +1207,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	periods = buf_len / period_len;
 
 	/* Check for too big/unaligned periods and unaligned DMA buffer. */
-	if (period_len > (DWC_MAX_COUNT << reg_width))
+	if (period_len > (dwc->block_size << reg_width))
 		goto out_err;
 	if (unlikely(period_len & ((1 << reg_width) - 1)))
 		goto out_err;
@@ -1383,6 +1373,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	bool			autocfg;
 	unsigned int		dw_params;
 	unsigned int		nr_channels;
+	unsigned int		max_blk_size = 0;
 	int			irq;
 	int			err;
 	int			i;
@@ -1423,6 +1414,10 @@ static int __devinit dw_probe(struct platform_device *pdev)
 
 	dw->regs = regs;
 
+	/* get hardware configuration parameters */
+	if (autocfg)
+		max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
+
 	/* Calculate all channel mask before DMA setup */
 	dw->all_chan_mask = (1 << nr_channels) - 1;
 
@@ -1468,6 +1463,16 @@ static int __devinit dw_probe(struct platform_device *pdev)
 		INIT_LIST_HEAD(&dwc->free_list);
 
 		channel_clear_bit(dw, CH_EN, dwc->mask);
+
+		/* hardware configuration */
+		if (autocfg)
+			/* Decode maximum block size for given channel. The
+			 * stored 4 bit value represents blocks from 0x00 for 3
+			 * up to 0x0a for 4095. */
+			dwc->block_size =
+				(4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
+		else
+			dwc->block_size = pdata->block_size;
 	}
 
 	/* Clear all interrupts on all channels. */
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 8a3a81adbf78..2a1cc533f0c8 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -193,6 +193,9 @@ struct dw_dma_chan {
 
 	unsigned int		descs_allocated;
 
+	/* hardware configuration */
+	unsigned int		block_size;
+
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;
 };
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 2412e02d7c0f..3315ef9c785b 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -19,6 +19,7 @@
  * @nr_channels: Number of channels supported by hardware (max 8)
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
+ * @block_size: Maximum block size supported by the controller
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
@@ -29,6 +30,7 @@ struct dw_dma_platform_data {
 #define CHAN_PRIORITY_ASCENDING		0	/* chan0 highest */
 #define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
 	unsigned char	chan_priority;
+	unsigned short	block_size;
 };
 
 /* bursts size */

From a09820043c9e11149145a1ec221eed4a7b42dcce Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 21 Sep 2012 15:05:48 +0300
Subject: [PATCH 26/29] dw_dmac: autoconfigure data_width or get it via
 platform data

Not all of the controllers support the 64 bit data width. Make it configurable
via platform data. The driver will try to get a value from the component
parameters, otherwise it will use the platform data.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-spear13xx/spear13xx.c |  2 ++
 arch/avr32/mach-at32ap/at32ap700x.c |  2 ++
 drivers/dma/dw_dmac.c               | 46 +++++++++++++++++++++++++----
 drivers/dma/dw_dmac_regs.h          |  7 +++++
 include/linux/dw_dmac.h             |  5 ++++
 5 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-spear13xx/spear13xx.c b/arch/arm/mach-spear13xx/spear13xx.c
index c64d8123518f..6a7dfe1857b6 100644
--- a/arch/arm/mach-spear13xx/spear13xx.c
+++ b/arch/arm/mach-spear13xx/spear13xx.c
@@ -79,6 +79,8 @@ struct dw_dma_platform_data dmac_plat_data = {
 	.chan_allocation_order = CHAN_ALLOCATION_DESCENDING,
 	.chan_priority = CHAN_PRIORITY_DESCENDING,
 	.block_size = 4095U,
+	.nr_masters = 2,
+	.data_width = { 3, 3, 0, 0 },
 };
 
 void __init spear13xx_l2x0_init(void)
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 2c4aefeb86b5..b323d8d3185b 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -606,6 +606,8 @@ static void __init genclk_init_parent(struct clk *clk)
 static struct dw_dma_platform_data dw_dmac0_data = {
 	.nr_channels	= 3,
 	.block_size	= 4095U,
+	.nr_masters	= 2,
+	.data_width	= { 2, 2, 0, 0 },
 };
 
 static struct resource dw_dmac0_resource[] = {
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index c143b7e40716..cdc0a1fe2c64 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -36,12 +36,22 @@
  * which does not support descriptor writeback.
  */
 
+static inline unsigned int dwc_get_dms(struct dw_dma_slave *slave)
+{
+	return slave ? slave->dst_master : 0;
+}
+
+static inline unsigned int dwc_get_sms(struct dw_dma_slave *slave)
+{
+	return slave ? slave->src_master : 1;
+}
+
 #define DWC_DEFAULT_CTLLO(_chan) ({				\
 		struct dw_dma_slave *__slave = (_chan->private);	\
 		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
 		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
-		int _dms = __slave ? __slave->dst_master : 0;	\
-		int _sms = __slave ? __slave->src_master : 1;	\
+		int _dms = dwc_get_dms(__slave);		\
+		int _sms = dwc_get_sms(__slave);		\
 		u8 _smsize = __slave ? _sconfig->src_maxburst :	\
 			DW_DMA_MSIZE_16;			\
 		u8 _dmsize = __slave ? _sconfig->dst_maxburst :	\
@@ -631,6 +641,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		size_t len, unsigned long flags)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma_slave	*dws = chan->private;
 	struct dw_desc		*desc;
 	struct dw_desc		*first;
 	struct dw_desc		*prev;
@@ -650,7 +661,11 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		return NULL;
 	}
 
-	src_width = dst_width = dwc_fast_fls(src | dest | len);
+	src_width = min_t(unsigned int, dwc->dw->data_width[dwc_get_sms(dws)],
+			  dwc_fast_fls(src | len));
+
+	dst_width = min_t(unsigned int, dwc->dw->data_width[dwc_get_dms(dws)],
+			  dwc_fast_fls(dest | len));
 
 	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)
@@ -720,6 +735,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	dma_addr_t		reg;
 	unsigned int		reg_width;
 	unsigned int		mem_width;
+	unsigned int		data_width;
 	unsigned int		i;
 	struct scatterlist	*sg;
 	size_t			total_len = 0;
@@ -743,6 +759,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
 			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
 
+		data_width = dwc->dw->data_width[dwc_get_sms(dws)];
+
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
@@ -750,7 +768,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = dwc_fast_fls(mem | len);
+			mem_width = min_t(unsigned int,
+					  data_width, dwc_fast_fls(mem | len));
 
 slave_sg_todev_fill_desc:
 			desc = dwc_desc_get(dwc);
@@ -803,6 +822,8 @@ slave_sg_todev_fill_desc:
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
 			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
 
+		data_width = dwc->dw->data_width[dwc_get_dms(dws)];
+
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
@@ -810,7 +831,8 @@ slave_sg_todev_fill_desc:
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = dwc_fast_fls(mem | len);
+			mem_width = min_t(unsigned int,
+					  data_width, dwc_fast_fls(mem | len));
 
 slave_sg_fromdev_fill_desc:
 			desc = dwc_desc_get(dwc);
@@ -1415,9 +1437,19 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	dw->regs = regs;
 
 	/* get hardware configuration parameters */
-	if (autocfg)
+	if (autocfg) {
 		max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
 
+		dw->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+		for (i = 0; i < dw->nr_masters; i++) {
+			dw->data_width[i] =
+				(dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
+		}
+	} else {
+		dw->nr_masters = pdata->nr_masters;
+		memcpy(dw->data_width, pdata->data_width, 4);
+	}
+
 	/* Calculate all channel mask before DMA setup */
 	dw->all_chan_mask = (1 << nr_channels) - 1;
 
@@ -1464,6 +1496,8 @@ static int __devinit dw_probe(struct platform_device *pdev)
 
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 
+		dwc->dw = dw;
+
 		/* hardware configuration */
 		if (autocfg)
 			/* Decode maximum block size for given channel. The
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 2a1cc533f0c8..06f03914f022 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -198,6 +198,9 @@ struct dw_dma_chan {
 
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;
+
+	/* backlink to dw_dma */
+	struct dw_dma		*dw;
 };
 
 static inline struct dw_dma_chan_regs __iomem *
@@ -224,6 +227,10 @@ struct dw_dma {
 
 	u8			all_chan_mask;
 
+	/* hardware configuration */
+	unsigned char		nr_masters;
+	unsigned char		data_width[4];
+
 	struct dw_dma_chan	chan[0];
 };
 
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 3315ef9c785b..e1c8c9e919ac 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -20,6 +20,9 @@
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
  * @block_size: Maximum block size supported by the controller
+ * @nr_masters: Number of AHB masters supported by the controller
+ * @data_width: Maximum data width supported by hardware per AHB master
+ *		(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
@@ -31,6 +34,8 @@ struct dw_dma_platform_data {
 #define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
 	unsigned char	chan_priority;
 	unsigned short	block_size;
+	unsigned char	nr_masters;
+	unsigned char	data_width[4];
 };
 
 /* bursts size */

From fed2574b3c9f44556ed4f5cb17f63b15edd87d06 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 21 Sep 2012 15:05:49 +0300
Subject: [PATCH 27/29] dw_dmac: introduce software emulation of LLP transfers

Some controllers have the reduced functionality where the LLP multi block
transfers are not supported. This patch introduces a support of such
controllers. In case of memory copy or scatter-gather lists it emulates LLP
transfers via bunch of the regular single block ones.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac.c      | 99 ++++++++++++++++++++++++++++++++++++--
 drivers/dma/dw_dmac_regs.h |  6 +++
 2 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index cdc0a1fe2c64..9ca9ca41b83e 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -232,10 +232,29 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
 
 /*----------------------------------------------------------------------*/
 
+/* Perform single block transfer */
+static inline void dwc_do_single_block(struct dw_dma_chan *dwc,
+				       struct dw_desc *desc)
+{
+	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	u32		ctllo;
+
+	/* Software emulation of LLP mode relies on interrupts to continue
+	 * multi block transfer. */
+	ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
+
+	channel_writel(dwc, SAR, desc->lli.sar);
+	channel_writel(dwc, DAR, desc->lli.dar);
+	channel_writel(dwc, CTL_LO, ctllo);
+	channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
+	channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
 /* Called with dwc->lock held and bh disabled */
 static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 {
 	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	unsigned long	was_soft_llp;
 
 	/* ASSERT:  channel is idle */
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
@@ -247,6 +266,26 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 		return;
 	}
 
+	if (dwc->nollp) {
+		was_soft_llp = test_and_set_bit(DW_DMA_IS_SOFT_LLP,
+						&dwc->flags);
+		if (was_soft_llp) {
+			dev_err(chan2dev(&dwc->chan),
+				"BUG: Attempted to start new LLP transfer "
+				"inside ongoing one\n");
+			return;
+		}
+
+		dwc_initialize(dwc);
+
+		dwc->tx_list = &first->tx_list;
+		dwc->tx_node_active = first->tx_list.next;
+
+		dwc_do_single_block(dwc, first);
+
+		return;
+	}
+
 	dwc_initialize(dwc);
 
 	channel_writel(dwc, LLP, first->txd.phys);
@@ -558,8 +597,36 @@ static void dw_dma_tasklet(unsigned long data)
 			dwc_handle_cyclic(dw, dwc, status_err, status_xfer);
 		else if (status_err & (1 << i))
 			dwc_handle_error(dw, dwc);
-		else if (status_xfer & (1 << i))
+		else if (status_xfer & (1 << i)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&dwc->lock, flags);
+			if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) {
+				if (dwc->tx_node_active != dwc->tx_list) {
+					struct dw_desc *desc =
+						list_entry(dwc->tx_node_active,
+							   struct dw_desc,
+							   desc_node);
+
+					dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+					/* move pointer to next descriptor */
+					dwc->tx_node_active =
+						dwc->tx_node_active->next;
+
+					dwc_do_single_block(dwc, desc);
+
+					spin_unlock_irqrestore(&dwc->lock, flags);
+					continue;
+				} else {
+					/* we are done here */
+					clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+				}
+			}
+			spin_unlock_irqrestore(&dwc->lock, flags);
+
 			dwc_scan_descriptors(dw, dwc);
+		}
 	}
 
 	/*
@@ -962,6 +1029,8 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	} else if (cmd == DMA_TERMINATE_ALL) {
 		spin_lock_irqsave(&dwc->lock, flags);
 
+		clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+
 		dwc_chan_disable(dw, dwc);
 
 		dwc->paused = false;
@@ -1204,6 +1273,13 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	unsigned long			flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
+	if (dwc->nollp) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		dev_dbg(chan2dev(&dwc->chan),
+				"channel doesn't support LLP transfers\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
 		spin_unlock_irqrestore(&dwc->lock, flags);
 		dev_dbg(chan2dev(&dwc->chan),
@@ -1471,6 +1547,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&dw->dma.channels);
 	for (i = 0; i < nr_channels; i++) {
 		struct dw_dma_chan	*dwc = &dw->chan[i];
+		int			r = nr_channels - i - 1;
 
 		dwc->chan.device = &dw->dma;
 		dma_cookie_init(&dwc->chan);
@@ -1482,7 +1559,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 
 		/* 7 is highest priority & 0 is lowest. */
 		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
-			dwc->priority = nr_channels - i - 1;
+			dwc->priority = r;
 		else
 			dwc->priority = i;
 
@@ -1499,14 +1576,28 @@ static int __devinit dw_probe(struct platform_device *pdev)
 		dwc->dw = dw;
 
 		/* hardware configuration */
-		if (autocfg)
+		if (autocfg) {
+			unsigned int dwc_params;
+
+			dwc_params = dma_read_byaddr(regs + r * sizeof(u32),
+						     DWC_PARAMS);
+
 			/* Decode maximum block size for given channel. The
 			 * stored 4 bit value represents blocks from 0x00 for 3
 			 * up to 0x0a for 4095. */
 			dwc->block_size =
 				(4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
-		else
+			dwc->nollp =
+				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+		} else {
 			dwc->block_size = pdata->block_size;
+
+			/* Check if channel supports multi block transfer */
+			channel_writel(dwc, LLP, 0xfffffffc);
+			dwc->nollp =
+				(channel_readl(dwc, LLP) & 0xfffffffc) == 0;
+			channel_writel(dwc, LLP, 0);
+		}
 	}
 
 	/* Clear all interrupts on all channels. */
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 06f03914f022..ff39fa6cd2bc 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -172,6 +172,7 @@ struct dw_dma_regs {
 
 enum dw_dmac_flags {
 	DW_DMA_IS_CYCLIC = 0,
+	DW_DMA_IS_SOFT_LLP = 1,
 };
 
 struct dw_dma_chan {
@@ -182,6 +183,10 @@ struct dw_dma_chan {
 	bool			paused;
 	bool			initialized;
 
+	/* software emulation of the LLP transfers */
+	struct list_head	*tx_list;
+	struct list_head	*tx_node_active;
+
 	spinlock_t		lock;
 
 	/* these other elements are all protected by lock */
@@ -195,6 +200,7 @@ struct dw_dma_chan {
 
 	/* hardware configuration */
 	unsigned int		block_size;
+	bool			nollp;
 
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;

From 3d4f860583bf4aa55dd5c6d5ba6ccf1f25fb473a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 1 Oct 2012 13:06:25 +0300
Subject: [PATCH 28/29] dw_dmac: fix a regression in dwc_prep_dma_memcpy

Sometimes memory-to-memory test is failed, that's why we need to choose minimum
data portion between source and destination limits together.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dw_dmac.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9ca9ca41b83e..c4b0eb3cde81 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -716,6 +716,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	size_t			offset;
 	unsigned int		src_width;
 	unsigned int		dst_width;
+	unsigned int		data_width;
 	u32			ctllo;
 
 	dev_vdbg(chan2dev(chan),
@@ -728,11 +729,11 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		return NULL;
 	}
 
-	src_width = min_t(unsigned int, dwc->dw->data_width[dwc_get_sms(dws)],
-			  dwc_fast_fls(src | len));
+	data_width = min_t(unsigned int, dwc->dw->data_width[dwc_get_sms(dws)],
+					 dwc->dw->data_width[dwc_get_dms(dws)]);
 
-	dst_width = min_t(unsigned int, dwc->dw->data_width[dwc_get_dms(dws)],
-			  dwc_fast_fls(dest | len));
+	src_width = dst_width = min_t(unsigned int, data_width,
+				      dwc_fast_fls(src | dest | len));
 
 	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)

From d0fc905429f7f5f3ad365466669c55b04b92c1e5 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Wed, 3 Oct 2012 22:48:07 +0530
Subject: [PATCH 29/29] dma: tegra: fix interrupt name issue with apb dma.

When watching the interrupts through /proc/interrupts,
the name of the interrupts are blank or garbage. The
reason is the pointer passed for devname during irq
registration is stack and so it get changed after
dma registration completes.
Allocate the pointer as part of dma channel and pass
this pointer as the devname for irq registration to
avoid change of name.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reported-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/tegra20-apb-dma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 6ed3f43be41e..e447c8aad48a 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -169,6 +169,7 @@ typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc,
 /* tegra_dma_channel: Channel specific information */
 struct tegra_dma_channel {
 	struct dma_chan		dma_chan;
+	char			name[30];
 	bool			config_init;
 	int			id;
 	int			irq;
@@ -1282,7 +1283,6 @@ static int __devinit tegra_dma_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&tdma->dma_dev.channels);
 	for (i = 0; i < cdata->nr_channels; i++) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
-		char irq_name[30];
 
 		tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET +
 					i * TEGRA_APBDMA_CHANNEL_REGISTER_SIZE;
@@ -1294,9 +1294,9 @@ static int __devinit tegra_dma_probe(struct platform_device *pdev)
 			goto err_irq;
 		}
 		tdc->irq = res->start;
-		snprintf(irq_name, sizeof(irq_name), "apbdma.%d", i);
+		snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i);
 		ret = devm_request_irq(&pdev->dev, tdc->irq,
-				tegra_dma_isr, 0, irq_name, tdc);
+				tegra_dma_isr, 0, tdc->name, tdc);
 		if (ret) {
 			dev_err(&pdev->dev,
 				"request_irq failed with err %d channel %d\n",