linux_dsm_epyc7002/drivers/dma/fsldma.h

266 lines
7.9 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
*
* Author:
* Zhang Wei <wei.zhang@freescale.com>, Jul 2007
* Ebony Zhu <ebony.zhu@freescale.com>, May 2007
*/
#ifndef __DMA_FSLDMA_H
#define __DMA_FSLDMA_H
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/dmaengine.h>
/* Define data structures needed by Freescale
* MPC8540 and MPC8349 DMA controller.
*/
#define FSL_DMA_MR_CS 0x00000001
#define FSL_DMA_MR_CC 0x00000002
#define FSL_DMA_MR_CA 0x00000008
#define FSL_DMA_MR_EIE 0x00000040
#define FSL_DMA_MR_XFE 0x00000020
#define FSL_DMA_MR_EOLNIE 0x00000100
#define FSL_DMA_MR_EOLSIE 0x00000080
#define FSL_DMA_MR_EOSIE 0x00000200
#define FSL_DMA_MR_CDSM 0x00000010
#define FSL_DMA_MR_CTM 0x00000004
#define FSL_DMA_MR_EMP_EN 0x00200000
#define FSL_DMA_MR_EMS_EN 0x00040000
#define FSL_DMA_MR_DAHE 0x00002000
#define FSL_DMA_MR_SAHE 0x00001000
#define FSL_DMA_MR_SAHTS_MASK 0x0000C000
#define FSL_DMA_MR_DAHTS_MASK 0x00030000
#define FSL_DMA_MR_BWC_MASK 0x0f000000
/*
* Bandwidth/pause control determines how many bytes a given
* channel is allowed to transfer before the DMA engine pauses
* the current channel and switches to the next channel
*/
#define FSL_DMA_MR_BWC 0x0A000000
/* Special MR definition for MPC8349 */
#define FSL_DMA_MR_EOTIE 0x00000080
#define FSL_DMA_MR_PRC_RM 0x00000800
#define FSL_DMA_SR_CH 0x00000020
#define FSL_DMA_SR_PE 0x00000010
#define FSL_DMA_SR_CB 0x00000004
#define FSL_DMA_SR_TE 0x00000080
#define FSL_DMA_SR_EOSI 0x00000002
#define FSL_DMA_SR_EOLSI 0x00000001
#define FSL_DMA_SR_EOCDI 0x00000001
#define FSL_DMA_SR_EOLNI 0x00000008
#define FSL_DMA_SATR_SBPATMU 0x20000000
#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000
#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000
#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000
#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000
#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000
#define FSL_DMA_DATR_DBPATMU 0x20000000
#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000
#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000
#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000
#define FSL_DMA_EOL ((u64)0x1)
#define FSL_DMA_SNEN ((u64)0x10)
#define FSL_DMA_EOSIE 0x8
#define FSL_DMA_NLDA_MASK (~(u64)0x1f)
#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu
#define FSL_DMA_DGSR_TE 0x80
#define FSL_DMA_DGSR_CH 0x20
#define FSL_DMA_DGSR_PE 0x10
#define FSL_DMA_DGSR_EOLNI 0x08
#define FSL_DMA_DGSR_CB 0x04
#define FSL_DMA_DGSR_EOSI 0x02
#define FSL_DMA_DGSR_EOLSI 0x01
dmaengine: fsldma: declare slave capabilities for the generic code Since commit ecc19d17868b ("dmaengine: Add a warning for drivers not using the generic slave caps retrieval"), the dma drivers are required to fill the caps infos in order to support generic slaves caps retrieval. Otherwise we will get a warning like this: WARNING: at drivers/dma/dmaengine.c:830 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 3.19.0-rc2-next-20150106-dirty #271 task: c0000001f70a0000 ti: c0000001f7044000 task.ti: c0000001f7044000 NIP: c00000000032b238 LR: c00000000032b234 CTR: c00000000001d258 REGS: c0000001f7047330 TRAP: 0700 Tainted: G W (3.19.0-rc2-next-20150106-dirty) MSR: 0000000080029000 <CE,EE,ME> CR: 24adbe22 XER: 20000000 SOFTE: 1 GPR00: c00000000032b234 c0000001f70475b0 c0000000009b4848 0000000000000040 GPR04: 0000000000000001 0000000000000001 0000000000000000 000000000000000f GPR08: 0000000000000000 c000000000902988 c000000000902988 00000000000052c8 GPR12: 0000000024adbe22 c00000000fff4000 c000000000002038 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 c000000000972dc8 c0000000007e6fd0 GPR28: c0000001f76d1d30 c0000001f76d1c10 c0000001f76d1c00 0000000000000000 NIP [c00000000032b238] .dma_async_device_register+0x3f8/0x5b8 LR [c00000000032b234] .dma_async_device_register+0x3f4/0x5b8 Call Trace: [c0000001f70475b0] [c00000000032b234] .dma_async_device_register+0x3f4/0x5b8 (unreliable) [c0000001f70476a0] [c00000000032ca78] .fsldma_of_probe+0x298/0x438 [c0000001f7047750] [c00000000037080c] .platform_drv_probe+0x50/0x9c [c0000001f70477d0] [c00000000036e74c] .really_probe+0xa4/0x29c [c0000001f7047870] [c00000000036eae4] .__driver_attach+0x100/0x104 [c0000001f7047900] [c00000000036c1f0] .bus_for_each_dev+0x84/0xe4 [c0000001f70479a0] [c00000000036e164] .driver_attach+0x24/0x38 [c0000001f7047a10] [c00000000036dcc8] .bus_add_driver+0x1c8/0x2ac [c0000001f7047ab0] [c00000000036f14c] .driver_register+0x8c/0x158 [c0000001f7047b30] [c0000000003707a8] .__platform_driver_register+0x6c/0x80 [c0000001f7047ba0] [c000000000898a3c] .fsldma_init+0x2c/0x40 [c0000001f7047c10] [c000000000001818] .do_one_initcall+0xb8/0x234 [c0000001f7047d00] [c000000000878e2c] .kernel_init_freeable+0x188/0x268 [c0000001f7047db0] [c000000000002054] .kernel_init+0x1c/0xfc8 [c0000001f7047e30] [c000000000000884] .ret_from_kernel_thread+0x58/0xd4 Instruction dump: 7fb9f840 3bffffe0 409effac 7f54d378 48000060 813d0050 2f890000 40befdd0 3c62ffe3 38632450 482f0aa9 60000000 <0fe00000> 4bfffdb8 7f03c378 482ed465 Signed-off-by: Kevin Hao <haokexin@gmail.com> Signed-off-by: Vinod Koul <vinod.koul@intel.com>
2015-01-08 17:38:16 +07:00
#define FSL_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
typedef u64 __bitwise v64;
typedef u32 __bitwise v32;
struct fsl_dma_ld_hw {
v64 src_addr;
v64 dst_addr;
v64 next_ln_addr;
v32 count;
v32 reserve;
} __attribute__((aligned(32)));
struct fsl_desc_sw {
struct fsl_dma_ld_hw hw;
struct list_head node;
struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
} __attribute__((aligned(32)));
struct fsldma_chan_regs {
u32 mr; /* 0x00 - Mode Register */
u32 sr; /* 0x04 - Status Register */
u64 cdar; /* 0x08 - Current descriptor address register */
u64 sar; /* 0x10 - Source Address Register */
u64 dar; /* 0x18 - Destination Address Register */
u32 bcr; /* 0x20 - Byte Count Register */
u64 ndar; /* 0x24 - Next Descriptor Address Register */
};
struct fsldma_chan;
#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
struct fsldma_device {
void __iomem *regs; /* DGSR register base */
struct device *dev;
struct dma_device common;
struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
u32 feature; /* The same as DMA channels */
int irq; /* Channel IRQ */
};
/* Define macros for fsldma_chan->feature property */
#define FSL_DMA_LITTLE_ENDIAN 0x00000000
#define FSL_DMA_BIG_ENDIAN 0x00000001
#define FSL_DMA_IP_MASK 0x00000ff0
#define FSL_DMA_IP_85XX 0x00000010
#define FSL_DMA_IP_83XX 0x00000020
#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000
#define FSL_DMA_CHAN_START_EXT 0x00002000
#ifdef CONFIG_PM
struct fsldma_chan_regs_save {
u32 mr;
};
enum fsldma_pm_state {
RUNNING = 0,
SUSPENDED,
};
#endif
struct fsldma_chan {
char name[8]; /* Channel name */
struct fsldma_chan_regs __iomem *regs;
spinlock_t desc_lock; /* Descriptor operation lock */
dmaengine: Freescale: change descriptor release process for supporting async_tx Fix the potential risk when enable config NET_DMA and ASYNC_TX. Async_tx is lack of support in current release process of dma descriptor, all descriptors will be released whatever is acked or no-acked by async_tx, so there is a potential race condition when dma engine is uesd by others clients (e.g. when enable NET_DMA to offload TCP). In our case, a race condition which is raised when use both of talitos and dmaengine to offload xor is because napi scheduler will sync all pending requests in dma channels, it affects the process of raid operations due to ack_tx is not checked in fsl dma. The no-acked descriptor is freed which is submitted just now, as a dependent tx, this freed descriptor trigger BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit(). TASK = ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0 GPR00: 00000001 ecf41ca0 ee44/921a94a0 0000003f 00000001 c00593e4 00000000 00000001 GPR08: 00000000 a7a7a7a7 00000001 045/920000002 42028042 100a38d4 ed576d98 00000000 GPR16: ed5a11b0 00000000 2b162000 00000200 046/920000000 2d555000 ed3015e8 c15a7aa0 GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 e47/92f640bb0 ef640c30 ecf41ca0 NIP [c02b048c] async_tx_submit+0x6c/0x2b4 LR [c02b068c] async_tx_submit+0x26c/0x2b4 Call Trace: [ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b448/92 (unreliable) [ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c [ecf41d20] [c0421064] async_copy_data+0xa0/0x17c [ecf41d70] [c0421cf4] __raid_run_ops+0x874/0xe10 [ecf41df0] [c0426ee4] handle_stripe+0x820/0x25e8 [ecf41e90] [c0429080] raid5d+0x3d4/0x5b4 [ecf41f40] [c04329b8] md_thread+0x138/0x16c [ecf41f90] [c008277c] kthread+0x8c/0x90 [ecf41ff0] [c0011630] kernel_thread+0x4c/0x68 Another modification in this patch is the change of completed descriptors, there is a potential risk which caused by exception interrupt, all descriptors in ld_running list are seemed completed when an interrupt raised, it works fine under normal condition, but if there is an exception occured, it cannot work as our excepted. Hardware should not be depend on s/w list, the right way is to read current descriptor address register to find the last completed descriptor. If an interrupt is raised by an error, all descriptors in ld_running should not be seemed finished, or these unfinished descriptors in ld_running will be released wrongly. A simple way to reproduce: Enable dmatest first, then insert some bad descriptors which can trigger Programming Error interrupts before the good descriptors. Last, the good descriptors will be freed before they are processsed because of the exception intrerrupt. Note: the bad descriptors are only for simulating an exception interrupt. This case can illustrate the potential risk in current fsl-dma very well. Signed-off-by: Hongbo Zhang <hongbo.zhang@freescale.com> Signed-off-by: Qiang Liu <qiang.liu@freescale.com> Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu> Signed-off-by: Vinod Koul <vinod.koul@intel.com>
2014-05-21 15:03:03 +07:00
/*
* Descriptors which are queued to run, but have not yet been
* submitted to the hardware for execution
*/
struct list_head ld_pending;
/*
* Descriptors which are currently being executed by the hardware
*/
struct list_head ld_running;
/*
* Descriptors which have finished execution by the hardware. These
* descriptors have already had their cleanup actions run. They are
* waiting for the ACK bit to be set by the async_tx API.
*/
struct list_head ld_completed; /* Link descriptors queue */
struct dma_chan common; /* DMA common channel */
struct dma_pool *desc_pool; /* Descriptors pool */
struct device *dev; /* Channel device */
int irq; /* Channel IRQ */
int id; /* Raw id of this channel */
struct tasklet_struct tasklet;
u32 feature;
bool idle; /* DMA controller is idle */
#ifdef CONFIG_PM
struct fsldma_chan_regs_save regs_save;
enum fsldma_pm_state pm_state;
#endif
void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size);
void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size);
void (*set_request_count)(struct fsldma_chan *fsl_chan, int size);
};
#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common)
#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
#ifdef CONFIG_PPC
#define fsl_ioread32(p) in_le32(p)
#define fsl_ioread32be(p) in_be32(p)
#define fsl_iowrite32(v, p) out_le32(p, v)
#define fsl_iowrite32be(v, p) out_be32(p, v)
#ifdef __powerpc64__
#define fsl_ioread64(p) in_le64(p)
#define fsl_ioread64be(p) in_be64(p)
#define fsl_iowrite64(v, p) out_le64(p, v)
#define fsl_iowrite64be(v, p) out_be64(p, v)
#else
static u64 fsl_ioread64(const u64 __iomem *addr)
{
fsldma: fix very broken 32-bit ppc ioread64 functionality Commit ef91bb196b0d ("kernel.h: Silence sparse warning in lower_32_bits") caused new warnings to show in the fsldma driver, but that commit was not to blame: it only exposed some very incorrect code that tried to take the low 32 bits of an address. That made no sense for multiple reasons, the most notable one being that that code was intentionally limited to only 32-bit ppc builds, so "only low 32 bits of an address" was completely nonsensical. There were no high bits to mask off to begin with. But even more importantly fropm a correctness standpoint, turning the address into an integer then caused the subsequent address arithmetic to be completely wrong too, and the "+1" actually incremented the address by one, rather than by four. Which again was incorrect, since the code was reading two 32-bit values and trying to make a 64-bit end result of it all. Surprisingly, the iowrite64() did not suffer from the same odd and incorrect model. This code has never worked, but it's questionable whether anybody cared: of the two users that actually read the 64-bit value (by way of some C preprocessor hackery and eventually the 'get_cdar()' inline function), one of them explicitly ignored the value, and the other one might just happen to work despite the incorrect value being read. This patch at least makes it not fail the build any more, and makes the logic superficially sane. Whether it makes any difference to the code _working_ or not shall remain a mystery. Compile-tested-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-08-30 03:50:56 +07:00
u32 val_lo = in_le32((u32 __iomem *)addr);
u32 val_hi = in_le32((u32 __iomem *)addr + 1);
fsldma: fix very broken 32-bit ppc ioread64 functionality Commit ef91bb196b0d ("kernel.h: Silence sparse warning in lower_32_bits") caused new warnings to show in the fsldma driver, but that commit was not to blame: it only exposed some very incorrect code that tried to take the low 32 bits of an address. That made no sense for multiple reasons, the most notable one being that that code was intentionally limited to only 32-bit ppc builds, so "only low 32 bits of an address" was completely nonsensical. There were no high bits to mask off to begin with. But even more importantly fropm a correctness standpoint, turning the address into an integer then caused the subsequent address arithmetic to be completely wrong too, and the "+1" actually incremented the address by one, rather than by four. Which again was incorrect, since the code was reading two 32-bit values and trying to make a 64-bit end result of it all. Surprisingly, the iowrite64() did not suffer from the same odd and incorrect model. This code has never worked, but it's questionable whether anybody cared: of the two users that actually read the 64-bit value (by way of some C preprocessor hackery and eventually the 'get_cdar()' inline function), one of them explicitly ignored the value, and the other one might just happen to work despite the incorrect value being read. This patch at least makes it not fail the build any more, and makes the logic superficially sane. Whether it makes any difference to the code _working_ or not shall remain a mystery. Compile-tested-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-08-30 03:50:56 +07:00
return ((u64)val_hi << 32) + val_lo;
}
static void fsl_iowrite64(u64 val, u64 __iomem *addr)
{
out_le32((u32 __iomem *)addr + 1, val >> 32);
out_le32((u32 __iomem *)addr, (u32)val);
}
static u64 fsl_ioread64be(const u64 __iomem *addr)
{
fsldma: fix very broken 32-bit ppc ioread64 functionality Commit ef91bb196b0d ("kernel.h: Silence sparse warning in lower_32_bits") caused new warnings to show in the fsldma driver, but that commit was not to blame: it only exposed some very incorrect code that tried to take the low 32 bits of an address. That made no sense for multiple reasons, the most notable one being that that code was intentionally limited to only 32-bit ppc builds, so "only low 32 bits of an address" was completely nonsensical. There were no high bits to mask off to begin with. But even more importantly fropm a correctness standpoint, turning the address into an integer then caused the subsequent address arithmetic to be completely wrong too, and the "+1" actually incremented the address by one, rather than by four. Which again was incorrect, since the code was reading two 32-bit values and trying to make a 64-bit end result of it all. Surprisingly, the iowrite64() did not suffer from the same odd and incorrect model. This code has never worked, but it's questionable whether anybody cared: of the two users that actually read the 64-bit value (by way of some C preprocessor hackery and eventually the 'get_cdar()' inline function), one of them explicitly ignored the value, and the other one might just happen to work despite the incorrect value being read. This patch at least makes it not fail the build any more, and makes the logic superficially sane. Whether it makes any difference to the code _working_ or not shall remain a mystery. Compile-tested-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-08-30 03:50:56 +07:00
u32 val_hi = in_be32((u32 __iomem *)addr);
u32 val_lo = in_be32((u32 __iomem *)addr + 1);
fsldma: fix very broken 32-bit ppc ioread64 functionality Commit ef91bb196b0d ("kernel.h: Silence sparse warning in lower_32_bits") caused new warnings to show in the fsldma driver, but that commit was not to blame: it only exposed some very incorrect code that tried to take the low 32 bits of an address. That made no sense for multiple reasons, the most notable one being that that code was intentionally limited to only 32-bit ppc builds, so "only low 32 bits of an address" was completely nonsensical. There were no high bits to mask off to begin with. But even more importantly fropm a correctness standpoint, turning the address into an integer then caused the subsequent address arithmetic to be completely wrong too, and the "+1" actually incremented the address by one, rather than by four. Which again was incorrect, since the code was reading two 32-bit values and trying to make a 64-bit end result of it all. Surprisingly, the iowrite64() did not suffer from the same odd and incorrect model. This code has never worked, but it's questionable whether anybody cared: of the two users that actually read the 64-bit value (by way of some C preprocessor hackery and eventually the 'get_cdar()' inline function), one of them explicitly ignored the value, and the other one might just happen to work despite the incorrect value being read. This patch at least makes it not fail the build any more, and makes the logic superficially sane. Whether it makes any difference to the code _working_ or not shall remain a mystery. Compile-tested-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-08-30 03:50:56 +07:00
return ((u64)val_hi << 32) + val_lo;
}
static void fsl_iowrite64be(u64 val, u64 __iomem *addr)
{
out_be32((u32 __iomem *)addr, val >> 32);
out_be32((u32 __iomem *)addr + 1, (u32)val);
}
#endif
#endif
#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
#define fsl_ioread32(p) ioread32(p)
#define fsl_ioread32be(p) ioread32be(p)
#define fsl_iowrite32(v, p) iowrite32(v, p)
#define fsl_iowrite32be(v, p) iowrite32be(v, p)
#define fsl_ioread64(p) ioread64(p)
#define fsl_ioread64be(p) ioread64be(p)
#define fsl_iowrite64(v, p) iowrite64(v, p)
#define fsl_iowrite64be(v, p) iowrite64be(v, p)
#endif
#define FSL_DMA_IN(fsl_dma, addr, width) \
(((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ? \
fsl_ioread##width##be(addr) : fsl_ioread##width(addr))
#define FSL_DMA_OUT(fsl_dma, addr, val, width) \
(((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ? \
fsl_iowrite##width##be(val, addr) : fsl_iowrite \
##width(val, addr))
#define DMA_TO_CPU(fsl_chan, d, width) \
(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
be##width##_to_cpu((__force __be##width)(v##width)d) : \
le##width##_to_cpu((__force __le##width)(v##width)d))
#define CPU_TO_DMA(fsl_chan, c, width) \
(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
(__force v##width)cpu_to_be##width(c) : \
(__force v##width)cpu_to_le##width(c))
#endif /* __DMA_FSLDMA_H */