mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-22 19:03:28 +07:00
32e0f5bfa5
If a failure occurs when creating Debug FS entries, unroll all of the work that's been done. Signed-off-by: Gary R Hook <gary.hook@amd.com> Acked-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Jon Mason <jdmason@kudzu.us>
900 lines
21 KiB
C
900 lines
21 KiB
C
/*
|
|
* This file is provided under a dual BSD/GPLv2 license. When using or
|
|
* redistributing this file, you may do so under either license.
|
|
*
|
|
* GPL LICENSE SUMMARY
|
|
*
|
|
* Copyright(c) 2015 Intel Corporation. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of version 2 of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright(c) 2015 Intel Corporation. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copy
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* PCIe NTB Perf Linux driver
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/time.h>
|
|
#include <linux/timer.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/dmaengine.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/sizes.h>
|
|
#include <linux/ntb.h>
|
|
#include <linux/mutex.h>
|
|
|
|
/* Module identification strings consumed by the MODULE_*() macros. */
#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

/* Delay (ms) before perf_link_work() re-runs while the handshake is pending */
#define PERF_LINK_DOWN_TIMEOUT	10
/* Magic exchanged through the VERSION scratchpad */
#define PERF_VERSION		0xffff0001
/* Size of the per-device worker context array */
#define MAX_THREADS		32
/* Size of each source buffer; also the upper bound of one copy segment */
#define MAX_TEST_SIZE		SZ_1M
/* Number of source buffers allocated per worker thread */
#define MAX_SRCS		32
/* Sleep between attempts to obtain a DMA descriptor */
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
/* Maximum number of attempts to obtain a DMA descriptor */
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
/* Peer index used for all scratchpad and memory-window calls */
#define PIDX			NTB_DEF_PEER_IDX
|
|
|
|
/* Module metadata, built from the DRIVER_* strings defined above. */
MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
|
|
|
|
static struct dentry *perf_debugfs_dir;
|
|
|
|
static unsigned long max_mw_size;
|
|
module_param(max_mw_size, ulong, 0644);
|
|
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
|
|
|
|
static unsigned int seg_order = 19; /* 512K */
|
|
module_param(seg_order, uint, 0644);
|
|
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
|
|
|
|
static unsigned int run_order = 32; /* 4G */
|
|
module_param(run_order, uint, 0644);
|
|
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
|
|
|
|
static bool use_dma; /* default to 0 */
|
|
module_param(use_dma, bool, 0644);
|
|
MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
|
|
|
|
static bool on_node = true; /* default to 1 */
|
|
module_param(on_node, bool, 0644);
|
|
MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");
|
|
|
|
struct perf_mw {
|
|
phys_addr_t phys_addr;
|
|
resource_size_t phys_size;
|
|
resource_size_t xlat_align;
|
|
resource_size_t xlat_align_size;
|
|
void __iomem *vbase;
|
|
size_t xlat_size;
|
|
size_t buf_size;
|
|
void *virt_addr;
|
|
dma_addr_t dma_addr;
|
|
};
|
|
|
|
struct perf_ctx;
|
|
|
|
struct pthr_ctx {
|
|
struct task_struct *thread;
|
|
struct perf_ctx *perf;
|
|
atomic_t dma_sync;
|
|
struct dma_chan *dma_chan;
|
|
int dma_prep_err;
|
|
int src_idx;
|
|
void *srcs[MAX_SRCS];
|
|
wait_queue_head_t *wq;
|
|
int status;
|
|
u64 copied;
|
|
u64 diff_us;
|
|
};
|
|
|
|
struct perf_ctx {
|
|
struct ntb_dev *ntb;
|
|
spinlock_t db_lock;
|
|
struct perf_mw mw;
|
|
bool link_is_up;
|
|
struct delayed_work link_work;
|
|
wait_queue_head_t link_wq;
|
|
u8 perf_threads;
|
|
/* mutex ensures only one set of threads run at once */
|
|
struct mutex run_mutex;
|
|
struct pthr_ctx pthr_ctx[MAX_THREADS];
|
|
atomic_t tsync;
|
|
atomic_t tdone;
|
|
};
|
|
|
|
/* Scratchpad register indices used for the handshake with the peer. */
enum {
	VERSION = 0,	/* holds PERF_VERSION once the peer is ready */
	MW_SZ_HIGH,	/* upper 32 bits of the advertised window size */
	MW_SZ_LOW,	/* lower 32 bits of the advertised window size */
	MAX_SPAD	/* number of scratchpads the driver requires */
};
|
|
|
|
static void perf_link_event(void *ctx)
|
|
{
|
|
struct perf_ctx *perf = ctx;
|
|
|
|
if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
|
|
schedule_delayed_work(&perf->link_work, 2*HZ);
|
|
} else {
|
|
dev_dbg(&perf->ntb->pdev->dev, "link down\n");
|
|
|
|
if (!perf->link_is_up)
|
|
cancel_delayed_work_sync(&perf->link_work);
|
|
|
|
perf->link_is_up = false;
|
|
}
|
|
}
|
|
|
|
static void perf_db_event(void *ctx, int vec)
|
|
{
|
|
struct perf_ctx *perf = ctx;
|
|
u64 db_bits, db_mask;
|
|
|
|
db_mask = ntb_db_vector_mask(perf->ntb, vec);
|
|
db_bits = ntb_db_read(perf->ntb);
|
|
|
|
dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
|
|
vec, db_mask, db_bits);
|
|
}
|
|
|
|
static const struct ntb_ctx_ops perf_ops = {
|
|
.link_event = perf_link_event,
|
|
.db_event = perf_db_event,
|
|
};
|
|
|
|
static void perf_copy_callback(void *data)
|
|
{
|
|
struct pthr_ctx *pctx = data;
|
|
|
|
atomic_dec(&pctx->dma_sync);
|
|
}
|
|
|
|
/*
 * Copy one segment of @size bytes from @src into the peer window at @dst.
 *
 * Without use_dma the data is copied by the CPU with memcpy_toio().  With
 * use_dma a memcpy descriptor is prepared on the worker's DMA channel and
 * submitted; completion is signalled through perf_copy_callback().
 *
 * Returns @size when the copy was done or submitted, 0 when the source
 * could not be mapped, no descriptor could be obtained or the submission
 * failed, and a negative errno when the worker has no DMA channel
 * (-EINVAL), the buffers are not suitably aligned for the DMA device
 * (-ENODEV) or no unmap data could be allocated (-ENOMEM).
 */
static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct perf_mw *mw = &perf->mw;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_async_tx_descriptor *txd = NULL;
	struct dmaengine_unmap_data *unmap;
	struct dma_device *device;
	size_t src_off, dst_off;
	dma_addr_t dst_phys;
	dma_cookie_t cookie;
	int attempt;

	/* CPU copy path */
	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	/* Bus address of @dst: window base plus the offset into the mapping */
	dst_phys = mw->phys_addr + ((void __iomem *)dst - mw->vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	/* The channel may be out of descriptors; back off and try again */
	for (attempt = 0; attempt < DMA_RETRIES; attempt++) {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (txd)
			break;

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(DMA_OUT_RESOURCE_TO);
	}

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}
|
|
|
|
/*
 * Copy @total bytes from @src into the peer window at @dst in segments of
 * @buf_size bytes, wrapping back to the start of the window every
 * @win_size / @buf_size segments, and record the amount copied and the
 * elapsed time in @pctx.
 *
 * With use_dma the function also waits until all submitted copies have
 * completed, or until the thread is asked to stop.
 *
 * Returns 0 on success, -EINVAL when the window cannot hold a single
 * segment, or the negative errno reported by perf_copy().  Statistics are
 * stored in @pctx whenever the copy loop was entered, even on error.
 */
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	/* 64-bit counters: total / buf_size may not fit in an int */
	u64 chunks, total_chunks, i;
	u64 copied_chunks = 0;
	u64 copied = 0;
	ssize_t result;
	char __iomem *tmp = dst;
	u64 rate, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;
	int rc = 0;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);

	/*
	 * With no whole segment fitting in the window the wrap-around below
	 * would never trigger and the copy would run past the mapping.
	 */
	if (!chunks)
		return -EINVAL;

	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		if (result < 0) {
			/* Do not fold an errno into the byte count */
			rc = result;
			break;
		}

		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 5s to prevent soft hang. */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		/* Wait for the copies still in flight */
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);

	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	/* Bytes per microsecond; guard against a run shorter than 1 us */
	rate = diff_us ? div64_u64(copied, diff_us) : 0;

	pr_debug("%s: MBytes/s: %llu\n", current->comm, rate);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return rc;
}
|
|
|
|
static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
|
|
{
|
|
/* Is the channel required to be on the same node as the device? */
|
|
if (!on_node)
|
|
return true;
|
|
|
|
return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
|
|
}
|
|
|
|
static int ntb_perf_thread(void *data)
|
|
{
|
|
struct pthr_ctx *pctx = data;
|
|
struct perf_ctx *perf = pctx->perf;
|
|
struct pci_dev *pdev = perf->ntb->pdev;
|
|
struct perf_mw *mw = &perf->mw;
|
|
char __iomem *dst;
|
|
u64 win_size, buf_size, total;
|
|
void *src;
|
|
int rc, node, i;
|
|
struct dma_chan *dma_chan = NULL;
|
|
|
|
pr_debug("kthread %s starting...\n", current->comm);
|
|
|
|
node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
|
|
|
|
if (use_dma && !pctx->dma_chan) {
|
|
dma_cap_mask_t dma_mask;
|
|
|
|
dma_cap_zero(dma_mask);
|
|
dma_cap_set(DMA_MEMCPY, dma_mask);
|
|
dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
|
|
(void *)(unsigned long)node);
|
|
if (!dma_chan) {
|
|
pr_warn("%s: cannot acquire DMA channel, quitting\n",
|
|
current->comm);
|
|
return -ENODEV;
|
|
}
|
|
pctx->dma_chan = dma_chan;
|
|
}
|
|
|
|
for (i = 0; i < MAX_SRCS; i++) {
|
|
pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
|
|
if (!pctx->srcs[i]) {
|
|
rc = -ENOMEM;
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
win_size = mw->phys_size;
|
|
buf_size = 1ULL << seg_order;
|
|
total = 1ULL << run_order;
|
|
|
|
if (buf_size > MAX_TEST_SIZE)
|
|
buf_size = MAX_TEST_SIZE;
|
|
|
|
dst = (char __iomem *)mw->vbase;
|
|
|
|
atomic_inc(&perf->tsync);
|
|
while (atomic_read(&perf->tsync) != perf->perf_threads)
|
|
schedule();
|
|
|
|
src = pctx->srcs[pctx->src_idx];
|
|
pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
|
|
|
|
rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
|
|
|
|
atomic_dec(&perf->tsync);
|
|
|
|
if (rc < 0) {
|
|
pr_err("%s: failed\n", current->comm);
|
|
rc = -ENXIO;
|
|
goto err;
|
|
}
|
|
|
|
for (i = 0; i < MAX_SRCS; i++) {
|
|
kfree(pctx->srcs[i]);
|
|
pctx->srcs[i] = NULL;
|
|
}
|
|
|
|
atomic_inc(&perf->tdone);
|
|
wake_up(pctx->wq);
|
|
rc = 0;
|
|
goto done;
|
|
|
|
err:
|
|
for (i = 0; i < MAX_SRCS; i++) {
|
|
kfree(pctx->srcs[i]);
|
|
pctx->srcs[i] = NULL;
|
|
}
|
|
|
|
if (dma_chan) {
|
|
dma_release_channel(dma_chan);
|
|
pctx->dma_chan = NULL;
|
|
}
|
|
|
|
done:
|
|
/* Wait until we are told to stop */
|
|
for (;;) {
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
if (kthread_should_stop())
|
|
break;
|
|
schedule();
|
|
}
|
|
__set_current_state(TASK_RUNNING);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static void perf_free_mw(struct perf_ctx *perf)
|
|
{
|
|
struct perf_mw *mw = &perf->mw;
|
|
struct pci_dev *pdev = perf->ntb->pdev;
|
|
|
|
if (!mw->virt_addr)
|
|
return;
|
|
|
|
ntb_mw_clear_trans(perf->ntb, PIDX, 0);
|
|
dma_free_coherent(&pdev->dev, mw->buf_size,
|
|
mw->virt_addr, mw->dma_addr);
|
|
mw->xlat_size = 0;
|
|
mw->buf_size = 0;
|
|
mw->virt_addr = NULL;
|
|
}
|
|
|
|
/*
 * Allocate a coherent buffer of @size bytes (rounded up to the window
 * alignment) and program it as the translation of memory window 0.
 *
 * A buffer from an earlier call is released first, unless the rounded
 * translation size is unchanged, in which case nothing is done.
 *
 * Returns 0 on success, -EINVAL when @size is 0, -ENOMEM when the buffer
 * cannot be allocated and -EIO when the translation cannot be set.
 */
static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	int rc;

	if (!size)
		return -EINVAL;

	xlat_size = round_up(size, mw->xlat_align_size);
	buf_size = round_up(size, mw->xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		/* Never program a translation without a backing buffer */
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}
|
|
|
|
static void perf_link_work(struct work_struct *work)
|
|
{
|
|
struct perf_ctx *perf =
|
|
container_of(work, struct perf_ctx, link_work.work);
|
|
struct ntb_dev *ndev = perf->ntb;
|
|
struct pci_dev *pdev = ndev->pdev;
|
|
u32 val;
|
|
u64 size;
|
|
int rc;
|
|
|
|
dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
|
|
|
|
size = perf->mw.phys_size;
|
|
|
|
if (max_mw_size && size > max_mw_size)
|
|
size = max_mw_size;
|
|
|
|
ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
|
|
ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
|
|
ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);
|
|
|
|
/* now read what peer wrote */
|
|
val = ntb_spad_read(ndev, VERSION);
|
|
if (val != PERF_VERSION) {
|
|
dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
|
|
goto out;
|
|
}
|
|
|
|
val = ntb_spad_read(ndev, MW_SZ_HIGH);
|
|
size = (u64)val << 32;
|
|
|
|
val = ntb_spad_read(ndev, MW_SZ_LOW);
|
|
size |= val;
|
|
|
|
dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
|
|
|
|
rc = perf_set_mw(perf, size);
|
|
if (rc)
|
|
goto out1;
|
|
|
|
perf->link_is_up = true;
|
|
wake_up(&perf->link_wq);
|
|
|
|
return;
|
|
|
|
out1:
|
|
perf_free_mw(perf);
|
|
|
|
out:
|
|
if (ntb_link_is_up(ndev, NULL, NULL) == 1)
|
|
schedule_delayed_work(&perf->link_work,
|
|
msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
|
|
}
|
|
|
|
static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
|
|
{
|
|
struct perf_mw *mw;
|
|
int rc;
|
|
|
|
mw = &perf->mw;
|
|
|
|
rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
|
|
&mw->xlat_align_size, NULL);
|
|
if (rc)
|
|
return rc;
|
|
|
|
rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
|
|
if (rc)
|
|
return rc;
|
|
|
|
perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
|
|
if (!mw->vbase)
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read handler of the debugfs "run" file.
 *
 * Reports "running" while a test holds run_mutex.  Otherwise prints one
 * line per worker that has a result: either its error status or the
 * bytes copied, the elapsed microseconds and the resulting rate.  The
 * listing stops at the first worker whose status is -ENODATA.
 *
 * Returns the number of bytes copied to @ubuf, 0 when the file has no
 * context attached, or -ENOMEM.
 */
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, 1024 - out_off,
					    "%d: error %d\n", i,
					    pctx->status);
			continue;
		}

		/* A run shorter than 1 us must not divide by zero */
		rate = pctx->diff_us ?
			div64_u64(pctx->copied, pctx->diff_us) : 0;
		out_off += scnprintf(buf + out_off, 1024 - out_off,
			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
			i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}
|
|
|
|
static void threads_cleanup(struct perf_ctx *perf)
|
|
{
|
|
struct pthr_ctx *pctx;
|
|
int i;
|
|
|
|
for (i = 0; i < MAX_THREADS; i++) {
|
|
pctx = &perf->pthr_ctx[i];
|
|
if (pctx->thread) {
|
|
pctx->status = kthread_stop(pctx->thread);
|
|
pctx->thread = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void perf_clear_thread_status(struct perf_ctx *perf)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < MAX_THREADS; i++)
|
|
perf->pthr_ctx[i].status = -ENODATA;
|
|
}
|
|
|
|
/*
 * Write handler of the debugfs "run" file: any write starts a test run.
 *
 * Waits for the link handshake, clamps perf_threads, seg_order and
 * run_order to their supported ranges, starts perf_threads worker
 * threads, waits until all of them report completion (or the wait is
 * interrupted) and then stops them.
 *
 * Returns @count once the workers have been stopped, -ENOLINK when the
 * wait for the link is interrupted, -EINVAL when perf_threads is 0,
 * -EBUSY when a run is already in progress and -ENXIO when a worker
 * thread cannot be created.
 */
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	DECLARE_WAIT_QUEUE_HEAD(wq);
	ssize_t ret = count;
	int node, i;

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	/* a run transfers at least one segment */
	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
		       : NUMA_NO_NODE;
	atomic_set(&perf->tdone, 0);

	/* launch kernel thread */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx = &perf->pthr_ctx[i];

		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread = kthread_create_on_node(ntb_perf_thread,
						      (void *)pctx,
						      node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			pctx->thread = NULL;
			ret = -ENXIO;
			goto out;
		}

		wake_up_process(pctx->thread);
	}

	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

out:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return ret;
}
|
|
|
|
static const struct file_operations ntb_perf_debugfs_run = {
|
|
.owner = THIS_MODULE,
|
|
.open = simple_open,
|
|
.read = debugfs_run_read,
|
|
.write = debugfs_run_write,
|
|
};
|
|
|
|
static int perf_debugfs_setup(struct perf_ctx *perf)
|
|
{
|
|
struct pci_dev *pdev = perf->ntb->pdev;
|
|
struct dentry *debugfs_node_dir;
|
|
struct dentry *debugfs_run;
|
|
struct dentry *debugfs_threads;
|
|
struct dentry *debugfs_seg_order;
|
|
struct dentry *debugfs_run_order;
|
|
struct dentry *debugfs_use_dma;
|
|
struct dentry *debugfs_on_node;
|
|
|
|
if (!debugfs_initialized())
|
|
return -ENODEV;
|
|
|
|
/* Assumpion: only one NTB device in the system */
|
|
if (!perf_debugfs_dir) {
|
|
perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
|
|
if (!perf_debugfs_dir)
|
|
return -ENODEV;
|
|
}
|
|
|
|
debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
|
|
perf_debugfs_dir);
|
|
if (!debugfs_node_dir)
|
|
goto err;
|
|
|
|
debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
|
|
debugfs_node_dir, perf,
|
|
&ntb_perf_debugfs_run);
|
|
if (!debugfs_run)
|
|
goto err;
|
|
|
|
debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
|
|
debugfs_node_dir,
|
|
&perf->perf_threads);
|
|
if (!debugfs_threads)
|
|
goto err;
|
|
|
|
debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
|
|
debugfs_node_dir,
|
|
&seg_order);
|
|
if (!debugfs_seg_order)
|
|
goto err;
|
|
|
|
debugfs_run_order = debugfs_create_u32("run_order", 0600,
|
|
debugfs_node_dir,
|
|
&run_order);
|
|
if (!debugfs_run_order)
|
|
goto err;
|
|
|
|
debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
|
|
debugfs_node_dir,
|
|
&use_dma);
|
|
if (!debugfs_use_dma)
|
|
goto err;
|
|
|
|
debugfs_on_node = debugfs_create_bool("on_node", 0600,
|
|
debugfs_node_dir,
|
|
&on_node);
|
|
if (!debugfs_on_node)
|
|
goto err;
|
|
|
|
return 0;
|
|
|
|
err:
|
|
debugfs_remove_recursive(perf_debugfs_dir);
|
|
perf_debugfs_dir = NULL;
|
|
return -ENODEV;
|
|
}
|
|
|
|
static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
|
|
{
|
|
struct pci_dev *pdev = ntb->pdev;
|
|
struct perf_ctx *perf;
|
|
int node;
|
|
int rc = 0;
|
|
|
|
if (ntb_spad_count(ntb) < MAX_SPAD) {
|
|
dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
|
|
DRIVER_NAME);
|
|
return -EIO;
|
|
}
|
|
|
|
if (!ntb->ops->mw_set_trans) {
|
|
dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
|
|
dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
|
|
|
|
node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
|
|
perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
|
|
if (!perf) {
|
|
rc = -ENOMEM;
|
|
goto err_perf;
|
|
}
|
|
|
|
perf->ntb = ntb;
|
|
perf->perf_threads = 1;
|
|
atomic_set(&perf->tsync, 0);
|
|
mutex_init(&perf->run_mutex);
|
|
spin_lock_init(&perf->db_lock);
|
|
perf_setup_mw(ntb, perf);
|
|
init_waitqueue_head(&perf->link_wq);
|
|
INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
|
|
|
|
rc = ntb_set_ctx(ntb, perf, &perf_ops);
|
|
if (rc)
|
|
goto err_ctx;
|
|
|
|
perf->link_is_up = false;
|
|
ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
|
|
ntb_link_event(ntb);
|
|
|
|
rc = perf_debugfs_setup(perf);
|
|
if (rc)
|
|
goto err_ctx;
|
|
|
|
perf_clear_thread_status(perf);
|
|
|
|
return 0;
|
|
|
|
err_ctx:
|
|
cancel_delayed_work_sync(&perf->link_work);
|
|
kfree(perf);
|
|
err_perf:
|
|
return rc;
|
|
}
|
|
|
|
static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
|
|
{
|
|
struct perf_ctx *perf = ntb->ctx;
|
|
int i;
|
|
|
|
dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
|
|
|
|
mutex_lock(&perf->run_mutex);
|
|
|
|
cancel_delayed_work_sync(&perf->link_work);
|
|
|
|
ntb_clear_ctx(ntb);
|
|
ntb_link_disable(ntb);
|
|
|
|
debugfs_remove_recursive(perf_debugfs_dir);
|
|
perf_debugfs_dir = NULL;
|
|
|
|
if (use_dma) {
|
|
for (i = 0; i < MAX_THREADS; i++) {
|
|
struct pthr_ctx *pctx = &perf->pthr_ctx[i];
|
|
|
|
if (pctx->dma_chan)
|
|
dma_release_channel(pctx->dma_chan);
|
|
}
|
|
}
|
|
|
|
kfree(perf);
|
|
}
|
|
|
|
static struct ntb_client perf_client = {
|
|
.ops = {
|
|
.probe = perf_probe,
|
|
.remove = perf_remove,
|
|
},
|
|
};
|
|
module_ntb_client(perf_client);
|