mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-02-04 19:15:16 +07:00
Merge branch 'smc-get-rid-of-unsafe_global_rkey'
Ursula Braun says: ==================== net/smc: get rid of unsafe_global_rkey The smc code uses the unsafe_global_rkey, exposing all memory for remote reads and writes once a connection is established. Here is now a patch series to get rid of unsafe_global_rkey usage. Main idea is to switch to SG-logic and separate memory regions for RMBs. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
eace92e31b
@ -8,10 +8,6 @@ config SMC
|
||||
The Linux implementation of the SMC-R solution is designed as
|
||||
a separate socket family SMC.
|
||||
|
||||
Warning: SMC will expose all memory for remote reads and writes
|
||||
once a connection is established. Don't enable this option except
|
||||
for tightly controlled lab environment.
|
||||
|
||||
Select this option if you want to run SMC socket applications
|
||||
|
||||
config SMC_DIAG
|
||||
|
@ -338,6 +338,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
|
||||
return SMC_CLC_DECL_INTERR;
|
||||
|
||||
smc_wr_remember_qp_attr(link);
|
||||
|
||||
rc = smc_wr_reg_send(link,
|
||||
smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
|
||||
if (rc)
|
||||
return SMC_CLC_DECL_INTERR;
|
||||
|
||||
/* send CONFIRM LINK response over RoCE fabric */
|
||||
rc = smc_llc_send_confirm_link(link,
|
||||
link->smcibdev->mac[link->ibport - 1],
|
||||
@ -430,12 +436,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
|
||||
|
||||
smc_conn_save_peer_info(smc, &aclc);
|
||||
|
||||
rc = smc_sndbuf_create(smc);
|
||||
if (rc) {
|
||||
reason_code = SMC_CLC_DECL_MEM;
|
||||
goto decline_rdma_unlock;
|
||||
}
|
||||
rc = smc_rmb_create(smc);
|
||||
/* create send buffer and rmb */
|
||||
rc = smc_buf_create(smc);
|
||||
if (rc) {
|
||||
reason_code = SMC_CLC_DECL_MEM;
|
||||
goto decline_rdma_unlock;
|
||||
@ -459,7 +461,20 @@ static int smc_connect_rdma(struct smc_sock *smc)
|
||||
reason_code = SMC_CLC_DECL_INTERR;
|
||||
goto decline_rdma_unlock;
|
||||
}
|
||||
} else {
|
||||
struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
|
||||
|
||||
if (!buf_desc->reused) {
|
||||
/* register memory region for new rmb */
|
||||
rc = smc_wr_reg_send(link,
|
||||
buf_desc->mr_rx[SMC_SINGLE_LINK]);
|
||||
if (rc) {
|
||||
reason_code = SMC_CLC_DECL_INTERR;
|
||||
goto decline_rdma_unlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
smc_rmb_sync_sg_for_device(&smc->conn);
|
||||
|
||||
rc = smc_clc_send_confirm(smc);
|
||||
if (rc)
|
||||
@ -692,6 +707,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
|
||||
int rc;
|
||||
|
||||
link = &lgr->lnk[SMC_SINGLE_LINK];
|
||||
|
||||
rc = smc_wr_reg_send(link,
|
||||
smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
|
||||
if (rc)
|
||||
return SMC_CLC_DECL_INTERR;
|
||||
|
||||
/* send CONFIRM LINK request to client over the RoCE fabric */
|
||||
rc = smc_llc_send_confirm_link(link,
|
||||
link->smcibdev->mac[link->ibport - 1],
|
||||
@ -779,11 +800,6 @@ static void smc_listen_work(struct work_struct *work)
|
||||
mutex_lock(&smc_create_lgr_pending);
|
||||
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
|
||||
smcibdev, ibport, &pclc.lcl, 0);
|
||||
if (local_contact == SMC_REUSE_CONTACT)
|
||||
/* lock no longer needed, free it due to following
|
||||
* smc_clc_wait_msg() call
|
||||
*/
|
||||
mutex_unlock(&smc_create_lgr_pending);
|
||||
if (local_contact < 0) {
|
||||
rc = local_contact;
|
||||
if (rc == -ENOMEM)
|
||||
@ -794,12 +810,8 @@ static void smc_listen_work(struct work_struct *work)
|
||||
}
|
||||
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
|
||||
|
||||
rc = smc_sndbuf_create(new_smc);
|
||||
if (rc) {
|
||||
reason_code = SMC_CLC_DECL_MEM;
|
||||
goto decline_rdma;
|
||||
}
|
||||
rc = smc_rmb_create(new_smc);
|
||||
/* create send buffer and rmb */
|
||||
rc = smc_buf_create(new_smc);
|
||||
if (rc) {
|
||||
reason_code = SMC_CLC_DECL_MEM;
|
||||
goto decline_rdma;
|
||||
@ -808,6 +820,21 @@ static void smc_listen_work(struct work_struct *work)
|
||||
smc_close_init(new_smc);
|
||||
smc_rx_init(new_smc);
|
||||
|
||||
if (local_contact != SMC_FIRST_CONTACT) {
|
||||
struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
|
||||
|
||||
if (!buf_desc->reused) {
|
||||
/* register memory region for new rmb */
|
||||
rc = smc_wr_reg_send(link,
|
||||
buf_desc->mr_rx[SMC_SINGLE_LINK]);
|
||||
if (rc) {
|
||||
reason_code = SMC_CLC_DECL_INTERR;
|
||||
goto decline_rdma;
|
||||
}
|
||||
}
|
||||
}
|
||||
smc_rmb_sync_sg_for_device(&new_smc->conn);
|
||||
|
||||
rc = smc_clc_send_accept(new_smc, local_contact);
|
||||
if (rc)
|
||||
goto out_err;
|
||||
@ -853,8 +880,7 @@ static void smc_listen_work(struct work_struct *work)
|
||||
if (newsmcsk->sk_state == SMC_INIT)
|
||||
newsmcsk->sk_state = SMC_ACTIVE;
|
||||
enqueue:
|
||||
if (local_contact == SMC_FIRST_CONTACT)
|
||||
mutex_unlock(&smc_create_lgr_pending);
|
||||
mutex_unlock(&smc_create_lgr_pending);
|
||||
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
|
||||
if (lsmc->sk.sk_state == SMC_LISTEN) {
|
||||
smc_accept_enqueue(&lsmc->sk, newsmcsk);
|
||||
|
@ -204,13 +204,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
|
||||
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
|
||||
hton24(cclc.qpn, link->roce_qp->qp_num);
|
||||
cclc.rmb_rkey =
|
||||
htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
|
||||
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
|
||||
cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
|
||||
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
|
||||
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
|
||||
cclc.rmbe_size = conn->rmbe_size_short;
|
||||
cclc.rmb_dma_addr =
|
||||
cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
|
||||
cclc.rmb_dma_addr = cpu_to_be64(
|
||||
(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
|
||||
hton24(cclc.psn, link->psn_initial);
|
||||
|
||||
memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
|
||||
@ -256,13 +256,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
|
||||
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
|
||||
hton24(aclc.qpn, link->roce_qp->qp_num);
|
||||
aclc.rmb_rkey =
|
||||
htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
|
||||
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
|
||||
aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
|
||||
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
|
||||
aclc.qp_mtu = link->path_mtu;
|
||||
aclc.rmbe_size = conn->rmbe_size_short,
|
||||
aclc.rmb_dma_addr =
|
||||
cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
|
||||
aclc.rmb_dma_addr = cpu_to_be64(
|
||||
(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
|
||||
hton24(aclc.psn, link->psn_initial);
|
||||
memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
|
||||
|
||||
|
@ -175,7 +175,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
|
||||
rc = smc_wr_alloc_link_mem(lnk);
|
||||
if (rc)
|
||||
goto free_lgr;
|
||||
init_waitqueue_head(&lnk->wr_tx_wait);
|
||||
rc = smc_ib_create_protection_domain(lnk);
|
||||
if (rc)
|
||||
goto free_link_mem;
|
||||
@ -207,17 +206,14 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void smc_sndbuf_unuse(struct smc_connection *conn)
|
||||
static void smc_buf_unuse(struct smc_connection *conn)
|
||||
{
|
||||
if (conn->sndbuf_desc) {
|
||||
conn->sndbuf_desc->used = 0;
|
||||
conn->sndbuf_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void smc_rmb_unuse(struct smc_connection *conn)
|
||||
{
|
||||
if (conn->rmb_desc) {
|
||||
conn->rmb_desc->reused = true;
|
||||
conn->rmb_desc->used = 0;
|
||||
conn->rmbe_size = 0;
|
||||
}
|
||||
@ -232,8 +228,7 @@ void smc_conn_free(struct smc_connection *conn)
|
||||
return;
|
||||
smc_cdc_tx_dismiss_slots(conn);
|
||||
smc_lgr_unregister_conn(conn);
|
||||
smc_rmb_unuse(conn);
|
||||
smc_sndbuf_unuse(conn);
|
||||
smc_buf_unuse(conn);
|
||||
}
|
||||
|
||||
static void smc_link_clear(struct smc_link *lnk)
|
||||
@ -246,48 +241,57 @@ static void smc_link_clear(struct smc_link *lnk)
|
||||
smc_wr_free_link_mem(lnk);
|
||||
}
|
||||
|
||||
static void smc_lgr_free_sndbufs(struct smc_link_group *lgr)
|
||||
static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
|
||||
bool is_rmb)
|
||||
{
|
||||
struct smc_buf_desc *sndbuf_desc, *bf_desc;
|
||||
if (is_rmb) {
|
||||
if (buf_desc->mr_rx[SMC_SINGLE_LINK])
|
||||
smc_ib_put_memory_region(
|
||||
buf_desc->mr_rx[SMC_SINGLE_LINK]);
|
||||
smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
|
||||
DMA_FROM_DEVICE);
|
||||
} else {
|
||||
smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
|
||||
DMA_TO_DEVICE);
|
||||
}
|
||||
sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
|
||||
if (buf_desc->cpu_addr)
|
||||
free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
|
||||
kfree(buf_desc);
|
||||
}
|
||||
|
||||
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
|
||||
{
|
||||
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
|
||||
struct smc_buf_desc *buf_desc, *bf_desc;
|
||||
struct list_head *buf_list;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SMC_RMBE_SIZES; i++) {
|
||||
list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i],
|
||||
if (is_rmb)
|
||||
buf_list = &lgr->rmbs[i];
|
||||
else
|
||||
buf_list = &lgr->sndbufs[i];
|
||||
list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
|
||||
list) {
|
||||
list_del(&sndbuf_desc->list);
|
||||
smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
smc_uncompress_bufsize(i),
|
||||
sndbuf_desc, DMA_TO_DEVICE);
|
||||
kfree(sndbuf_desc->cpu_addr);
|
||||
kfree(sndbuf_desc);
|
||||
list_del(&buf_desc->list);
|
||||
smc_buf_free(buf_desc, lnk, is_rmb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
|
||||
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
|
||||
{
|
||||
struct smc_buf_desc *rmb_desc, *bf_desc;
|
||||
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SMC_RMBE_SIZES; i++) {
|
||||
list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
|
||||
list) {
|
||||
list_del(&rmb_desc->list);
|
||||
smc_ib_buf_unmap(lnk->smcibdev,
|
||||
smc_uncompress_bufsize(i),
|
||||
rmb_desc, DMA_FROM_DEVICE);
|
||||
kfree(rmb_desc->cpu_addr);
|
||||
kfree(rmb_desc);
|
||||
}
|
||||
}
|
||||
/* free send buffers */
|
||||
__smc_lgr_free_bufs(lgr, false);
|
||||
/* free rmbs */
|
||||
__smc_lgr_free_bufs(lgr, true);
|
||||
}
|
||||
|
||||
/* remove a link group */
|
||||
void smc_lgr_free(struct smc_link_group *lgr)
|
||||
{
|
||||
smc_lgr_free_rmbs(lgr);
|
||||
smc_lgr_free_sndbufs(lgr);
|
||||
smc_lgr_free_bufs(lgr);
|
||||
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
|
||||
kfree(lgr);
|
||||
}
|
||||
@ -452,45 +456,25 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
|
||||
return rc ? rc : local_contact;
|
||||
}
|
||||
|
||||
/* try to reuse a sndbuf description slot of the sndbufs list for a certain
|
||||
* buf_size; if not available, return NULL
|
||||
/* try to reuse a sndbuf or rmb description slot for a certain
|
||||
* buffer size; if not available, return NULL
|
||||
*/
|
||||
static inline
|
||||
struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr,
|
||||
int compressed_bufsize)
|
||||
struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
|
||||
int compressed_bufsize,
|
||||
rwlock_t *lock,
|
||||
struct list_head *buf_list)
|
||||
{
|
||||
struct smc_buf_desc *sndbuf_slot;
|
||||
struct smc_buf_desc *buf_slot;
|
||||
|
||||
read_lock_bh(&lgr->sndbufs_lock);
|
||||
list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize],
|
||||
list) {
|
||||
if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) {
|
||||
read_unlock_bh(&lgr->sndbufs_lock);
|
||||
return sndbuf_slot;
|
||||
read_lock_bh(lock);
|
||||
list_for_each_entry(buf_slot, buf_list, list) {
|
||||
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
|
||||
read_unlock_bh(lock);
|
||||
return buf_slot;
|
||||
}
|
||||
}
|
||||
read_unlock_bh(&lgr->sndbufs_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* try to reuse an rmb description slot of the rmbs list for a certain
|
||||
* rmbe_size; if not available, return NULL
|
||||
*/
|
||||
static inline
|
||||
struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
|
||||
int compressed_bufsize)
|
||||
{
|
||||
struct smc_buf_desc *rmb_slot;
|
||||
|
||||
read_lock_bh(&lgr->rmbs_lock);
|
||||
list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize],
|
||||
list) {
|
||||
if (cmpxchg(&rmb_slot->used, 0, 1) == 0) {
|
||||
read_unlock_bh(&lgr->rmbs_lock);
|
||||
return rmb_slot;
|
||||
}
|
||||
}
|
||||
read_unlock_bh(&lgr->rmbs_lock);
|
||||
read_unlock_bh(lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -503,136 +487,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
|
||||
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
|
||||
}
|
||||
|
||||
/* create the tx buffer for an SMC socket */
|
||||
int smc_sndbuf_create(struct smc_sock *smc)
|
||||
static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
|
||||
bool is_rmb, int bufsize)
|
||||
{
|
||||
struct smc_connection *conn = &smc->conn;
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
int tmp_bufsize, tmp_bufsize_short;
|
||||
struct smc_buf_desc *sndbuf_desc;
|
||||
struct smc_buf_desc *buf_desc;
|
||||
struct smc_link *lnk;
|
||||
int rc;
|
||||
|
||||
/* use socket send buffer size (w/o overhead) as start value */
|
||||
for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
|
||||
tmp_bufsize_short >= 0; tmp_bufsize_short--) {
|
||||
tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
|
||||
/* check for reusable sndbuf_slot in the link group */
|
||||
sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short);
|
||||
if (sndbuf_desc) {
|
||||
memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize);
|
||||
break; /* found reusable slot */
|
||||
}
|
||||
/* try to alloc a new send buffer */
|
||||
sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL);
|
||||
if (!sndbuf_desc)
|
||||
break; /* give up with -ENOMEM */
|
||||
sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize,
|
||||
GFP_KERNEL | __GFP_NOWARN |
|
||||
__GFP_NOMEMALLOC |
|
||||
__GFP_NORETRY);
|
||||
if (!sndbuf_desc->cpu_addr) {
|
||||
kfree(sndbuf_desc);
|
||||
sndbuf_desc = NULL;
|
||||
/* if send buffer allocation has failed,
|
||||
* try a smaller one
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
tmp_bufsize, sndbuf_desc,
|
||||
DMA_TO_DEVICE);
|
||||
/* try to alloc a new buffer */
|
||||
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
|
||||
if (!buf_desc)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
buf_desc->cpu_addr =
|
||||
(void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
|
||||
__GFP_NOMEMALLOC |
|
||||
__GFP_NORETRY | __GFP_ZERO,
|
||||
get_order(bufsize));
|
||||
if (!buf_desc->cpu_addr) {
|
||||
kfree(buf_desc);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
buf_desc->order = get_order(bufsize);
|
||||
|
||||
/* build the sg table from the pages */
|
||||
lnk = &lgr->lnk[SMC_SINGLE_LINK];
|
||||
rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
|
||||
GFP_KERNEL);
|
||||
if (rc) {
|
||||
smc_buf_free(buf_desc, lnk, is_rmb);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
|
||||
buf_desc->cpu_addr, bufsize);
|
||||
|
||||
/* map sg table to DMA address */
|
||||
rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
|
||||
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
|
||||
/* SMC protocol depends on mapping to one DMA address only */
|
||||
if (rc != 1) {
|
||||
smc_buf_free(buf_desc, lnk, is_rmb);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
|
||||
/* create a new memory region for the RMB */
|
||||
if (is_rmb) {
|
||||
rc = smc_ib_get_memory_region(lnk->roce_pd,
|
||||
IB_ACCESS_REMOTE_WRITE |
|
||||
IB_ACCESS_LOCAL_WRITE,
|
||||
buf_desc);
|
||||
if (rc) {
|
||||
kfree(sndbuf_desc->cpu_addr);
|
||||
kfree(sndbuf_desc);
|
||||
sndbuf_desc = NULL;
|
||||
continue; /* if mapping failed, try smaller one */
|
||||
smc_buf_free(buf_desc, lnk, is_rmb);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
sndbuf_desc->used = 1;
|
||||
write_lock_bh(&lgr->sndbufs_lock);
|
||||
list_add(&sndbuf_desc->list,
|
||||
&lgr->sndbufs[tmp_bufsize_short]);
|
||||
write_unlock_bh(&lgr->sndbufs_lock);
|
||||
break;
|
||||
}
|
||||
if (sndbuf_desc && sndbuf_desc->cpu_addr) {
|
||||
conn->sndbuf_desc = sndbuf_desc;
|
||||
conn->sndbuf_size = tmp_bufsize;
|
||||
smc->sk.sk_sndbuf = tmp_bufsize * 2;
|
||||
atomic_set(&conn->sndbuf_space, tmp_bufsize);
|
||||
return 0;
|
||||
} else {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return buf_desc;
|
||||
}
|
||||
|
||||
/* create the RMB for an SMC socket (even though the SMC protocol
|
||||
* allows more than one RMB-element per RMB, the Linux implementation
|
||||
* uses just one RMB-element per RMB, i.e. uses an extra RMB for every
|
||||
* connection in a link group
|
||||
*/
|
||||
int smc_rmb_create(struct smc_sock *smc)
|
||||
static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
|
||||
{
|
||||
struct smc_connection *conn = &smc->conn;
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
int tmp_bufsize, tmp_bufsize_short;
|
||||
struct smc_buf_desc *rmb_desc;
|
||||
int rc;
|
||||
struct smc_buf_desc *buf_desc = NULL;
|
||||
struct list_head *buf_list;
|
||||
int bufsize, bufsize_short;
|
||||
int sk_buf_size;
|
||||
rwlock_t *lock;
|
||||
|
||||
/* use socket recv buffer size (w/o overhead) as start value */
|
||||
for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2);
|
||||
tmp_bufsize_short >= 0; tmp_bufsize_short--) {
|
||||
tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
|
||||
/* check for reusable rmb_slot in the link group */
|
||||
rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short);
|
||||
if (rmb_desc) {
|
||||
memset(rmb_desc->cpu_addr, 0, tmp_bufsize);
|
||||
if (is_rmb)
|
||||
/* use socket recv buffer size (w/o overhead) as start value */
|
||||
sk_buf_size = smc->sk.sk_rcvbuf / 2;
|
||||
else
|
||||
/* use socket send buffer size (w/o overhead) as start value */
|
||||
sk_buf_size = smc->sk.sk_sndbuf / 2;
|
||||
|
||||
for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
|
||||
bufsize_short >= 0; bufsize_short--) {
|
||||
|
||||
if (is_rmb) {
|
||||
lock = &lgr->rmbs_lock;
|
||||
buf_list = &lgr->rmbs[bufsize_short];
|
||||
} else {
|
||||
lock = &lgr->sndbufs_lock;
|
||||
buf_list = &lgr->sndbufs[bufsize_short];
|
||||
}
|
||||
bufsize = smc_uncompress_bufsize(bufsize_short);
|
||||
if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
|
||||
continue;
|
||||
|
||||
/* check for reusable slot in the link group */
|
||||
buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
|
||||
if (buf_desc) {
|
||||
memset(buf_desc->cpu_addr, 0, bufsize);
|
||||
break; /* found reusable slot */
|
||||
}
|
||||
/* try to alloc a new RMB */
|
||||
rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL);
|
||||
if (!rmb_desc)
|
||||
break; /* give up with -ENOMEM */
|
||||
rmb_desc->cpu_addr = kzalloc(tmp_bufsize,
|
||||
GFP_KERNEL | __GFP_NOWARN |
|
||||
__GFP_NOMEMALLOC |
|
||||
__GFP_NORETRY);
|
||||
if (!rmb_desc->cpu_addr) {
|
||||
kfree(rmb_desc);
|
||||
rmb_desc = NULL;
|
||||
/* if RMB allocation has failed,
|
||||
* try a smaller one
|
||||
*/
|
||||
|
||||
buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
|
||||
if (PTR_ERR(buf_desc) == -ENOMEM)
|
||||
break;
|
||||
if (IS_ERR(buf_desc))
|
||||
continue;
|
||||
}
|
||||
rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
tmp_bufsize, rmb_desc,
|
||||
DMA_FROM_DEVICE);
|
||||
if (rc) {
|
||||
kfree(rmb_desc->cpu_addr);
|
||||
kfree(rmb_desc);
|
||||
rmb_desc = NULL;
|
||||
continue; /* if mapping failed, try smaller one */
|
||||
}
|
||||
rmb_desc->rkey[SMC_SINGLE_LINK] =
|
||||
lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
|
||||
rmb_desc->used = 1;
|
||||
write_lock_bh(&lgr->rmbs_lock);
|
||||
list_add(&rmb_desc->list,
|
||||
&lgr->rmbs[tmp_bufsize_short]);
|
||||
write_unlock_bh(&lgr->rmbs_lock);
|
||||
break;
|
||||
|
||||
buf_desc->used = 1;
|
||||
write_lock_bh(lock);
|
||||
list_add(&buf_desc->list, buf_list);
|
||||
write_unlock_bh(lock);
|
||||
break; /* found */
|
||||
}
|
||||
if (rmb_desc && rmb_desc->cpu_addr) {
|
||||
conn->rmb_desc = rmb_desc;
|
||||
conn->rmbe_size = tmp_bufsize;
|
||||
conn->rmbe_size_short = tmp_bufsize_short;
|
||||
smc->sk.sk_rcvbuf = tmp_bufsize * 2;
|
||||
atomic_set(&conn->bytes_to_rcv, 0);
|
||||
conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize);
|
||||
return 0;
|
||||
} else {
|
||||
|
||||
if (IS_ERR(buf_desc))
|
||||
return -ENOMEM;
|
||||
|
||||
if (is_rmb) {
|
||||
conn->rmb_desc = buf_desc;
|
||||
conn->rmbe_size = bufsize;
|
||||
conn->rmbe_size_short = bufsize_short;
|
||||
smc->sk.sk_rcvbuf = bufsize * 2;
|
||||
atomic_set(&conn->bytes_to_rcv, 0);
|
||||
conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
|
||||
} else {
|
||||
conn->sndbuf_desc = buf_desc;
|
||||
conn->sndbuf_size = bufsize;
|
||||
smc->sk.sk_sndbuf = bufsize * 2;
|
||||
atomic_set(&conn->sndbuf_space, bufsize);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
|
||||
{
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
|
||||
smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
conn->sndbuf_desc, DMA_TO_DEVICE);
|
||||
}
|
||||
|
||||
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
|
||||
{
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
|
||||
smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
conn->sndbuf_desc, DMA_TO_DEVICE);
|
||||
}
|
||||
|
||||
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
|
||||
{
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
|
||||
smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
conn->rmb_desc, DMA_FROM_DEVICE);
|
||||
}
|
||||
|
||||
void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
|
||||
{
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
|
||||
smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
|
||||
conn->rmb_desc, DMA_FROM_DEVICE);
|
||||
}
|
||||
|
||||
/* create the send and receive buffer for an SMC socket;
|
||||
* receive buffers are called RMBs;
|
||||
* (even though the SMC protocol allows more than one RMB-element per RMB,
|
||||
* the Linux implementation uses just one RMB-element per RMB, i.e. uses an
|
||||
* extra RMB for every connection in a link group
|
||||
*/
|
||||
int smc_buf_create(struct smc_sock *smc)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* create send buffer */
|
||||
rc = __smc_buf_create(smc, false);
|
||||
if (rc)
|
||||
return rc;
|
||||
/* create rmb */
|
||||
rc = __smc_buf_create(smc, true);
|
||||
if (rc)
|
||||
smc_buf_free(smc->conn.sndbuf_desc,
|
||||
&smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
|
||||
|
@ -37,6 +37,14 @@ struct smc_wr_buf {
|
||||
u8 raw[SMC_WR_BUF_SIZE];
|
||||
};
|
||||
|
||||
#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */
|
||||
|
||||
enum smc_wr_reg_state {
|
||||
POSTED, /* ib_wr_reg_mr request posted */
|
||||
CONFIRMED, /* ib_wr_reg_mr response: successful */
|
||||
FAILED /* ib_wr_reg_mr response: failure */
|
||||
};
|
||||
|
||||
struct smc_link {
|
||||
struct smc_ib_device *smcibdev; /* ib-device */
|
||||
u8 ibport; /* port - values 1 | 2 */
|
||||
@ -65,6 +73,10 @@ struct smc_link {
|
||||
u64 wr_rx_id; /* seq # of last recv WR */
|
||||
u32 wr_rx_cnt; /* number of WR recv buffers */
|
||||
|
||||
struct ib_reg_wr wr_reg; /* WR register memory region */
|
||||
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
|
||||
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
|
||||
|
||||
union ib_gid gid; /* gid matching used vlan id */
|
||||
u32 peer_qpn; /* QP number of peer */
|
||||
enum ib_mtu path_mtu; /* used mtu */
|
||||
@ -90,14 +102,15 @@ struct smc_link {
|
||||
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
|
||||
struct smc_buf_desc {
|
||||
struct list_head list;
|
||||
u64 dma_addr[SMC_LINKS_PER_LGR_MAX];
|
||||
/* mapped address of buffer */
|
||||
void *cpu_addr; /* virtual address of buffer */
|
||||
u32 rkey[SMC_LINKS_PER_LGR_MAX];
|
||||
/* for rmb only:
|
||||
* rkey provided to peer
|
||||
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
|
||||
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
|
||||
/* for rmb only: memory region
|
||||
* incl. rkey provided to peer
|
||||
*/
|
||||
u32 order; /* allocation order */
|
||||
u32 used; /* currently used / unused */
|
||||
bool reused; /* new created / reused */
|
||||
};
|
||||
|
||||
struct smc_rtoken { /* address/key of remote RMB */
|
||||
@ -173,9 +186,11 @@ struct smc_clc_msg_accept_confirm;
|
||||
|
||||
void smc_lgr_free(struct smc_link_group *lgr);
|
||||
void smc_lgr_terminate(struct smc_link_group *lgr);
|
||||
int smc_sndbuf_create(struct smc_sock *smc);
|
||||
int smc_rmb_create(struct smc_sock *smc);
|
||||
int smc_buf_create(struct smc_sock *smc);
|
||||
int smc_rmb_rtoken_handling(struct smc_connection *conn,
|
||||
struct smc_clc_msg_accept_confirm *clc);
|
||||
|
||||
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
|
||||
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
|
||||
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
|
||||
void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
|
||||
#endif
|
||||
|
130
net/smc/smc_ib.c
130
net/smc/smc_ib.c
@ -13,6 +13,7 @@
|
||||
|
||||
#include <linux/random.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
|
||||
#include "smc_pnet.h"
|
||||
@ -192,8 +193,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
|
||||
{
|
||||
int rc;
|
||||
|
||||
lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
|
||||
IB_PD_UNSAFE_GLOBAL_RKEY);
|
||||
lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
|
||||
rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
|
||||
if (IS_ERR(lnk->roce_pd))
|
||||
lnk->roce_pd = NULL;
|
||||
@ -232,10 +232,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
|
||||
.recv_cq = lnk->smcibdev->roce_cq_recv,
|
||||
.srq = NULL,
|
||||
.cap = {
|
||||
.max_send_wr = SMC_WR_BUF_CNT,
|
||||
/* include unsolicited rdma_writes as well,
|
||||
* there are max. 2 RDMA_WRITE per 1 WR_SEND
|
||||
*/
|
||||
.max_send_wr = SMC_WR_BUF_CNT * 3,
|
||||
.max_recv_wr = SMC_WR_BUF_CNT * 3,
|
||||
.max_send_sge = SMC_IB_MAX_SEND_SGE,
|
||||
.max_recv_sge = 1,
|
||||
@ -254,33 +254,117 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* map a new TX or RX buffer to DMA */
|
||||
int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction)
|
||||
void smc_ib_put_memory_region(struct ib_mr *mr)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
if (buf_slot->dma_addr[SMC_SINGLE_LINK])
|
||||
return rc; /* already mapped */
|
||||
buf_slot->dma_addr[SMC_SINGLE_LINK] =
|
||||
ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr,
|
||||
buf_size, data_direction);
|
||||
if (ib_dma_mapping_error(smcibdev->ibdev,
|
||||
buf_slot->dma_addr[SMC_SINGLE_LINK]))
|
||||
rc = -EIO;
|
||||
return rc;
|
||||
ib_dereg_mr(mr);
|
||||
}
|
||||
|
||||
void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size,
|
||||
static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
|
||||
{
|
||||
unsigned int offset = 0;
|
||||
int sg_num;
|
||||
|
||||
/* map the largest prefix of a dma mapped SG list */
|
||||
sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
|
||||
&offset, PAGE_SIZE);
|
||||
|
||||
return sg_num;
|
||||
}
|
||||
|
||||
/* Allocate a memory region and map the dma mapped SG list of buf_slot */
|
||||
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
|
||||
struct smc_buf_desc *buf_slot)
|
||||
{
|
||||
if (buf_slot->mr_rx[SMC_SINGLE_LINK])
|
||||
return 0; /* already done */
|
||||
|
||||
buf_slot->mr_rx[SMC_SINGLE_LINK] =
|
||||
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
|
||||
if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
|
||||
int rc;
|
||||
|
||||
rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
|
||||
buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (smc_ib_map_mr_sg(buf_slot) != 1)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* synchronize buffer usage for cpu access */
|
||||
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
unsigned int i;
|
||||
|
||||
/* for now there is just one DMA address */
|
||||
for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
|
||||
if (!sg_dma_len(sg))
|
||||
break;
|
||||
ib_dma_sync_single_for_cpu(smcibdev->ibdev,
|
||||
sg_dma_address(sg),
|
||||
sg_dma_len(sg),
|
||||
data_direction);
|
||||
}
|
||||
}
|
||||
|
||||
/* synchronize buffer usage for device access */
|
||||
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
unsigned int i;
|
||||
|
||||
/* for now there is just one DMA address */
|
||||
for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
|
||||
if (!sg_dma_len(sg))
|
||||
break;
|
||||
ib_dma_sync_single_for_device(smcibdev->ibdev,
|
||||
sg_dma_address(sg),
|
||||
sg_dma_len(sg),
|
||||
data_direction);
|
||||
}
|
||||
}
|
||||
|
||||
/* Map a new TX or RX buffer SG-table to DMA */
|
||||
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction)
|
||||
{
|
||||
if (!buf_slot->dma_addr[SMC_SINGLE_LINK])
|
||||
int mapped_nents;
|
||||
|
||||
mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
|
||||
data_direction);
|
||||
if (!mapped_nents)
|
||||
return -ENOMEM;
|
||||
|
||||
return mapped_nents;
|
||||
}
|
||||
|
||||
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction)
|
||||
{
|
||||
if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
|
||||
return; /* already unmapped */
|
||||
ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size,
|
||||
data_direction);
|
||||
buf_slot->dma_addr[SMC_SINGLE_LINK] = 0;
|
||||
|
||||
ib_dma_unmap_sg(smcibdev->ibdev,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].sgl,
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
|
||||
data_direction);
|
||||
buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
|
||||
}
|
||||
|
||||
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
|
||||
|
@ -51,12 +51,12 @@ int smc_ib_register_client(void) __init;
|
||||
void smc_ib_unregister_client(void);
|
||||
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
|
||||
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
|
||||
int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction);
|
||||
void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize,
|
||||
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction);
|
||||
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction);
|
||||
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
|
||||
int smc_ib_create_protection_domain(struct smc_link *lnk);
|
||||
void smc_ib_destroy_queue_pair(struct smc_link *lnk);
|
||||
@ -65,6 +65,13 @@ int smc_ib_ready_link(struct smc_link *lnk);
|
||||
int smc_ib_modify_qp_rts(struct smc_link *lnk);
|
||||
int smc_ib_modify_qp_reset(struct smc_link *lnk);
|
||||
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
|
||||
|
||||
|
||||
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
|
||||
struct smc_buf_desc *buf_slot);
|
||||
void smc_ib_put_memory_region(struct ib_mr *mr);
|
||||
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction);
|
||||
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
|
||||
struct smc_buf_desc *buf_slot,
|
||||
enum dma_data_direction data_direction);
|
||||
#endif
|
||||
|
@ -170,6 +170,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
|
||||
copylen, conn->rmbe_size - cons.count);
|
||||
chunk_len_sum = chunk_len;
|
||||
chunk_off = cons.count;
|
||||
smc_rmb_sync_sg_for_cpu(conn);
|
||||
for (chunk = 0; chunk < 2; chunk++) {
|
||||
if (!(flags & MSG_TRUNC)) {
|
||||
rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
|
||||
@ -177,6 +178,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
|
||||
if (rc) {
|
||||
if (!read_done)
|
||||
read_done = -EFAULT;
|
||||
smc_rmb_sync_sg_for_device(conn);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@ -190,6 +192,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
|
||||
chunk_len_sum += chunk_len;
|
||||
chunk_off = 0; /* modulo offset in recv ring buffer */
|
||||
}
|
||||
smc_rmb_sync_sg_for_device(conn);
|
||||
|
||||
/* update cursors */
|
||||
if (!(flags & MSG_PEEK)) {
|
||||
|
@ -174,10 +174,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
|
||||
copylen, conn->sndbuf_size - tx_cnt_prep);
|
||||
chunk_len_sum = chunk_len;
|
||||
chunk_off = tx_cnt_prep;
|
||||
smc_sndbuf_sync_sg_for_cpu(conn);
|
||||
for (chunk = 0; chunk < 2; chunk++) {
|
||||
rc = memcpy_from_msg(sndbuf_base + chunk_off,
|
||||
msg, chunk_len);
|
||||
if (rc) {
|
||||
smc_sndbuf_sync_sg_for_device(conn);
|
||||
if (send_done)
|
||||
return send_done;
|
||||
goto out_err;
|
||||
@ -192,6 +194,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
|
||||
chunk_len_sum += chunk_len;
|
||||
chunk_off = 0; /* modulo offset in send ring buffer */
|
||||
}
|
||||
smc_sndbuf_sync_sg_for_device(conn);
|
||||
/* update cursors */
|
||||
smc_curs_add(conn->sndbuf_size, &prep, copylen);
|
||||
smc_curs_write(&conn->tx_curs_prep,
|
||||
@ -277,6 +280,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
|
||||
struct smc_link_group *lgr = conn->lgr;
|
||||
int to_send, rmbespace;
|
||||
struct smc_link *link;
|
||||
dma_addr_t dma_addr;
|
||||
int num_sges;
|
||||
int rc;
|
||||
|
||||
@ -334,12 +338,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
|
||||
src_len = conn->sndbuf_size - sent.count;
|
||||
}
|
||||
src_len_sum = src_len;
|
||||
dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
|
||||
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
|
||||
num_sges = 0;
|
||||
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
|
||||
sges[srcchunk].addr =
|
||||
conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] +
|
||||
src_off;
|
||||
sges[srcchunk].addr = dma_addr + src_off;
|
||||
sges[srcchunk].length = src_len;
|
||||
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
|
||||
num_sges++;
|
||||
|
@ -68,6 +68,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
|
||||
int i;
|
||||
|
||||
link = wc->qp->qp_context;
|
||||
|
||||
if (wc->opcode == IB_WC_REG_MR) {
|
||||
if (wc->status)
|
||||
link->wr_reg_state = FAILED;
|
||||
else
|
||||
link->wr_reg_state = CONFIRMED;
|
||||
wake_up(&link->wr_reg_wait);
|
||||
return;
|
||||
}
|
||||
|
||||
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
|
||||
if (pnd_snd_idx == link->wr_tx_cnt)
|
||||
return;
|
||||
@ -243,6 +253,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Register a memory region and wait for result. */
|
||||
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
|
||||
{
|
||||
struct ib_send_wr *failed_wr = NULL;
|
||||
int rc;
|
||||
|
||||
ib_req_notify_cq(link->smcibdev->roce_cq_send,
|
||||
IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
|
||||
link->wr_reg_state = POSTED;
|
||||
link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
|
||||
link->wr_reg.mr = mr;
|
||||
link->wr_reg.key = mr->rkey;
|
||||
failed_wr = &link->wr_reg.wr;
|
||||
rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
|
||||
WARN_ON(failed_wr != &link->wr_reg.wr);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
|
||||
(link->wr_reg_state != POSTED),
|
||||
SMC_WR_REG_MR_WAIT_TIME);
|
||||
if (!rc) {
|
||||
/* timeout - terminate connections */
|
||||
struct smc_link_group *lgr;
|
||||
|
||||
lgr = container_of(link, struct smc_link_group,
|
||||
lnk[SMC_SINGLE_LINK]);
|
||||
smc_lgr_terminate(lgr);
|
||||
return -EPIPE;
|
||||
}
|
||||
if (rc == -ERESTARTSYS)
|
||||
return -EINTR;
|
||||
switch (link->wr_reg_state) {
|
||||
case CONFIRMED:
|
||||
rc = 0;
|
||||
break;
|
||||
case FAILED:
|
||||
rc = -EIO;
|
||||
break;
|
||||
case POSTED:
|
||||
rc = -EPIPE;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
|
||||
smc_wr_tx_filter filter,
|
||||
smc_wr_tx_dismisser dismisser,
|
||||
@ -458,6 +514,11 @@ static void smc_wr_init_sge(struct smc_link *lnk)
|
||||
lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
|
||||
lnk->wr_rx_ibs[i].num_sge = 1;
|
||||
}
|
||||
lnk->wr_reg.wr.next = NULL;
|
||||
lnk->wr_reg.wr.num_sge = 0;
|
||||
lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
|
||||
lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
|
||||
lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
|
||||
}
|
||||
|
||||
void smc_wr_free_link(struct smc_link *lnk)
|
||||
@ -602,6 +663,8 @@ int smc_wr_create_link(struct smc_link *lnk)
|
||||
smc_wr_init_sge(lnk);
|
||||
memset(lnk->wr_tx_mask, 0,
|
||||
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
|
||||
init_waitqueue_head(&lnk->wr_tx_wait);
|
||||
init_waitqueue_head(&lnk->wr_reg_wait);
|
||||
return rc;
|
||||
|
||||
dma_unmap:
|
||||
|
@ -102,5 +102,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
|
||||
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
|
||||
int smc_wr_rx_post_init(struct smc_link *link);
|
||||
void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
|
||||
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr);
|
||||
|
||||
#endif /* SMC_WR_H */
|
||||
|
Loading…
Reference in New Issue
Block a user