/*
 * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved.
 * Copyright (C) 2006-2007 QLogic Corporation, All rights reserved.
 * Copyright (c) 2005. PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *     - Redistributions of source code must retain the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer.
 *
 *     - Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <config.h>

#include <endian.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <netinet/in.h>
#include <sys/mman.h>
#include <errno.h>

#include <endian.h>
#include <pthread.h>
#include <stddef.h>

#include <infiniband/driver.h>
#include <infiniband/verbs.h>
#include <rdma/rdma_user_rxe.h>

#include "rxe_queue.h"
#include "rxe-abi.h"
#include "rxe.h"
static const struct verbs_match_ent hca_table[] = {
        /* FIXME: rxe needs a more reliable way to detect the rxe device */
        VERBS_NAME_MATCH("rxe", NULL),
        {},
};
static int rxe_query_device(struct ibv_context *context,
                            struct ibv_device_attr *attr)
{
        struct ibv_query_device cmd;
        uint64_t raw_fw_ver;
        unsigned major, minor, sub_minor;
        int ret;

        ret = ibv_cmd_query_device(context, attr, &raw_fw_ver,
                                   &cmd, sizeof cmd);
        if (ret)
                return ret;

        major = (raw_fw_ver >> 32) & 0xffff;
        minor = (raw_fw_ver >> 16) & 0xffff;
        sub_minor = raw_fw_ver & 0xffff;

        snprintf(attr->fw_ver, sizeof attr->fw_ver,
                 "%d.%d.%d", major, minor, sub_minor);

        return 0;
}

static int rxe_query_port(struct ibv_context *context, uint8_t port,
                          struct ibv_port_attr *attr)
{
        struct ibv_query_port cmd;

        return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
}

static struct ibv_pd *rxe_alloc_pd(struct ibv_context *context)
{
        struct ibv_alloc_pd cmd;
        struct ibv_alloc_pd_resp resp;
        struct ibv_pd *pd;

        pd = malloc(sizeof *pd);
        if (!pd)
                return NULL;

        if (ibv_cmd_alloc_pd(context, pd, &cmd, sizeof cmd, &resp, sizeof resp)) {
                free(pd);
                return NULL;
        }

        return pd;
}

static int rxe_dealloc_pd(struct ibv_pd *pd)
{
        int ret;

        ret = ibv_cmd_dealloc_pd(pd);
        if (!ret)
                free(pd);

        return ret;
}

static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
                                 int access)
{
        struct ibv_mr *mr;
        struct ibv_reg_mr cmd;
        struct ibv_reg_mr_resp resp;
        int ret;

        mr = malloc(sizeof *mr);
        if (!mr) {
                return NULL;
        }

        ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, mr,
                             &cmd, sizeof cmd, &resp, sizeof resp);
        if (ret) {
                free(mr);
                return NULL;
        }

        return mr;
}

static int rxe_dereg_mr(struct ibv_mr *mr)
{
        int ret;

        ret = ibv_cmd_dereg_mr(mr);
        if (ret)
                return ret;

        free(mr);
        return 0;
}
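
/*
 * Illustrative sketch, not part of the provider: how an application reaches
 * rxe_reg_mr()/rxe_dereg_mr() above through the public verbs API. The size
 * and access flags are arbitrary example values. Kept under #if 0 so it is
 * not built.
 */
#if 0
static struct ibv_mr *example_register_buffer(struct ibv_pd *pd, void *buf,
                                              size_t size)
{
        /* ibv_reg_mr() dispatches to rxe_reg_mr() via the context ops table */
        struct ibv_mr *mr = ibv_reg_mr(pd, buf, size,
                                       IBV_ACCESS_LOCAL_WRITE |
                                       IBV_ACCESS_REMOTE_WRITE);

        if (!mr)
                fprintf(stderr, "ibv_reg_mr failed\n");
        return mr;      /* release later with ibv_dereg_mr(mr) */
}
#endif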

static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
                                    struct ibv_comp_channel *channel,
                                    int comp_vector)
{
        struct rxe_cq *cq;
        struct ibv_create_cq cmd;
        struct rxe_create_cq_resp resp;
        int ret;

        cq = malloc(sizeof *cq);
        if (!cq) {
                return NULL;
        }

        ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
                                &cq->ibv_cq, &cmd, sizeof cmd,
                                &resp.ibv_resp, sizeof resp);
        if (ret) {
                free(cq);
                return NULL;
        }

        /* map the completion queue ring that the kernel describes in resp.mi */
        cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                         context->cmd_fd, resp.mi.offset);
        if ((void *)cq->queue == MAP_FAILED) {
                ibv_cmd_destroy_cq(&cq->ibv_cq);
                free(cq);
                return NULL;
        }

        cq->mmap_info = resp.mi;
        pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);

        return &cq->ibv_cq;
}

static int rxe_resize_cq(struct ibv_cq *ibcq, int cqe)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        struct ibv_resize_cq cmd;
        struct rxe_resize_cq_resp resp;
        int ret;

        pthread_spin_lock(&cq->lock);

        ret = ibv_cmd_resize_cq(ibcq, cqe, &cmd, sizeof cmd,
                                &resp.ibv_resp, sizeof resp);
        if (ret) {
                pthread_spin_unlock(&cq->lock);
                return ret;
        }

        /* drop the old mapping and map the resized ring */
        munmap(cq->queue, cq->mmap_info.size);

        cq->queue = mmap(NULL, resp.mi.size,
                         PROT_READ | PROT_WRITE, MAP_SHARED,
                         ibcq->context->cmd_fd, resp.mi.offset);

        ret = errno;
        pthread_spin_unlock(&cq->lock);

        if ((void *)cq->queue == MAP_FAILED) {
                cq->queue = NULL;
                cq->mmap_info.size = 0;
                return ret;
        }

        cq->mmap_info = resp.mi;

        return 0;
}

static int rxe_destroy_cq(struct ibv_cq *ibcq)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int ret;

        ret = ibv_cmd_destroy_cq(ibcq);
        if (ret)
                return ret;

        if (cq->mmap_info.size)
                munmap(cq->queue, cq->mmap_info.size);
        free(cq);

        return 0;
}

static int rxe_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_queue *q;
        int npolled;
        uint8_t *src;

        pthread_spin_lock(&cq->lock);
        q = cq->queue;

        for (npolled = 0; npolled < ne; ++npolled, ++wc) {
                if (queue_empty(q))
                        break;

                /* acquire: read the CQE only after the producer index update
                   that published it is visible */
                atomic_thread_fence(memory_order_acquire);
                src = consumer_addr(q);
                memcpy(wc, src, sizeof(*wc));
                advance_consumer(q);
        }

        pthread_spin_unlock(&cq->lock);
        return npolled;
}
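
/*
 * Illustrative sketch, not part of the provider: a typical application
 * polling loop. ibv_poll_cq() dispatches through the ops table to
 * rxe_poll_cq() above, which copies struct ibv_wc entries straight out of
 * the mapped completion ring. Kept under #if 0 so it is not built.
 */
#if 0
static void example_drain_cq(struct ibv_cq *cq)
{
        struct ibv_wc wc[16];
        int i, n;

        do {
                n = ibv_poll_cq(cq, 16, wc);
                for (i = 0; i < n; i++) {
                        if (wc[i].status != IBV_WC_SUCCESS)
                                fprintf(stderr, "wr_id %llu failed: %s\n",
                                        (unsigned long long)wc[i].wr_id,
                                        ibv_wc_status_str(wc[i].status));
                }
        } while (n > 0);
}
#endif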

static struct ibv_srq *rxe_create_srq(struct ibv_pd *pd,
                                      struct ibv_srq_init_attr *attr)
{
        struct rxe_srq *srq;
        struct ibv_create_srq cmd;
        struct rxe_create_srq_resp resp;
        int ret;

        srq = malloc(sizeof *srq);
        if (srq == NULL) {
                return NULL;
        }

        ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, sizeof cmd,
                                 &resp.ibv_resp, sizeof resp);
        if (ret) {
                free(srq);
                return NULL;
        }

        srq->rq.queue = mmap(NULL, resp.mi.size,
                             PROT_READ | PROT_WRITE, MAP_SHARED,
                             pd->context->cmd_fd, resp.mi.offset);
        if ((void *)srq->rq.queue == MAP_FAILED) {
                ibv_cmd_destroy_srq(&srq->ibv_srq);
                free(srq);
                return NULL;
        }

        srq->mmap_info = resp.mi;
        srq->rq.max_sge = attr->attr.max_sge;
        pthread_spin_init(&srq->rq.lock, PTHREAD_PROCESS_PRIVATE);

        return &srq->ibv_srq;
}

static int rxe_modify_srq(struct ibv_srq *ibsrq,
                          struct ibv_srq_attr *attr, int attr_mask)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_modify_srq_cmd cmd;
        int rc = 0;
        struct mmap_info mi;

        mi.offset = 0;
        mi.size = 0;

        if (attr_mask & IBV_SRQ_MAX_WR)
                pthread_spin_lock(&srq->rq.lock);

        cmd.mmap_info_addr = (__u64)(uintptr_t)&mi;
        rc = ibv_cmd_modify_srq(ibsrq, attr, attr_mask,
                                &cmd.ibv_cmd, sizeof cmd);
        if (rc)
                goto out;

        if (attr_mask & IBV_SRQ_MAX_WR) {
                (void)munmap(srq->rq.queue, srq->mmap_info.size);
                srq->rq.queue = mmap(NULL, mi.size,
                                     PROT_READ | PROT_WRITE, MAP_SHARED,
                                     ibsrq->context->cmd_fd, mi.offset);

                if ((void *)srq->rq.queue == MAP_FAILED) {
                        rc = errno;
                        srq->rq.queue = NULL;
                        srq->mmap_info.size = 0;
                        goto out;
                }

                srq->mmap_info = mi;
        }

out:
        if (attr_mask & IBV_SRQ_MAX_WR)
                pthread_spin_unlock(&srq->rq.lock);
        return rc;
}

static int rxe_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr)
{
        struct ibv_query_srq cmd;

        return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
}

static int rxe_destroy_srq(struct ibv_srq *ibvsrq)
{
        int ret;
        struct rxe_srq *srq = to_rsrq(ibvsrq);
        struct rxe_queue *q = srq->rq.queue;

        ret = ibv_cmd_destroy_srq(ibvsrq);
        if (!ret) {
                if (srq->mmap_info.size)
                        munmap(q, srq->mmap_info.size);
                free(srq);
        }

        return ret;
}

static int rxe_post_one_recv(struct rxe_wq *rq, struct ibv_recv_wr *recv_wr)
{
        int i;
        struct rxe_recv_wqe *wqe;
        struct rxe_queue *q = rq->queue;
        int length = 0;
        int rc = 0;

        if (queue_full(q)) {
                rc = -ENOMEM;
                goto out;
        }

        if (recv_wr->num_sge > rq->max_sge) {
                rc = -EINVAL;
                goto out;
        }

        wqe = (struct rxe_recv_wqe *)producer_addr(q);

        wqe->wr_id = recv_wr->wr_id;
        wqe->num_sge = recv_wr->num_sge;

        memcpy(wqe->dma.sge, recv_wr->sg_list,
               wqe->num_sge * sizeof(*wqe->dma.sge));

        for (i = 0; i < wqe->num_sge; i++) {
                length += wqe->dma.sge[i].length;
        }

        wqe->dma.length = length;
        wqe->dma.resid = length;
        wqe->dma.cur_sge = 0;
        wqe->dma.num_sge = wqe->num_sge;
        wqe->dma.sge_offset = 0;

        advance_producer(q);

out:
        return rc;
}

static int rxe_post_srq_recv(struct ibv_srq *ibvsrq,
                             struct ibv_recv_wr *recv_wr,
                             struct ibv_recv_wr **bad_recv_wr)
{
        struct rxe_srq *srq = to_rsrq(ibvsrq);
        int rc = 0;

        pthread_spin_lock(&srq->rq.lock);

        while (recv_wr) {
                rc = rxe_post_one_recv(&srq->rq, recv_wr);
                if (rc) {
                        *bad_recv_wr = recv_wr;
                        break;
                }

                recv_wr = recv_wr->next;
        }

        pthread_spin_unlock(&srq->rq.lock);

        return rc;
}
static struct ibv_qp *rxe_create_qp(struct ibv_pd *pd,
                                    struct ibv_qp_init_attr *attr)
{
        struct ibv_create_qp cmd;
        struct rxe_create_qp_resp resp;
        struct rxe_qp *qp;
        int ret;

        qp = malloc(sizeof *qp);
        if (!qp) {
                return NULL;
        }

        ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd, sizeof cmd,
                                &resp.ibv_resp, sizeof resp);
        if (ret) {
                free(qp);
                return NULL;
        }

        if (attr->srq) {
                /* a QP attached to an SRQ has no receive queue of its own */
                qp->rq.max_sge = 0;
                qp->rq.queue = NULL;
                qp->rq_mmap_info.size = 0;
        } else {
                qp->rq.max_sge = attr->cap.max_recv_sge;
                qp->rq.queue = mmap(NULL, resp.rq_mi.size, PROT_READ | PROT_WRITE,
                                    MAP_SHARED,
                                    pd->context->cmd_fd, resp.rq_mi.offset);
                if ((void *)qp->rq.queue == MAP_FAILED) {
                        ibv_cmd_destroy_qp(&qp->ibv_qp);
                        free(qp);
                        return NULL;
                }

                qp->rq_mmap_info = resp.rq_mi;
                pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE);
        }

        qp->sq.max_sge = attr->cap.max_send_sge;
        qp->sq.max_inline = attr->cap.max_inline_data;
        qp->sq.queue = mmap(NULL, resp.sq_mi.size, PROT_READ | PROT_WRITE,
                            MAP_SHARED,
                            pd->context->cmd_fd, resp.sq_mi.offset);
        if ((void *)qp->sq.queue == MAP_FAILED) {
                if (qp->rq_mmap_info.size)
                        munmap(qp->rq.queue, qp->rq_mmap_info.size);
                ibv_cmd_destroy_qp(&qp->ibv_qp);
                free(qp);
                return NULL;
        }

        qp->sq_mmap_info = resp.sq_mi;
        pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE);

        return &qp->ibv_qp;
}

static int rxe_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                        int attr_mask,
                        struct ibv_qp_init_attr *init_attr)
{
        struct ibv_query_qp cmd;

        return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr,
                                &cmd, sizeof cmd);
}

static int rxe_modify_qp(struct ibv_qp *ibvqp,
                         struct ibv_qp_attr *attr,
                         int attr_mask)
{
        struct ibv_modify_qp cmd = {};

        return ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof cmd);
}

static int rxe_destroy_qp(struct ibv_qp *ibv_qp)
{
        int ret;
        struct rxe_qp *qp = to_rqp(ibv_qp);

        ret = ibv_cmd_destroy_qp(ibv_qp);
        if (!ret) {
                if (qp->rq_mmap_info.size)
                        munmap(qp->rq.queue, qp->rq_mmap_info.size);
                if (qp->sq_mmap_info.size)
                        munmap(qp->sq.queue, qp->sq_mmap_info.size);

                free(qp);
        }

        return ret;
}
/* basic sanity checks for send work request */
static int validate_send_wr(struct rxe_wq *sq, struct ibv_send_wr *ibwr,
                            unsigned int length)
{
        enum ibv_wr_opcode opcode = ibwr->opcode;

        if (ibwr->num_sge > sq->max_sge)
                return -EINVAL;

        if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
            || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
                if (length < 8 || ibwr->wr.atomic.remote_addr & 0x7)
                        return -EINVAL;

        if ((ibwr->send_flags & IBV_SEND_INLINE) && (length > sq->max_inline))
                return -EINVAL;

        return 0;
}

static void convert_send_wr(struct rxe_send_wr *kwr, struct ibv_send_wr *uwr)
{
        memset(kwr, 0, sizeof(*kwr));

        kwr->wr_id = uwr->wr_id;
        kwr->num_sge = uwr->num_sge;
        kwr->opcode = uwr->opcode;
        kwr->send_flags = uwr->send_flags;
        kwr->ex.imm_data = uwr->imm_data;

        switch (uwr->opcode) {
        case IBV_WR_RDMA_WRITE:
        case IBV_WR_RDMA_WRITE_WITH_IMM:
        case IBV_WR_RDMA_READ:
                kwr->wr.rdma.remote_addr = uwr->wr.rdma.remote_addr;
                kwr->wr.rdma.rkey = uwr->wr.rdma.rkey;
                break;

        case IBV_WR_SEND:
        case IBV_WR_SEND_WITH_IMM:
                kwr->wr.ud.remote_qpn = uwr->wr.ud.remote_qpn;
                kwr->wr.ud.remote_qkey = uwr->wr.ud.remote_qkey;
                break;

        case IBV_WR_ATOMIC_CMP_AND_SWP:
        case IBV_WR_ATOMIC_FETCH_AND_ADD:
                kwr->wr.atomic.remote_addr = uwr->wr.atomic.remote_addr;
                kwr->wr.atomic.compare_add = uwr->wr.atomic.compare_add;
                kwr->wr.atomic.swap = uwr->wr.atomic.swap;
                kwr->wr.atomic.rkey = uwr->wr.atomic.rkey;
                break;

        case IBV_WR_LOCAL_INV:
        case IBV_WR_BIND_MW:
        case IBV_WR_SEND_WITH_INV:
        case IBV_WR_TSO:
                break;
        }
}

static int init_send_wqe(struct rxe_qp *qp, struct rxe_wq *sq,
                         struct ibv_send_wr *ibwr, unsigned int length,
                         struct rxe_send_wqe *wqe)
{
        int num_sge = ibwr->num_sge;
        int i;
        unsigned int opcode = ibwr->opcode;

        convert_send_wr(&wqe->wr, ibwr);

        if (qp_type(qp) == IBV_QPT_UD)
                memcpy(&wqe->av, &to_rah(ibwr->wr.ud.ah)->av,
                       sizeof(struct rxe_av));

        if (ibwr->send_flags & IBV_SEND_INLINE) {
                uint8_t *inline_data = wqe->dma.inline_data;

                for (i = 0; i < num_sge; i++) {
                        memcpy(inline_data,
                               (uint8_t *)(long)ibwr->sg_list[i].addr,
                               ibwr->sg_list[i].length);
                        inline_data += ibwr->sg_list[i].length;
                }
        } else
                memcpy(wqe->dma.sge, ibwr->sg_list,
                       num_sge * sizeof(struct ibv_sge));

        if ((opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
            || (opcode == IBV_WR_ATOMIC_FETCH_AND_ADD))
                wqe->iova = ibwr->wr.atomic.remote_addr;
        else
                wqe->iova = ibwr->wr.rdma.remote_addr;
        wqe->dma.length = length;
        wqe->dma.resid = length;
        wqe->dma.num_sge = num_sge;
        wqe->dma.cur_sge = 0;
        wqe->dma.sge_offset = 0;
        wqe->state = 0;
        wqe->ssn = qp->ssn++;

        return 0;
}

static int post_one_send(struct rxe_qp *qp, struct rxe_wq *sq,
                         struct ibv_send_wr *ibwr)
{
        int err;
        struct rxe_send_wqe *wqe;
        unsigned int length = 0;
        int i;

        for (i = 0; i < ibwr->num_sge; i++)
                length += ibwr->sg_list[i].length;

        err = validate_send_wr(sq, ibwr, length);
        if (err) {
                printf("validate send failed\n");
                return err;
        }

        wqe = (struct rxe_send_wqe *)producer_addr(sq->queue);

        err = init_send_wqe(qp, sq, ibwr, length, wqe);
        if (err)
                return err;

        if (queue_full(sq->queue))
                return -ENOMEM;

        advance_producer(sq->queue);

        return 0;
}
/* send a null post send as a doorbell */
static int post_send_db(struct ibv_qp *ibqp)
{
        struct ibv_post_send cmd;
        struct ibv_post_send_resp resp;

        cmd.command = IB_USER_VERBS_CMD_POST_SEND;
        cmd.in_words = sizeof(cmd)/4;
        cmd.out_words = sizeof(resp)/4;
        cmd.response = (uintptr_t)&resp;
        cmd.qp_handle = ibqp->handle;
        cmd.wr_count = 0;
        cmd.sge_count = 0;
        cmd.wqe_size = sizeof(struct ibv_send_wr);

        if (write(ibqp->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
                return errno;

        return 0;
}

/* this API does not make a distinction between
   restartable and non-restartable errors */
static int rxe_post_send(struct ibv_qp *ibqp,
                         struct ibv_send_wr *wr_list,
                         struct ibv_send_wr **bad_wr)
{
        int rc = 0;
        int err;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_wq *sq = &qp->sq;

        if (!bad_wr)
                return EINVAL;

        *bad_wr = NULL;

        if (!sq || !wr_list || !sq->queue)
                return EINVAL;

        pthread_spin_lock(&sq->lock);

        while (wr_list) {
                rc = post_one_send(qp, sq, wr_list);
                if (rc) {
                        *bad_wr = wr_list;
                        break;
                }

                wr_list = wr_list->next;
        }

        pthread_spin_unlock(&sq->lock);

        err = post_send_db(ibqp);
        return err ? err : rc;
}
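
/*
 * Illustrative sketch, not part of the provider: posting a single signaled
 * RDMA write through the public API; ibv_post_send() lands in
 * rxe_post_send() above, which queues the WQE and rings the doorbell via
 * post_send_db(). remote_addr and rkey are assumed to have been exchanged
 * with the peer out of band. Kept under #if 0 so it is not built.
 */
#if 0
static int example_rdma_write(struct ibv_qp *qp, struct ibv_mr *mr,
                              void *buf, uint32_t len,
                              uint64_t remote_addr, uint32_t rkey)
{
        struct ibv_send_wr wr = {}, *bad_wr;
        struct ibv_sge sge = {
                .addr = (uintptr_t)buf,
                .length = len,
                .lkey = mr->lkey,
        };

        wr.wr_id = 1;
        wr.sg_list = &sge;
        wr.num_sge = 1;
        wr.opcode = IBV_WR_RDMA_WRITE;
        wr.send_flags = IBV_SEND_SIGNALED;
        wr.wr.rdma.remote_addr = remote_addr;
        wr.wr.rdma.rkey = rkey;

        return ibv_post_send(qp, &wr, &bad_wr);
}
#endif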

static int rxe_post_recv(struct ibv_qp *ibqp,
                         struct ibv_recv_wr *recv_wr,
                         struct ibv_recv_wr **bad_wr)
{
        int rc = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_wq *rq = &qp->rq;

        if (!bad_wr)
                return EINVAL;

        *bad_wr = NULL;

        if (!rq || !recv_wr || !rq->queue)
                return EINVAL;

        pthread_spin_lock(&rq->lock);

        while (recv_wr) {
                rc = rxe_post_one_recv(rq, recv_wr);
                if (rc) {
                        *bad_wr = recv_wr;
                        break;
                }

                recv_wr = recv_wr->next;
        }

        pthread_spin_unlock(&rq->lock);

        return rc;
}

static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
{
        return IN6_IS_ADDR_V4MAPPED(a);
}

static inline int rdma_gid2ip(struct sockaddr *out, union ibv_gid *gid)
{
        if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
                struct sockaddr_in *out_in = (struct sockaddr_in *)out;
                memset(out_in, 0, sizeof(*out_in));
                memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4);
        } else {
                struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out;
                memset(out_in, 0, sizeof(*out_in));
                out_in->sin6_family = AF_INET6;
                memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
        }
        return 0;
}

static struct ibv_ah *rxe_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
        int err;
        struct rxe_ah *ah;
        struct rxe_av *av;
        union ibv_gid sgid;
        struct ibv_create_ah_resp resp;

        err = ibv_query_gid(pd->context, attr->port_num, attr->grh.sgid_index,
                            &sgid);
        if (err) {
                fprintf(stderr, "rxe: Failed to query sgid.\n");
                return NULL;
        }

        ah = malloc(sizeof *ah);
        if (ah == NULL)
                return NULL;

        av = &ah->av;
        av->port_num = attr->port_num;
        memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
        av->network_type =
                ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ?
                RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6;

        rdma_gid2ip(&av->sgid_addr._sockaddr, &sgid);
        rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);

        memset(&resp, 0, sizeof(resp));
        if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp, sizeof(resp))) {
                free(ah);
                return NULL;
        }

        return &ah->ibv_ah;
}

static int rxe_destroy_ah(struct ibv_ah *ibah)
{
        int ret;
        struct rxe_ah *ah = to_rah(ibah);

        ret = ibv_cmd_destroy_ah(&ah->ibv_ah);
        if (ret)
                return ret;

        free(ah);
        return 0;
}
static struct ibv_context_ops rxe_ctx_ops = {
        .query_device = rxe_query_device,
        .query_port = rxe_query_port,
        .alloc_pd = rxe_alloc_pd,
        .dealloc_pd = rxe_dealloc_pd,
        .reg_mr = rxe_reg_mr,
        .dereg_mr = rxe_dereg_mr,
        .create_cq = rxe_create_cq,
        .poll_cq = rxe_poll_cq,
        .req_notify_cq = ibv_cmd_req_notify_cq,
        .cq_event = NULL,
        .resize_cq = rxe_resize_cq,
        .destroy_cq = rxe_destroy_cq,
        .create_srq = rxe_create_srq,
        .modify_srq = rxe_modify_srq,
        .query_srq = rxe_query_srq,
        .destroy_srq = rxe_destroy_srq,
        .post_srq_recv = rxe_post_srq_recv,
        .create_qp = rxe_create_qp,
        .query_qp = rxe_query_qp,
        .modify_qp = rxe_modify_qp,
        .destroy_qp = rxe_destroy_qp,
        .post_send = rxe_post_send,
        .post_recv = rxe_post_recv,
        .create_ah = rxe_create_ah,
        .destroy_ah = rxe_destroy_ah,
        .attach_mcast = ibv_cmd_attach_mcast,
        .detach_mcast = ibv_cmd_detach_mcast
};

static struct ibv_context *rxe_alloc_context(struct ibv_device *ibdev,
                                             int cmd_fd)
{
        struct rxe_context *context;
        struct ibv_get_context cmd;
        struct ibv_get_context_resp resp;

        context = malloc(sizeof *context);
        if (!context)
                return NULL;

        memset(context, 0, sizeof *context);
        context->ibv_ctx.cmd_fd = cmd_fd;

        if (ibv_cmd_get_context(&context->ibv_ctx, &cmd,
                                sizeof cmd, &resp, sizeof resp))
                goto out;

        context->ibv_ctx.ops = rxe_ctx_ops;

        return &context->ibv_ctx;

out:
        free(context);
        return NULL;
}

static void rxe_free_context(struct ibv_context *ibctx)
{
        struct rxe_context *context = to_rctx(ibctx);

        free(context);
}

static void rxe_uninit_device(struct verbs_device *verbs_device)
{
        struct rxe_device *dev = to_rdev(&verbs_device->device);

        free(dev);
}

static struct verbs_device *rxe_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
{
        struct rxe_device *dev;

        dev = calloc(1, sizeof(*dev));
        if (!dev)
                return NULL;

        dev->abi_version = sysfs_dev->abi_ver;

        return &dev->ibv_dev;
}

static const struct verbs_device_ops rxe_dev_ops = {
        .name = "rxe",
        .match_min_abi_version = 0,
        .match_max_abi_version = INT_MAX,
        .match_table = hca_table,
        .alloc_device = rxe_device_alloc,
        .uninit_device = rxe_uninit_device,
        .alloc_context = rxe_alloc_context,
        .free_context = rxe_free_context,
};

PROVIDER_DRIVER(rxe_dev_ops);
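
/*
 * Illustrative sketch, not part of the provider: minimal bring-up from the
 * application side. PROVIDER_DRIVER() above registers rxe_dev_ops with
 * libibverbs, so once an rxe device exists (e.g. one created over a netdev
 * with the iproute2 "rdma link add ... type rxe ..." command) it appears in
 * the ordinary device list and ibv_open_device() ends up calling
 * rxe_alloc_context(). Kept under #if 0 so it is not built.
 */
#if 0
static struct ibv_context *example_open_rxe_device(void)
{
        struct ibv_device **list = ibv_get_device_list(NULL);
        struct ibv_context *ctx = NULL;
        int i;

        for (i = 0; list && list[i]; i++) {
                /* match on the device name prefix, as hca_table does above */
                if (!strncmp(ibv_get_device_name(list[i]), "rxe", 3)) {
                        ctx = ibv_open_device(list[i]);
                        break;
                }
        }

        if (list)
                ibv_free_device_list(list);
        return ctx;
}
#endif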