config root man

Current Path : /sys/amd64/compile/hs32/modules/usr/src/sys/modules/xl/@/ofed/drivers/infiniband/ulp/sdp/

FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64
Upload File :
Current File : //sys/amd64/compile/hs32/modules/usr/src/sys/modules/xl/@/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c

/*
 * Copyright (c) 2009 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "sdp.h"

SDP_MODPARAM_INT(rcvbuf_initial_size, 32 * 1024,
		"Receive buffer initial size in bytes.");
SDP_MODPARAM_SINT(rcvbuf_scale, 0x8,
		"Receive buffer size scale factor.");

/* Like tcp_fin - called when SDP_MID_DISCONNECT is received */
static void
sdp_handle_disconn(struct sdp_sock *ssk)
{

	sdp_dbg(ssk->socket, "%s\n", __func__);

	SDP_WLOCK_ASSERT(ssk);
	if (TCPS_HAVERCVDFIN(ssk->state) == 0)
		socantrcvmore(ssk->socket);

	switch (ssk->state) {
	case TCPS_SYN_RECEIVED:
	case TCPS_ESTABLISHED:
		ssk->state = TCPS_CLOSE_WAIT;
		break;

	case TCPS_FIN_WAIT_1:
		/* Received a reply FIN - start Infiniband tear down */
		sdp_dbg(ssk->socket,
		    "%s: Starting Infiniband tear down sending DREQ\n",
		    __func__);

		sdp_cancel_dreq_wait_timeout(ssk);
		ssk->qp_active = 0;
		if (ssk->id) {
			struct rdma_cm_id *id;

			id = ssk->id;
			SDP_WUNLOCK(ssk);
			rdma_disconnect(id);
			SDP_WLOCK(ssk);
		} else {
			sdp_warn(ssk->socket,
			    "%s: ssk->id is NULL\n", __func__);
			return;
		}
		break;
	case TCPS_TIME_WAIT:
		/* This is a mutual close situation and we've got the DREQ from
		   the peer before the SDP_MID_DISCONNECT */
		break;
	case TCPS_CLOSED:
		/* FIN arrived after IB teardown started - do nothing */
		sdp_dbg(ssk->socket, "%s: fin in state %s\n",
		    __func__, sdp_state_str(ssk->state));
		return;
	default:
		sdp_warn(ssk->socket,
		    "%s: FIN in unexpected state. state=%d\n",
		    __func__, ssk->state);
		break;
	}
}

static int
sdp_post_recv(struct sdp_sock *ssk)
{
	struct sdp_buf *rx_req;
	int i, rc;
	u64 addr;
	struct ib_device *dev;
	struct ib_recv_wr rx_wr = { NULL };
	struct ib_sge ibsge[SDP_MAX_RECV_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_recv_wr *bad_wr;
	struct mbuf *mb, *m;
	struct sdp_bsdh *h;
	int id = ring_head(ssk->rx_ring);

	/* Now, allocate and repost recv */
	sdp_prf(ssk->socket, mb, "Posting mb");
	mb = m_getm2(NULL, ssk->recv_bytes, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (mb == NULL) {
		/* Retry so we can't stall out with no memory. */
		if (!rx_ring_posted(ssk))
			queue_work(rx_comp_wq, &ssk->rx_comp_work);
		return -1;
	}
	for (m = mb; m != NULL; m = m->m_next) {
		m->m_len = (m->m_flags & M_EXT) ? m->m_ext.ext_size :
                        ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN);
		mb->m_pkthdr.len += m->m_len;
	}
	h = mtod(mb, struct sdp_bsdh *);
	rx_req = ssk->rx_ring.buffer + (id & (SDP_RX_SIZE - 1));
	rx_req->mb = mb;
	dev = ssk->ib_device;
        for (i = 0;  mb != NULL; i++, mb = mb->m_next, sge++) {
		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
		    DMA_TO_DEVICE);
		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));
		BUG_ON(i >= SDP_MAX_RECV_SGES);
		rx_req->mapping[i] = addr;
		sge->addr = addr;
		sge->length = mb->m_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
        }

	rx_wr.next = NULL;
	rx_wr.wr_id = id | SDP_OP_RECV;
	rx_wr.sg_list = ibsge;
	rx_wr.num_sge = i;
	rc = ib_post_recv(ssk->qp, &rx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_warn(ssk->socket, "ib_post_recv failed. status %d\n", rc);

		sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE);
		m_freem(mb);

		sdp_notify(ssk, ECONNRESET);

		return -1;
	}

	atomic_inc(&ssk->rx_ring.head);
	SDPSTATS_COUNTER_INC(post_recv);

	return 0;
}

static inline int
sdp_post_recvs_needed(struct sdp_sock *ssk)
{
	unsigned long bytes_in_process;
	unsigned long max_bytes;
	int buffer_size;
	int posted;

	if (!ssk->qp_active || !ssk->socket)
		return 0;

	posted = rx_ring_posted(ssk);
	if (posted >= SDP_RX_SIZE)
		return 0;
	if (posted < SDP_MIN_TX_CREDITS)
		return 1;

	buffer_size = ssk->recv_bytes;
	max_bytes = max(ssk->socket->so_snd.sb_hiwat,
	    (1 + SDP_MIN_TX_CREDITS) * buffer_size);
	max_bytes *= rcvbuf_scale;
	/*
	 * Compute bytes in the receive queue and socket buffer.
	 */
	bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size;
	bytes_in_process += ssk->socket->so_rcv.sb_cc;

	return bytes_in_process < max_bytes;
}

static inline void
sdp_post_recvs(struct sdp_sock *ssk)
{

	while (sdp_post_recvs_needed(ssk))
		if (sdp_post_recv(ssk))
			return;
}

static inline struct mbuf *
sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	struct sdp_bsdh *h;

	h = mtod(mb, struct sdp_bsdh *);

#ifdef SDP_ZCOPY
	SDP_SKB_CB(mb)->seq = rcv_nxt(ssk);
	if (h->mid == SDP_MID_SRCAVAIL) {
		struct sdp_srcah *srcah = (struct sdp_srcah *)(h+1);
		struct rx_srcavail_state *rx_sa;
		
		ssk->srcavail_cancel_mseq = 0;

		ssk->rx_sa = rx_sa = RX_SRCAVAIL_STATE(mb) = kzalloc(
				sizeof(struct rx_srcavail_state), M_NOWAIT);

		rx_sa->mseq = ntohl(h->mseq);
		rx_sa->used = 0;
		rx_sa->len = mb_len = ntohl(srcah->len);
		rx_sa->rkey = ntohl(srcah->rkey);
		rx_sa->vaddr = be64_to_cpu(srcah->vaddr);
		rx_sa->flags = 0;

		if (ssk->tx_sa) {
			sdp_dbg_data(ssk->socket, "got RX SrcAvail while waiting "
					"for TX SrcAvail. waking up TX SrcAvail"
					"to be aborted\n");
			wake_up(sk->sk_sleep);
		}

		atomic_add(mb->len, &ssk->rcv_nxt);
		sdp_dbg_data(sk, "queueing SrcAvail. mb_len = %d vaddr = %lld\n",
			mb_len, rx_sa->vaddr);
	} else
#endif
	{
		atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt);
	}

	m_adj(mb, SDP_HEAD_SIZE);
	SOCKBUF_LOCK(&sk->so_rcv);
	if (unlikely(h->flags & SDP_OOB_PRES))
		sdp_urg(ssk, mb);
	sbappend_locked(&sk->so_rcv, mb);
	sorwakeup_locked(sk);
	return mb;
}

static int
sdp_get_recv_bytes(struct sdp_sock *ssk, u32 new_size)
{

	return MIN(new_size, SDP_MAX_PACKET);
}

int
sdp_init_buffers(struct sdp_sock *ssk, u32 new_size)
{

	ssk->recv_bytes = sdp_get_recv_bytes(ssk, new_size);
	sdp_post_recvs(ssk);

	return 0;
}

int
sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size)
{
	u32 curr_size = ssk->recv_bytes;
	u32 max_size = SDP_MAX_PACKET;

	if (new_size > curr_size && new_size <= max_size) {
		ssk->recv_bytes = sdp_get_recv_bytes(ssk, new_size);
		return 0;
	}
	return -1;
}

static void
sdp_handle_resize_request(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf)
{
	if (sdp_resize_buffers(ssk, ntohl(buf->size)) == 0)
		ssk->recv_request_head = ring_head(ssk->rx_ring) + 1;
	else
		ssk->recv_request_head = ring_tail(ssk->rx_ring);
	ssk->recv_request = 1;
}

static void
sdp_handle_resize_ack(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf)
{
	u32 new_size = ntohl(buf->size);

	if (new_size > ssk->xmit_size_goal)
		ssk->xmit_size_goal = new_size;
}

static struct mbuf *
sdp_recv_completion(struct sdp_sock *ssk, int id)
{
	struct sdp_buf *rx_req;
	struct ib_device *dev;
	struct mbuf *mb;

	if (unlikely(id != ring_tail(ssk->rx_ring))) {
		printk(KERN_WARNING "Bogus recv completion id %d tail %d\n",
			id, ring_tail(ssk->rx_ring));
		return NULL;
	}

	dev = ssk->ib_device;
	rx_req = &ssk->rx_ring.buffer[id & (SDP_RX_SIZE - 1)];
	mb = rx_req->mb;
	sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE);

	atomic_inc(&ssk->rx_ring.tail);
	atomic_dec(&ssk->remote_credits);
	return mb;
}

/* socket lock should be taken before calling this */
static int
sdp_process_rx_ctl_mb(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_bsdh *h;
	struct socket *sk;

	SDP_WLOCK_ASSERT(ssk);
	sk = ssk->socket;
 	h = mtod(mb, struct sdp_bsdh *);
	switch (h->mid) {
	case SDP_MID_DATA:
	case SDP_MID_SRCAVAIL:
		sdp_dbg(sk, "DATA after socket rcv was shutdown\n");

		/* got data in RCV_SHUTDOWN */
		if (ssk->state == TCPS_FIN_WAIT_1) {
			sdp_dbg(sk, "RX data when state = FIN_WAIT1\n");
			sdp_notify(ssk, ECONNRESET);
		}
		m_freem(mb);

		break;
#ifdef SDP_ZCOPY
	case SDP_MID_RDMARDCOMPL:
		m_freem(mb);
		break;
	case SDP_MID_SENDSM:
		sdp_handle_sendsm(ssk, ntohl(h->mseq_ack));
		m_freem(mb);
		break;
	case SDP_MID_SRCAVAIL_CANCEL:
		sdp_dbg_data(sk, "Handling SrcAvailCancel\n");
		sdp_prf(sk, NULL, "Handling SrcAvailCancel");
		if (ssk->rx_sa) {
			ssk->srcavail_cancel_mseq = ntohl(h->mseq);
			ssk->rx_sa->flags |= RX_SA_ABORTED;
			ssk->rx_sa = NULL; /* TODO: change it into SDP_MID_DATA and get 
			                      the dirty logic from recvmsg */
		} else {
			sdp_dbg(sk, "Got SrcAvailCancel - "
					"but no SrcAvail in process\n");
		}
		m_freem(mb);
		break;
	case SDP_MID_SINKAVAIL:
		sdp_dbg_data(sk, "Got SinkAvail - not supported: ignored\n");
		sdp_prf(sk, NULL, "Got SinkAvail - not supported: ignored");
		/* FALLTHROUGH */
#endif
	case SDP_MID_ABORT:
		sdp_dbg_data(sk, "Handling ABORT\n");
		sdp_prf(sk, NULL, "Handling ABORT");
		sdp_notify(ssk, ECONNRESET);
		m_freem(mb);
		break;
	case SDP_MID_DISCONN:
		sdp_dbg_data(sk, "Handling DISCONN\n");
		sdp_prf(sk, NULL, "Handling DISCONN");
		sdp_handle_disconn(ssk);
		break;
	case SDP_MID_CHRCVBUF:
		sdp_dbg_data(sk, "Handling RX CHRCVBUF\n");
		sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf *)(h+1));
		m_freem(mb);
		break;
	case SDP_MID_CHRCVBUF_ACK:
		sdp_dbg_data(sk, "Handling RX CHRCVBUF_ACK\n");
		sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)(h+1));
		m_freem(mb);
		break;
	default:
		/* TODO: Handle other messages */
		sdp_warn(sk, "SDP: FIXME MID %d\n", h->mid);
		m_freem(mb);
	}

	return 0;
}

static int
sdp_process_rx_mb(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct socket *sk;
	struct sdp_bsdh *h;
	unsigned long mseq_ack;
	int credits_before;

	h = mtod(mb, struct sdp_bsdh *);
	sk = ssk->socket;
	/*
	 * If another thread is in so_pcbfree this may be partially torn
	 * down but no further synchronization is required as the destroying
	 * thread will wait for receive to shutdown before discarding the
	 * socket.
	 */
	if (sk == NULL) {
		m_freem(mb);
		return 0;
	}

	SDPSTATS_HIST_LINEAR(credits_before_update, tx_credits(ssk));

	mseq_ack = ntohl(h->mseq_ack);
	credits_before = tx_credits(ssk);
	atomic_set(&ssk->tx_ring.credits, mseq_ack - ring_head(ssk->tx_ring) +
			1 + ntohs(h->bufs));
	if (mseq_ack >= ssk->nagle_last_unacked)
		ssk->nagle_last_unacked = 0;

	sdp_prf1(ssk->socket, mb, "RX %s +%d c:%d->%d mseq:%d ack:%d\n",
		mid2str(h->mid), ntohs(h->bufs), credits_before,
		tx_credits(ssk), ntohl(h->mseq), ntohl(h->mseq_ack));

	if (unlikely(h->mid == SDP_MID_DATA &&
	    mb->m_pkthdr.len == SDP_HEAD_SIZE)) {
		/* Credit update is valid even after RCV_SHUTDOWN */
		m_freem(mb);
		return 0;
	}

	if ((h->mid != SDP_MID_DATA && h->mid != SDP_MID_SRCAVAIL) ||
	    TCPS_HAVERCVDFIN(ssk->state)) {
		sdp_prf(sk, NULL, "Control mb - queing to control queue");
#ifdef SDP_ZCOPY
		if (h->mid == SDP_MID_SRCAVAIL_CANCEL) {
			sdp_dbg_data(sk, "Got SrcAvailCancel. "
					"seq: 0x%d seq_ack: 0x%d\n",
					ntohl(h->mseq), ntohl(h->mseq_ack));
			ssk->srcavail_cancel_mseq = ntohl(h->mseq);
		}


		if (h->mid == SDP_MID_RDMARDCOMPL) {
			struct sdp_rrch *rrch = (struct sdp_rrch *)(h+1);
			sdp_dbg_data(sk, "RdmaRdCompl message arrived\n");
			sdp_handle_rdma_read_compl(ssk, ntohl(h->mseq_ack),
					ntohl(rrch->len));
		}
#endif
		mb->m_nextpkt = NULL;
		if (ssk->rx_ctl_tail)
			ssk->rx_ctl_tail->m_nextpkt = mb;
		else
			ssk->rx_ctl_q = mb;
		ssk->rx_ctl_tail = mb;

		return 0;
	}

	sdp_prf1(sk, NULL, "queueing %s mb\n", mid2str(h->mid));
	mb = sdp_sock_queue_rcv_mb(sk, mb);


	return 0;
}

/* called only from irq */
static struct mbuf *
sdp_process_rx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
{
	struct mbuf *mb;
	struct sdp_bsdh *h;
	struct socket *sk = ssk->socket;
	int mseq;

	mb = sdp_recv_completion(ssk, wc->wr_id);
	if (unlikely(!mb))
		return NULL;

	if (unlikely(wc->status)) {
		if (ssk->qp_active && sk) {
			sdp_dbg(sk, "Recv completion with error. "
					"Status %d, vendor: %d\n",
				wc->status, wc->vendor_err);
			sdp_abort(sk);
			ssk->qp_active = 0;
		}
		m_freem(mb);
		return NULL;
	}

	sdp_dbg_data(sk, "Recv completion. ID %d Length %d\n",
			(int)wc->wr_id, wc->byte_len);
	if (unlikely(wc->byte_len < sizeof(struct sdp_bsdh))) {
		sdp_warn(sk, "SDP BUG! byte_len %d < %zd\n",
				wc->byte_len, sizeof(struct sdp_bsdh));
		m_freem(mb);
		return NULL;
	}
	/* Use m_adj to trim the tail of data we didn't use. */
	m_adj(mb, -(mb->m_pkthdr.len - wc->byte_len));
	h = mtod(mb, struct sdp_bsdh *);

	SDP_DUMP_PACKET(ssk->socket, "RX", mb, h);

	ssk->rx_packets++;
	ssk->rx_bytes += mb->m_pkthdr.len;

	mseq = ntohl(h->mseq);
	atomic_set(&ssk->mseq_ack, mseq);
	if (mseq != (int)wc->wr_id)
		sdp_warn(sk, "SDP BUG! mseq %d != wrid %d\n",
				mseq, (int)wc->wr_id);

	return mb;
}

/* Wakeup writers if we now have credits. */
static void
sdp_bzcopy_write_space(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;

	if (tx_credits(ssk) >= ssk->min_bufs && sk)
		sowwakeup(sk);
}

/* only from interrupt. */
static int
sdp_poll_rx_cq(struct sdp_sock *ssk)
{
	struct ib_cq *cq = ssk->rx_ring.cq;
	struct ib_wc ibwc[SDP_NUM_WC];
	int n, i;
	int wc_processed = 0;
	struct mbuf *mb;

	do {
		n = ib_poll_cq(cq, SDP_NUM_WC, ibwc);
		for (i = 0; i < n; ++i) {
			struct ib_wc *wc = &ibwc[i];

			BUG_ON(!(wc->wr_id & SDP_OP_RECV));
			mb = sdp_process_rx_wc(ssk, wc);
			if (!mb)
				continue;

			sdp_process_rx_mb(ssk, mb);
			wc_processed++;
		}
	} while (n == SDP_NUM_WC);

	if (wc_processed)
		sdp_bzcopy_write_space(ssk);

	return wc_processed;
}

static void
sdp_rx_comp_work(struct work_struct *work)
{
	struct sdp_sock *ssk = container_of(work, struct sdp_sock,
			rx_comp_work);

	sdp_prf(ssk->socket, NULL, "%s", __func__);

	SDP_WLOCK(ssk);
	if (unlikely(!ssk->qp)) {
		sdp_prf(ssk->socket, NULL, "qp was destroyed");
		goto out;
	}
	if (unlikely(!ssk->rx_ring.cq)) {
		sdp_prf(ssk->socket, NULL, "rx_ring.cq is NULL");
		goto out;
	}

	if (unlikely(!ssk->poll_cq)) {
		struct rdma_cm_id *id = ssk->id;
		if (id && id->qp)
			rdma_notify(id, RDMA_CM_EVENT_ESTABLISHED);
		goto out;
	}

	sdp_do_posts(ssk);
out:
	SDP_WUNLOCK(ssk);
}

void
sdp_do_posts(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;
	int xmit_poll_force;
	struct mbuf *mb;

	SDP_WLOCK_ASSERT(ssk);
	if (!ssk->qp_active) {
		sdp_dbg(sk, "QP is deactivated\n");
		return;
	}

	while ((mb = ssk->rx_ctl_q)) {
		ssk->rx_ctl_q = mb->m_nextpkt;
		mb->m_nextpkt = NULL;
		sdp_process_rx_ctl_mb(ssk, mb);
	}

	if (ssk->state == TCPS_TIME_WAIT)
		return;

	if (!ssk->rx_ring.cq || !ssk->tx_ring.cq)
		return;

	sdp_post_recvs(ssk);

	if (tx_ring_posted(ssk))
		sdp_xmit_poll(ssk, 1);

	sdp_post_sends(ssk, M_NOWAIT);

	xmit_poll_force = tx_credits(ssk) < SDP_MIN_TX_CREDITS;

	if (credit_update_needed(ssk) || xmit_poll_force) {
		/* if has pending tx because run out of tx_credits - xmit it */
		sdp_prf(sk, NULL, "Processing to free pending sends");
		sdp_xmit_poll(ssk,  xmit_poll_force);
		sdp_prf(sk, NULL, "Sending credit update");
		sdp_post_sends(ssk, M_NOWAIT);
	}

}

int
sdp_process_rx(struct sdp_sock *ssk)
{
	int wc_processed = 0;
	int credits_before;

	if (!rx_ring_trylock(&ssk->rx_ring)) {
		sdp_dbg(ssk->socket, "ring destroyed. not polling it\n");
		return 0;
	}

	credits_before = tx_credits(ssk);

	wc_processed = sdp_poll_rx_cq(ssk);
	sdp_prf(ssk->socket, NULL, "processed %d", wc_processed);

	if (wc_processed) {
		sdp_prf(ssk->socket, NULL, "credits:  %d -> %d",
				credits_before, tx_credits(ssk));
		queue_work(rx_comp_wq, &ssk->rx_comp_work);
	}
	sdp_arm_rx_cq(ssk);

	rx_ring_unlock(&ssk->rx_ring);

	return (wc_processed);
}

static void
sdp_rx_irq(struct ib_cq *cq, void *cq_context)
{
	struct socket *sk = cq_context;
	struct sdp_sock *ssk = sdp_sk(sk);

	if (cq != ssk->rx_ring.cq) {
		sdp_dbg(sk, "cq = %p, ssk->cq = %p\n", cq, ssk->rx_ring.cq);
		return;
	}

	SDPSTATS_COUNTER_INC(rx_int_count);

	sdp_prf(sk, NULL, "rx irq");

	sdp_process_rx(ssk);
}

static
void sdp_rx_ring_purge(struct sdp_sock *ssk)
{
	while (rx_ring_posted(ssk) > 0) {
		struct mbuf *mb;
		mb = sdp_recv_completion(ssk, ring_tail(ssk->rx_ring));
		if (!mb)
			break;
		m_freem(mb);
	}
}

void
sdp_rx_ring_init(struct sdp_sock *ssk)
{
	ssk->rx_ring.buffer = NULL;
	ssk->rx_ring.destroyed = 0;
	rw_init(&ssk->rx_ring.destroyed_lock, "sdp rx lock");
}

static void
sdp_rx_cq_event_handler(struct ib_event *event, void *data)
{
}

int
sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
	struct ib_cq *rx_cq;
	int rc = 0;


	sdp_dbg(ssk->socket, "rx ring created");
	INIT_WORK(&ssk->rx_comp_work, sdp_rx_comp_work);
	atomic_set(&ssk->rx_ring.head, 1);
	atomic_set(&ssk->rx_ring.tail, 1);

	ssk->rx_ring.buffer = kmalloc(
			sizeof *ssk->rx_ring.buffer * SDP_RX_SIZE, GFP_KERNEL);
	if (!ssk->rx_ring.buffer) {
		sdp_warn(ssk->socket,
			"Unable to allocate RX Ring size %zd.\n",
			 sizeof(*ssk->rx_ring.buffer) * SDP_RX_SIZE);

		return -ENOMEM;
	}

	rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler,
			  ssk->socket, SDP_RX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);

	if (IS_ERR(rx_cq)) {
		rc = PTR_ERR(rx_cq);
		sdp_warn(ssk->socket, "Unable to allocate RX CQ: %d.\n", rc);
		goto err_cq;
	}

	sdp_sk(ssk->socket)->rx_ring.cq = rx_cq;
	sdp_arm_rx_cq(ssk);

	return 0;

err_cq:
	kfree(ssk->rx_ring.buffer);
	ssk->rx_ring.buffer = NULL;
	return rc;
}

void
sdp_rx_ring_destroy(struct sdp_sock *ssk)
{

	cancel_work_sync(&ssk->rx_comp_work);
	rx_ring_destroy_lock(&ssk->rx_ring);

	if (ssk->rx_ring.buffer) {
		sdp_rx_ring_purge(ssk);

		kfree(ssk->rx_ring.buffer);
		ssk->rx_ring.buffer = NULL;
	}

	if (ssk->rx_ring.cq) {
		if (ib_destroy_cq(ssk->rx_ring.cq)) {
			sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
				ssk->rx_ring.cq);
		} else {
			ssk->rx_ring.cq = NULL;
		}
	}

	WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring));
}

Man Man