Current Path : /sys/contrib/rdma/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //sys/contrib/rdma/rdma_iwcm.c |
/* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <sys/cdefs.h> __FBSDID("$FreeBSD: release/9.1.0/sys/contrib/rdma/rdma_iwcm.c 178784 2008-05-05 18:35:55Z kmacy $"); #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/kernel.h> #include <sys/libkern.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/module.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/rwlock.h> #include <sys/queue.h> #include <sys/taskqueue.h> #include <sys/priv.h> #include <sys/syslog.h> #include <sys/malloc.h> #include <netinet/in.h> #include <netinet/in_pcb.h> #include <contrib/rdma/iw_cm.h> enum iw_cm_state { IW_CM_STATE_IDLE, /* unbound, inactive */ IW_CM_STATE_LISTEN, /* listen waiting for connect */ IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */ IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */ IW_CM_STATE_ESTABLISHED, /* established */ IW_CM_STATE_CLOSING, /* disconnect */ IW_CM_STATE_DESTROYING /* object being deleted */ }; struct iwcm_id_private { struct iw_cm_id id; enum iw_cm_state state; unsigned long flags; struct ib_qp *qp; void * destroy_comp; void * connect_wait; TAILQ_HEAD(, iwcm_work) work_list; struct mtx lock; volatile int refcount; TAILQ_HEAD(, iwcm_work) work_free_list; }; #define IWCM_F_CALLBACK_DESTROY 1 #define IWCM_F_CONNECT_WAIT 2 static struct taskqueue *iwcm_wq; struct iwcm_work { struct task task; struct iwcm_id_private *cm_id; TAILQ_ENTRY(iwcm_work) list; struct iw_cm_event event; TAILQ_ENTRY(iwcm_work) free_list; }; /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: * LISTENING IDS: Get enough elements preallocated to handle the * listen backlog. * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE * * Allocating them in connect and listen avoids having to deal * with allocation failures on the event upcall from the provider (which * is called in the interrupt context). * * One exception is when creating the cm_id for incoming connection requests. * There are two cases: * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If * the backlog is exceeded, then no more connection request events will * be processed. cm_event_handler() returns ENOMEM in this case. Its up * to the provider to reject the connection request. * 2) in the connection request workqueue handler, cm_conn_req_handler(). * If work elements cannot be allocated for the new connect request cm_id, * then IWCM will call the provider reject method. This is ok since * cm_conn_req_handler() runs in the workqueue thread context. */ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) { struct iwcm_work *work; if (TAILQ_EMPTY(&cm_id_priv->work_free_list)) return NULL; work = TAILQ_FIRST(&cm_id_priv->work_free_list); TAILQ_REMOVE(&cm_id_priv->work_free_list, work, free_list); return work; } static void put_work(struct iwcm_work *work) { TAILQ_INSERT_HEAD(&work->cm_id->work_free_list, work, free_list); } static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct iwcm_work *e, *tmp; TAILQ_FOREACH_SAFE(e, &cm_id_priv->work_free_list, free_list, tmp) free(e, M_DEVBUF); } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) { struct iwcm_work *work; PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_free_list)); while (count--) { work = malloc(sizeof(struct iwcm_work), M_DEVBUF, M_NOWAIT); if (!work) { dealloc_work_entries(cm_id_priv); return (ENOMEM); } work->cm_id = cm_id_priv; put_work(work); } return 0; } /* * Save private data from incoming connection requests to * iw_cm_event, so the low level driver doesn't have to. Adjust * the event ptr to point to the local copy. */ static int copy_private_data(struct iw_cm_event *event) { void *p; p = malloc(event->private_data_len, M_DEVBUF, M_NOWAIT); if (!p) return (ENOMEM); bcopy(event->private_data, p, event->private_data_len); event->private_data = p; return 0; } static void free_cm_id(struct iwcm_id_private *cm_id_priv) { dealloc_work_entries(cm_id_priv); free(cm_id_priv, M_DEVBUF); } /* * Release a reference on cm_id. If the last reference is being * released, enable the waiting thread (in iw_destroy_cm_id) to * get woken up, and return 1 if a thread is already waiting. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { mtx_lock(&cm_id_priv->lock); PANIC_IF(atomic_load_acq_int(&cm_id_priv->refcount)==0); if (atomic_fetchadd_int(&cm_id_priv->refcount, -1) == 1) { PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_list)); wakeup(&cm_id_priv->destroy_comp); mtx_unlock(&cm_id_priv->lock); return 1; } mtx_unlock(&cm_id_priv->lock); return 0; } static void add_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); mtx_lock(&cm_id_priv->lock); atomic_add_int(&cm_id_priv->refcount, 1); mtx_unlock(&cm_id_priv->lock); } static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); if (iwcm_deref_id(cm_id_priv) && isset(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY)) { PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id *iw_create_cm_id(struct ib_device *device, struct socket *so, iw_cm_handler cm_handler, void *context) { struct iwcm_id_private *cm_id_priv; KASSERT(so, ("iw_create_cm_id called with NULL socket!")); cm_id_priv = malloc(sizeof(*cm_id_priv), M_DEVBUF, M_NOWAIT); if (!cm_id_priv) return ERR_PTR(ENOMEM); bzero(cm_id_priv, sizeof *cm_id_priv); cm_id_priv->state = IW_CM_STATE_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.event_handler = cm_event_handler; cm_id_priv->id.add_ref = add_ref; cm_id_priv->id.rem_ref = rem_ref; cm_id_priv->id.so = so; mtx_init(&cm_id_priv->lock, "cm_id_priv", NULL, MTX_DUPOK|MTX_DEF); atomic_store_rel_int(&cm_id_priv->refcount, 1); TAILQ_INIT(&cm_id_priv->work_list); TAILQ_INIT(&cm_id_priv->work_free_list); return &cm_id_priv->id; } static int iwcm_modify_qp_err(struct ib_qp *qp) { struct ib_qp_attr qp_attr; if (!qp) return (EINVAL); qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * This is really the RDMAC CLOSING state. It is most similar to the * IB SQD QP state. */ static int iwcm_modify_qp_sqd(struct ib_qp *qp) { struct ib_qp_attr qp_attr; PANIC_IF(qp == NULL); qp_attr.qp_state = IB_QPS_SQD; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * CM_ID <-- CLOSING * * Block if a passive or active connection is currently being processed. Then * process the event as follows: * - If we are ESTABLISHED, move to CLOSING and modify the QP state * based on the abrupt flag * - If the connection is already in the CLOSING or IDLE state, the peer is * disconnecting concurrently with us and we've already seen the * DISCONNECT event -- ignore the request and return 0 * - Disconnect on a listening endpoint returns EINVAL */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) { struct iwcm_id_private *cm_id_priv; int ret = 0; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* Wait if we're currently in a connect or accept downcall */ mtx_lock(&cm_id_priv->lock); if (isset(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT)) msleep(&cm_id_priv->connect_wait, &cm_id_priv->lock, 0, "iwcm connect1", 0); switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_CLOSING; /* QP could be <nul> for user-mode client */ if (cm_id_priv->qp) qp = cm_id_priv->qp; else ret = EINVAL; break; case IW_CM_STATE_LISTEN: ret = EINVAL; break; case IW_CM_STATE_CLOSING: /* remote peer closed first */ case IW_CM_STATE_IDLE: /* accept or connect returned !0 */ break; case IW_CM_STATE_CONN_RECV: /* * App called disconnect before/without calling accept after * connect_request event delivered. */ break; case IW_CM_STATE_CONN_SENT: /* Can only get here if wait above fails */ default: panic("just cuz"); } mtx_unlock(&cm_id_priv->lock); if (qp) { if (abrupt) ret = iwcm_modify_qp_err(qp); else ret = iwcm_modify_qp_sqd(qp); /* * If both sides are disconnecting the QP could * already be in ERR or SQD states */ ret = 0; } return ret; } /* * CM_ID <-- DESTROYING * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. */ static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* * Wait if we're currently in a connect or accept downcall. A * listening endpoint should never block here. */ mtx_lock(&cm_id_priv->lock); if (isset(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT)) msleep(&cm_id_priv->connect_wait, &cm_id_priv->lock, 0, "iwcm connect2", 0); switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; mtx_unlock(&cm_id_priv->lock); /* destroy the listening endpoint */ ret = cm_id->device->iwcm->destroy_listen(cm_id); mtx_lock(&cm_id_priv->lock); break; case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_DESTROYING; mtx_unlock(&cm_id_priv->lock); /* Abrupt close of the connection */ (void)iwcm_modify_qp_err(cm_id_priv->qp); mtx_lock(&cm_id_priv->lock); break; case IW_CM_STATE_IDLE: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_DESTROYING; break; case IW_CM_STATE_CONN_RECV: /* * App called destroy before/without calling accept after * receiving connection request event notification or * returned non zero from the event callback function. * In either case, must tell the provider to reject. */ cm_id_priv->state = IW_CM_STATE_DESTROYING; break; case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_DESTROYING: default: panic("just cuz"); break; } if (cm_id_priv->qp) { cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } mtx_unlock(&cm_id_priv->lock); (void)iwcm_deref_id(cm_id_priv); } /* * This function is only called by the application thread and cannot * be called by the event thread. The function will wait for all * references to be released on the cm_id and then free the cm_id * object. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); PANIC_IF(isset(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY)); destroy_cm_id(cm_id); mtx_lock(&cm_id_priv->lock); if (atomic_load_acq_int(&cm_id_priv->refcount)) msleep(&cm_id_priv->destroy_comp, &cm_id_priv->lock, 0, "iwcm destroy", 0); mtx_unlock(&cm_id_priv->lock); free_cm_id(cm_id_priv); } /* * CM_ID <-- LISTEN * * Start listening for connect requests. Generates one CONNECT_REQUEST * event for each inbound connect request. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; mtx_lock(&cm_id_priv->lock); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; mtx_unlock(&cm_id_priv->lock); ret = cm_id->device->iwcm->create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; mtx_lock(&cm_id_priv->lock); break; default: ret = EINVAL; } mtx_unlock(&cm_id_priv->lock); return ret; } /* * CM_ID <-- IDLE * * Rejects an inbound connection request. No events are generated. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); setbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); mtx_lock(&cm_id_priv->lock); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); return (EINVAL); } cm_id_priv->state = IW_CM_STATE_IDLE; mtx_unlock(&cm_id_priv->lock); ret = cm_id->device->iwcm->reject(cm_id, private_data, private_data_len); mtx_lock(&cm_id_priv->lock); clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); return ret; } /* * CM_ID <-- ESTABLISHED * * Accepts an inbound connection request and generates an ESTABLISHED * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block * until the ESTABLISHED event is received from the provider. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); setbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); mtx_lock(&cm_id_priv->lock); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); return (EINVAL); } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { mtx_unlock(&cm_id_priv->lock); return (EINVAL); } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; mtx_unlock(&cm_id_priv->lock); ret = cm_id->device->iwcm->accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; mtx_lock(&cm_id_priv->lock); if (cm_id_priv->qp) { cm_id->device->iwcm->rem_ref(qp); cm_id_priv->qp = NULL; } clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); } return ret; } /* * Active Side: CM_ID <-- CONN_SENT * * If successful, results in the generation of a CONNECT_REPLY * event. iw_cm_disconnect and iw_cm_destroy will block until the * CONNECT_REPLY event is received from the provider. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; int ret; struct ib_qp *qp; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, 4); if (ret) return ret; setbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); mtx_lock(&cm_id_priv->lock); if (cm_id_priv->state != IW_CM_STATE_IDLE) { clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); return (EINVAL); } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { mtx_unlock(&cm_id_priv->lock); return (EINVAL); } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; mtx_unlock(&cm_id_priv->lock); ret = cm_id->device->iwcm->connect(cm_id, iw_param); if (ret) { mtx_lock(&cm_id_priv->lock); if (cm_id_priv->qp) { cm_id->device->iwcm->rem_ref(qp); cm_id_priv->qp = NULL; } PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_SENT); cm_id_priv->state = IW_CM_STATE_IDLE; clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); } return ret; } /* * Passive Side: new CM_ID <-- CONN_RECV * * Handles an inbound connect request. The function creates a new * iw_cm_id to represent the new connection and inherits the client * callback function and other attributes from the listening parent. * * The work item contains a pointer to the listen_cm_id and the event. The * listen_cm_id contains the client cm_handler, context and * device. These are copied when the device is cloned. The event * contains the new four tuple. * * An error on the child should not affect the parent, so this * function does not return a value. */ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, struct iw_cm_event *iw_event) { struct iw_cm_id *cm_id; struct iwcm_id_private *cm_id_priv; int ret; /* * The provider should never generate a connection request * event with a bad status. */ PANIC_IF(iw_event->status); /* * We could be destroying the listening id. If so, ignore this * upcall. */ mtx_lock(&listen_id_priv->lock); if (listen_id_priv->state != IW_CM_STATE_LISTEN) { mtx_unlock(&listen_id_priv->lock); goto out; } mtx_unlock(&listen_id_priv->lock); cm_id = iw_create_cm_id(listen_id_priv->id.device, iw_event->so, listen_id_priv->id.cm_handler, listen_id_priv->id.context); /* If the cm_id could not be created, ignore the request */ if (IS_ERR(cm_id)) goto out; cm_id->provider_data = iw_event->provider_data; cm_id->local_addr = iw_event->local_addr; cm_id->remote_addr = iw_event->remote_addr; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } /* Call the client CM handler */ ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); setbit(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY); destroy_cm_id(cm_id); if (atomic_load_acq_int(&cm_id_priv->refcount)==0) free_cm_id(cm_id_priv); } out: if (iw_event->private_data_len) free(iw_event->private_data, M_DEVBUF); } /* * Passive Side: CM_ID <-- ESTABLISHED * * The provider generated an ESTABLISHED event which means that * the MPA negotion has completed successfully and we are now in MPA * FPDU mode. * * This event can only be received in the CONN_RECV state. If the * remote peer closed, the ESTABLISHED event would be received followed * by the CLOSE event. If the app closes, it will block until we wake * it up after processing this event. */ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret; mtx_lock(&cm_id_priv->lock); /* * We clear the CONNECT_WAIT bit here to allow the callback * function to call iw_cm_disconnect. Calling iw_destroy_cm_id * from a callback handler is not allowed. */ clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_ESTABLISHED; ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); return ret; } /* * Active Side: CM_ID <-- ESTABLISHED * * The app has called connect and is waiting for the established event to * post it's requests to the server. This event will wake up anyone * blocked in iw_cm_disconnect or iw_destroy_id. */ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret; mtx_lock(&cm_id_priv->lock); /* * Clear the connect wait bit so a callback function calling * iw_cm_disconnect will not wait and deadlock this thread */ clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { cm_id_priv->id.local_addr = iw_event->local_addr; cm_id_priv->id.remote_addr = iw_event->remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } mtx_unlock(&cm_id_priv->lock); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); mtx_lock(&cm_id_priv->lock); if (iw_event->private_data_len) free(iw_event->private_data, M_DEVBUF); /* Wake up waiters on connect complete */ wakeup(&cm_id_priv->connect_wait); mtx_unlock(&cm_id_priv->lock); return ret; } /* * CM_ID <-- CLOSING * * If in the ESTABLISHED state, move to CLOSING. */ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { mtx_lock(&cm_id_priv->lock); if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) cm_id_priv->state = IW_CM_STATE_CLOSING; mtx_unlock(&cm_id_priv->lock); } /* * CM_ID <-- IDLE * * If in the ESTBLISHED or CLOSING states, the QP will have have been * moved by the provider to the ERR state. Disassociate the CM_ID from * the QP, move to IDLE, and remove the 'connected' reference. * * If in some other state, the cm_id was destroyed asynchronously. * This is the last reference that will result in waking up * the app thread blocked in iw_destroy_cm_id. */ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret = 0; mtx_lock(&cm_id_priv->lock); if (cm_id_priv->qp) { cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; mtx_unlock(&cm_id_priv->lock); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); mtx_lock(&cm_id_priv->lock); break; case IW_CM_STATE_DESTROYING: break; default: panic("just cuz"); } mtx_unlock(&cm_id_priv->lock); return ret; } static int process_event(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret = 0; switch (iw_event->event) { case IW_CM_EVENT_CONNECT_REQUEST: cm_conn_req_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CONNECT_REPLY: ret = cm_conn_rep_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_ESTABLISHED: ret = cm_conn_est_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_DISCONNECT: cm_disconnect_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CLOSE: ret = cm_close_handler(cm_id_priv, iw_event); break; default: panic("just cuz"); } return ret; } /* * Process events on the work_list for the cm_id. If the callback * function requests that the cm_id be deleted, a flag is set in the * cm_id flags to indicate that when the last reference is * removed, the cm_id is to be destroyed. This is necessary to * distinguish between an object that will be destroyed by the app * thread asleep on the destroy_comp list vs. an object destroyed * here synchronously when the last reference is removed. */ static void cm_work_handler(void *context, int pending) { struct iwcm_work *work = context; struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; int empty; int ret = 0; mtx_lock(&cm_id_priv->lock); empty = TAILQ_EMPTY(&cm_id_priv->work_list); while (!empty) { work = TAILQ_FIRST(&cm_id_priv->work_list); TAILQ_REMOVE(&cm_id_priv->work_list, work, list); empty = TAILQ_EMPTY(&cm_id_priv->work_list); levent = work->event; put_work(work); mtx_unlock(&cm_id_priv->lock); ret = process_event(cm_id_priv, &levent); if (ret) { setbit(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY); destroy_cm_id(&cm_id_priv->id); } PANIC_IF(atomic_load_acq_int(&cm_id_priv->refcount)==0); if (iwcm_deref_id(cm_id_priv)) { if (isset(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY)) { PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } return; } mtx_lock(&cm_id_priv->lock); } mtx_unlock(&cm_id_priv->lock); } /* * This function is called on interrupt context. Schedule events on * the iwcm_wq thread to allow callback functions to downcall into * the CM and/or block. Events are queued to a per-CM_ID * work_list. If this is the first event on the work_list, the work * element is also queued on the iwcm_wq thread. * * Each event holds a reference on the cm_id. Until the last posted * event has been delivered and processed, the cm_id cannot be * deleted. * * Returns: * 0 - the event was handled. * ENOMEM - the event was not handled due to lack of resources. */ static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct iwcm_work *work; struct iwcm_id_private *cm_id_priv; int ret = 0; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); mtx_lock(&cm_id_priv->lock); work = get_work(cm_id_priv); if (!work) { ret = ENOMEM; goto out; } TASK_INIT(&work->task, 0, cm_work_handler, work); work->cm_id = cm_id_priv; work->event = *iw_event; if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || work->event.event == IW_CM_EVENT_CONNECT_REPLY) && work->event.private_data_len) { ret = copy_private_data(&work->event); if (ret) { put_work(work); goto out; } } atomic_add_acq_int(&cm_id_priv->refcount, 1); if (TAILQ_EMPTY(&cm_id_priv->work_list)) { TAILQ_INSERT_TAIL(&cm_id_priv->work_list, work, list); taskqueue_enqueue(iwcm_wq, &work->task); } else TAILQ_INSERT_TAIL(&cm_id_priv->work_list, work, list); out: mtx_unlock(&cm_id_priv->lock); return ret; } static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { int ret; mtx_lock(&cm_id_priv->lock); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; default: ret = EINVAL; break; } mtx_unlock(&cm_id_priv->lock); return ret; } static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { int ret; mtx_lock(&cm_id_priv->lock); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = 0; ret = 0; break; default: ret = EINVAL; break; } mtx_unlock(&cm_id_priv->lock); return ret; } int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: ret = iwcm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = iwcm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = EINVAL; break; } return ret; } static int iw_cm_init(void) { iwcm_wq = taskqueue_create("iw_cm_wq", M_NOWAIT, taskqueue_thread_enqueue, &iwcm_wq); if (!iwcm_wq) return (ENOMEM); taskqueue_start_threads(&iwcm_wq, 1, PI_NET, "iw_cm_wq thread"); return 0; } static void iw_cm_cleanup(void) { taskqueue_free(iwcm_wq); } static int iw_cm_load(module_t mod, int cmd, void *arg) { int err = 0; switch (cmd) { case MOD_LOAD: printf("Loading rdma_iwcm.\n"); iw_cm_init(); break; case MOD_QUIESCE: break; case MOD_UNLOAD: printf("Unloading rdma_iwcm.\n"); iw_cm_cleanup(); break; case MOD_SHUTDOWN: break; default: err = EOPNOTSUPP; break; } return (err); } static moduledata_t mod_data = { "rdma_iwcm", iw_cm_load, 0 }; MODULE_VERSION(rdma_iwcm, 1); MODULE_DEPEND(rdma_iwcm, rdma_core, 1, 1, 1); DECLARE_MODULE(rdma_iwcm, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);