xref: /NextBSD/contrib/ofed/libmlx4/src/verbs.c (revision 65145fa4c81da358fcbc3b650156dab705dfa34e)
1 /*
2  * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #if HAVE_CONFIG_H
34 #  include <config.h>
35 #endif /* HAVE_CONFIG_H */
36 
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <pthread.h>
41 #include <errno.h>
42 #include <netinet/in.h>
43 
44 #include "mlx4.h"
45 #include "mlx4-abi.h"
46 #include "wqe.h"
47 
mlx4_query_device(struct ibv_context * context,struct ibv_device_attr * attr)48 int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
49 {
50 	struct ibv_query_device cmd;
51 	uint64_t raw_fw_ver;
52 	unsigned major, minor, sub_minor;
53 	int ret;
54 
55 	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
56 	if (ret)
57 		return ret;
58 
59 	major     = (raw_fw_ver >> 32) & 0xffff;
60 	minor     = (raw_fw_ver >> 16) & 0xffff;
61 	sub_minor = raw_fw_ver & 0xffff;
62 
63 	snprintf(attr->fw_ver, sizeof attr->fw_ver,
64 		 "%d.%d.%03d", major, minor, sub_minor);
65 
66 	return 0;
67 }
68 
mlx4_query_port(struct ibv_context * context,uint8_t port,struct ibv_port_attr * attr)69 int mlx4_query_port(struct ibv_context *context, uint8_t port,
70 		     struct ibv_port_attr *attr)
71 {
72 	struct ibv_query_port cmd;
73 
74 	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
75 }
76 
mlx4_alloc_pd(struct ibv_context * context)77 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context)
78 {
79 	struct ibv_alloc_pd       cmd;
80 	struct mlx4_alloc_pd_resp resp;
81 	struct mlx4_pd		 *pd;
82 
83 	pd = malloc(sizeof *pd);
84 	if (!pd)
85 		return NULL;
86 
87 	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
88 			     &resp.ibv_resp, sizeof resp)) {
89 		free(pd);
90 		return NULL;
91 	}
92 
93 	pd->pdn = resp.pdn;
94 
95 	return &pd->ibv_pd;
96 }
97 
/*
 * Release a protection domain.  The containing mlx4_pd is freed only when
 * ibv_cmd_dealloc_pd() succeeds; otherwise its error value is returned and
 * the object is left intact.
 */
int mlx4_free_pd(struct ibv_pd *pd)
{
	int rc = ibv_cmd_dealloc_pd(pd);

	if (rc == 0)
		free(to_mpd(pd));

	return rc;
}
109 
mlx4_reg_mr(struct ibv_pd * pd,void * addr,size_t length,enum ibv_access_flags access)110 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
111 			   enum ibv_access_flags access)
112 {
113 	struct ibv_mr *mr;
114 	struct ibv_reg_mr cmd;
115 	int ret;
116 
117 	mr = malloc(sizeof *mr);
118 	if (!mr)
119 		return NULL;
120 
121 #ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS
122 	{
123 		struct ibv_reg_mr_resp resp;
124 
125 		ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
126 				     access, mr, &cmd, sizeof cmd,
127 				     &resp, sizeof resp);
128 	}
129 #else
130 	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr,
131 			     &cmd, sizeof cmd);
132 #endif
133 	if (ret) {
134 		free(mr);
135 		return NULL;
136 	}
137 
138 	return mr;
139 }
140 
/*
 * Deregister a memory region.  The ibv_mr is freed only after
 * ibv_cmd_dereg_mr() reports success; on failure its error value is
 * returned and @mr remains valid.
 */
int mlx4_dereg_mr(struct ibv_mr *mr)
{
	int rc = ibv_cmd_dereg_mr(mr);

	if (rc == 0)
		free(mr);

	return rc;
}
152 
/*
 * Round @req up to a power of two: returns the smallest power of two that
 * is greater than or equal to @req, and 1 whenever @req is 1 or less.
 */
static int align_queue_size(int req)
{
	int nent = 1;

	while (nent < req)
		nent <<= 1;

	return nent;
}
162 
mlx4_create_cq(struct ibv_context * context,int cqe,struct ibv_comp_channel * channel,int comp_vector)163 struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
164 			       struct ibv_comp_channel *channel,
165 			       int comp_vector)
166 {
167 	struct mlx4_create_cq      cmd;
168 	struct mlx4_create_cq_resp resp;
169 	struct mlx4_cq		  *cq;
170 	int			   ret;
171 	struct mlx4_context	   *mctx = to_mctx(context);
172 
173 	/* Sanity check CQ size before proceeding */
174 	if (cqe > 0x3fffff)
175 		return NULL;
176 
177 	cq = malloc(sizeof *cq);
178 	if (!cq)
179 		return NULL;
180 
181 	cq->cons_index = 0;
182 
183 	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
184 		goto err;
185 
186 	cqe = align_queue_size(cqe + 1);
187 
188 	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe, mctx->cqe_size))
189 		goto err;
190 
191 	cq->cqe_size = mctx->cqe_size;
192 
193 	cq->set_ci_db  = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
194 	if (!cq->set_ci_db)
195 		goto err_buf;
196 
197 	cq->arm_db     = cq->set_ci_db + 1;
198 	*cq->arm_db    = 0;
199 	cq->arm_sn     = 1;
200 	*cq->set_ci_db = 0;
201 
202 	cmd.buf_addr = (uintptr_t) cq->buf.buf;
203 	cmd.db_addr  = (uintptr_t) cq->set_ci_db;
204 
205 	ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
206 				&cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
207 				&resp.ibv_resp, sizeof resp);
208 	if (ret)
209 		goto err_db;
210 
211 	cq->cqn = resp.cqn;
212 
213 	return &cq->ibv_cq;
214 
215 err_db:
216 	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_CQ, cq->set_ci_db);
217 
218 err_buf:
219 	mlx4_free_buf(&cq->buf);
220 
221 err:
222 	free(cq);
223 
224 	return NULL;
225 }
226 
mlx4_resize_cq(struct ibv_cq * ibcq,int cqe)227 int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe)
228 {
229 	struct mlx4_cq *cq = to_mcq(ibcq);
230 	struct mlx4_resize_cq cmd;
231 	struct mlx4_buf buf;
232 	int old_cqe, outst_cqe, ret;
233 
234 	/* Sanity check CQ size before proceeding */
235 	if (cqe > 0x3fffff)
236 		return EINVAL;
237 
238 	pthread_spin_lock(&cq->lock);
239 
240 	cqe = align_queue_size(cqe + 1);
241 	if (cqe == ibcq->cqe + 1) {
242 		ret = 0;
243 		goto out;
244 	}
245 
246 	/* Can't be smaller then the number of outstanding CQEs */
247 	outst_cqe = mlx4_get_outstanding_cqes(cq);
248 	if (cqe < outst_cqe + 1) {
249 		ret = 0;
250 		goto out;
251 	}
252 
253 	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe,
254 					cq->cqe_size);
255 	if (ret)
256 		goto out;
257 
258 	old_cqe = ibcq->cqe;
259 	cmd.buf_addr = (uintptr_t) buf.buf;
260 
261 #ifdef IBV_CMD_RESIZE_CQ_HAS_RESP_PARAMS
262 	{
263 		struct ibv_resize_cq_resp resp;
264 		ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
265 					&resp, sizeof resp);
266 	}
267 #else
268 	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd);
269 #endif
270 	if (ret) {
271 		mlx4_free_buf(&buf);
272 		goto out;
273 	}
274 
275 	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
276 
277 	mlx4_free_buf(&cq->buf);
278 	cq->buf = buf;
279 
280 out:
281 	pthread_spin_unlock(&cq->lock);
282 	return ret;
283 }
284 
mlx4_destroy_cq(struct ibv_cq * cq)285 int mlx4_destroy_cq(struct ibv_cq *cq)
286 {
287 	int ret;
288 
289 	ret = ibv_cmd_destroy_cq(cq);
290 	if (ret)
291 		return ret;
292 
293 	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
294 	mlx4_free_buf(&to_mcq(cq)->buf);
295 	free(to_mcq(cq));
296 
297 	return 0;
298 }
299 
mlx4_create_srq(struct ibv_pd * pd,struct ibv_srq_init_attr * attr)300 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
301 				 struct ibv_srq_init_attr *attr)
302 {
303 	struct mlx4_create_srq      cmd;
304 	struct mlx4_create_srq_resp resp;
305 	struct mlx4_srq		   *srq;
306 	int			    ret;
307 
308 	/* Sanity check SRQ size before proceeding */
309 	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
310 		return NULL;
311 
312 	srq = malloc(sizeof *srq);
313 	if (!srq)
314 		return NULL;
315 
316 	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
317 		goto err;
318 
319 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
320 	srq->max_gs  = attr->attr.max_sge;
321 	srq->counter = 0;
322 
323 	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
324 		goto err;
325 
326 	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
327 	if (!srq->db)
328 		goto err_free;
329 
330 	*srq->db = 0;
331 
332 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
333 	cmd.db_addr  = (uintptr_t) srq->db;
334 
335 	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
336 				 &cmd.ibv_cmd, sizeof cmd,
337 				 &resp.ibv_resp, sizeof resp);
338 	if (ret)
339 		goto err_db;
340 
341 	srq->srqn = resp.srqn;
342 
343 	return &srq->ibv_srq;
344 
345 err_db:
346 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
347 
348 err_free:
349 	free(srq->wrid);
350 	mlx4_free_buf(&srq->buf);
351 
352 err:
353 	free(srq);
354 
355 	return NULL;
356 }
357 
mlx4_modify_srq(struct ibv_srq * srq,struct ibv_srq_attr * attr,enum ibv_srq_attr_mask attr_mask)358 int mlx4_modify_srq(struct ibv_srq *srq,
359 		     struct ibv_srq_attr *attr,
360 		     enum ibv_srq_attr_mask attr_mask)
361 {
362 	struct ibv_modify_srq cmd;
363 
364 	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
365 }
366 
mlx4_query_srq(struct ibv_srq * srq,struct ibv_srq_attr * attr)367 int mlx4_query_srq(struct ibv_srq *srq,
368 		    struct ibv_srq_attr *attr)
369 {
370 	struct ibv_query_srq cmd;
371 
372 	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
373 }
374 
mlx4_destroy_srq(struct ibv_srq * ibsrq)375 int mlx4_destroy_srq(struct ibv_srq *ibsrq)
376 {
377 	struct mlx4_srq *srq = to_msrq(ibsrq);
378 	struct mlx4_cq *mcq = NULL;
379 	int ret;
380 
381 	if (ibsrq->xrc_cq) {
382 		/* is an xrc_srq */
383 		mcq = to_mcq(ibsrq->xrc_cq);
384 		mlx4_cq_clean(mcq, 0, srq);
385 		pthread_spin_lock(&mcq->lock);
386 		mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn);
387 		pthread_spin_unlock(&mcq->lock);
388 	}
389 
390 	ret = ibv_cmd_destroy_srq(ibsrq);
391 	if (ret) {
392 		if (ibsrq->xrc_cq) {
393 			pthread_spin_lock(&mcq->lock);
394 			mlx4_store_xrc_srq(to_mctx(ibsrq->context),
395 					   srq->srqn, srq);
396 			pthread_spin_unlock(&mcq->lock);
397 		}
398 		return ret;
399 	}
400 
401 	mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db);
402 	mlx4_free_buf(&srq->buf);
403 	free(srq->wrid);
404 	free(srq);
405 
406 	return 0;
407 }
408 
verify_sizes(struct ibv_qp_init_attr * attr,struct mlx4_context * context)409 static int verify_sizes(struct ibv_qp_init_attr *attr, struct mlx4_context *context)
410 {
411 	int size;
412 	int nsegs;
413 
414 	if (attr->cap.max_send_wr     > context->max_qp_wr ||
415 	    attr->cap.max_recv_wr     > context->max_qp_wr ||
416 	    attr->cap.max_send_sge    > context->max_sge   ||
417 	    attr->cap.max_recv_sge    > context->max_sge)
418 		return -1;
419 
420 	if (attr->cap.max_inline_data) {
421 		nsegs = num_inline_segs(attr->cap.max_inline_data, attr->qp_type);
422 		size = MLX4_MAX_WQE_SIZE - nsegs * sizeof (struct mlx4_wqe_inline_seg);
423 		switch (attr->qp_type) {
424 		case IBV_QPT_UD:
425 			size -= (sizeof (struct mlx4_wqe_ctrl_seg) +
426 				 sizeof (struct mlx4_wqe_datagram_seg));
427 			break;
428 
429 		case IBV_QPT_RC:
430 		case IBV_QPT_UC:
431 		case IBV_QPT_XRC:
432 			size -= (sizeof (struct mlx4_wqe_ctrl_seg) +
433 				 sizeof (struct mlx4_wqe_raddr_seg));
434 			break;
435 
436 		default:
437 			return 0;
438 		}
439 
440 		if (attr->cap.max_inline_data > size)
441 			return -1;
442 	}
443 
444 	return 0;
445 }
446 
mlx4_create_qp(struct ibv_pd * pd,struct ibv_qp_init_attr * attr)447 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
448 {
449 	struct mlx4_create_qp     cmd;
450 	struct ibv_create_qp_resp resp;
451 	struct mlx4_qp		 *qp;
452 	int			  ret;
453 	struct mlx4_context	 *context = to_mctx(pd->context);
454 
455 
456 	/* Sanity check QP size before proceeding */
457 	if (verify_sizes(attr, context))
458 		return NULL;
459 
460 	qp = malloc(sizeof *qp);
461 	if (!qp)
462 		return NULL;
463 
464 	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
465 
466 	/*
467 	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
468 	 * allow HW to prefetch.
469 	 */
470 	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
471 	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
472 	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
473 
474 	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
475 		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
476 	else {
477 		if (attr->cap.max_recv_sge < 1)
478 			attr->cap.max_recv_sge = 1;
479 		if (attr->cap.max_recv_wr < 1)
480 			attr->cap.max_recv_wr = 1;
481 	}
482 
483 	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
484 		goto err;
485 
486 	mlx4_init_qp_indices(qp);
487 
488 	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
489 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
490 		goto err_free;
491 
492 	if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
493 		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
494 		if (!qp->db)
495 			goto err_free;
496 
497 		*qp->db = 0;
498 	}
499 
500 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
501 	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
502 		cmd.db_addr = 0;
503 	else
504 		cmd.db_addr = (uintptr_t) qp->db;
505 	cmd.log_sq_stride   = qp->sq.wqe_shift;
506 	for (cmd.log_sq_bb_count = 0;
507 	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
508 	     ++cmd.log_sq_bb_count)
509 		; /* nothing */
510 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
511 	memset(cmd.reserved, 0, sizeof cmd.reserved);
512 
513 	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
514 
515 	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
516 				&resp, sizeof resp);
517 	if (ret)
518 		goto err_rq_db;
519 
520 	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
521 	if (ret)
522 		goto err_destroy;
523 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
524 
525 	qp->rq.wqe_cnt = attr->cap.max_recv_wr;
526 	qp->rq.max_gs  = attr->cap.max_recv_sge;
527 
528 	/* adjust rq maxima to not exceed reported device maxima */
529 	attr->cap.max_recv_wr = min(context->max_qp_wr, attr->cap.max_recv_wr);
530 	attr->cap.max_recv_sge = min(context->max_sge, attr->cap.max_recv_sge);
531 
532 	qp->rq.max_post = attr->cap.max_recv_wr;
533 	mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
534 
535 	qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
536 	if (attr->sq_sig_all)
537 		qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
538 	else
539 		qp->sq_signal_bits = 0;
540 
541 	return &qp->ibv_qp;
542 
543 err_destroy:
544 	ibv_cmd_destroy_qp(&qp->ibv_qp);
545 
546 err_rq_db:
547 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
548 	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
549 		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
550 
551 err_free:
552 	free(qp->sq.wrid);
553 	if (qp->rq.wqe_cnt)
554 		free(qp->rq.wrid);
555 	mlx4_free_buf(&qp->buf);
556 
557 err:
558 	free(qp);
559 
560 	return NULL;
561 }
562 
mlx4_query_qp(struct ibv_qp * ibqp,struct ibv_qp_attr * attr,enum ibv_qp_attr_mask attr_mask,struct ibv_qp_init_attr * init_attr)563 int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
564 		   enum ibv_qp_attr_mask attr_mask,
565 		   struct ibv_qp_init_attr *init_attr)
566 {
567 	struct ibv_query_qp cmd;
568 	struct mlx4_qp *qp = to_mqp(ibqp);
569 	int ret;
570 
571 	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof cmd);
572 	if (ret)
573 		return ret;
574 
575 	init_attr->cap.max_send_wr     = qp->sq.max_post;
576 	init_attr->cap.max_send_sge    = qp->sq.max_gs;
577 	init_attr->cap.max_inline_data = qp->max_inline_data;
578 
579 	attr->cap = init_attr->cap;
580 
581 	return 0;
582 }
583 
mlx4_modify_qp(struct ibv_qp * qp,struct ibv_qp_attr * attr,enum ibv_qp_attr_mask attr_mask)584 int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
585 		    enum ibv_qp_attr_mask attr_mask)
586 {
587 	struct ibv_modify_qp cmd;
588 	int ret;
589 
590 	if (qp->state == IBV_QPS_RESET &&
591 	    attr_mask & IBV_QP_STATE   &&
592 	    attr->qp_state == IBV_QPS_INIT) {
593 		mlx4_qp_init_sq_ownership(to_mqp(qp));
594 	}
595 
596 	ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd);
597 
598 	if (!ret		       &&
599 	    (attr_mask & IBV_QP_STATE) &&
600 	    attr->qp_state == IBV_QPS_RESET) {
601 		mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
602 			       qp->srq ? to_msrq(qp->srq) : NULL);
603 		if (qp->send_cq != qp->recv_cq)
604 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
605 
606 		mlx4_init_qp_indices(to_mqp(qp));
607 		if (!qp->srq && qp->qp_type != IBV_QPT_XRC)
608 			*to_mqp(qp)->db = 0;
609 	}
610 
611 	return ret;
612 }
613 
mlx4_lock_cqs(struct ibv_qp * qp)614 static void mlx4_lock_cqs(struct ibv_qp *qp)
615 {
616 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
617 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
618 
619 	if (send_cq == recv_cq)
620 		pthread_spin_lock(&send_cq->lock);
621 	else if (send_cq->cqn < recv_cq->cqn) {
622 		pthread_spin_lock(&send_cq->lock);
623 		pthread_spin_lock(&recv_cq->lock);
624 	} else {
625 		pthread_spin_lock(&recv_cq->lock);
626 		pthread_spin_lock(&send_cq->lock);
627 	}
628 }
629 
mlx4_unlock_cqs(struct ibv_qp * qp)630 static void mlx4_unlock_cqs(struct ibv_qp *qp)
631 {
632 	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
633 	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
634 
635 	if (send_cq == recv_cq)
636 		pthread_spin_unlock(&send_cq->lock);
637 	else if (send_cq->cqn < recv_cq->cqn) {
638 		pthread_spin_unlock(&recv_cq->lock);
639 		pthread_spin_unlock(&send_cq->lock);
640 	} else {
641 		pthread_spin_unlock(&send_cq->lock);
642 		pthread_spin_unlock(&recv_cq->lock);
643 	}
644 }
645 
mlx4_destroy_qp(struct ibv_qp * ibqp)646 int mlx4_destroy_qp(struct ibv_qp *ibqp)
647 {
648 	struct mlx4_qp *qp = to_mqp(ibqp);
649 	int ret;
650 
651 	pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
652 	ret = ibv_cmd_destroy_qp(ibqp);
653 	if (ret) {
654 		pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
655 		return ret;
656 	}
657 
658 	mlx4_lock_cqs(ibqp);
659 
660 	__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
661 			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
662 	if (ibqp->send_cq != ibqp->recv_cq)
663 		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
664 
665 	mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
666 
667 	mlx4_unlock_cqs(ibqp);
668 	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
669 
670 	if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC)
671 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
672 	free(qp->sq.wrid);
673 	if (qp->rq.wqe_cnt)
674 		free(qp->rq.wrid);
675 	mlx4_free_buf(&qp->buf);
676 	free(qp);
677 
678 	return 0;
679 }
680 
mlx4_create_ah(struct ibv_pd * pd,struct ibv_ah_attr * attr)681 struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
682 {
683 	struct mlx4_ah *ah;
684 	struct ibv_port_attr port_attr;
685 	uint8_t is_mcast;
686 
687 	ah = malloc(sizeof *ah);
688 	if (!ah)
689 		return NULL;
690 
691 	memset(ah, 0, sizeof *ah);
692 
693 	ah->av.port_pd   = htonl(to_mpd(pd)->pdn | (attr->port_num << 24));
694 	ah->av.g_slid    = attr->src_path_bits;
695 	ah->av.dlid      = htons(attr->dlid);
696 	if (attr->static_rate) {
697 		ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET;
698 		/* XXX check rate cap? */
699 	}
700 	ah->av.sl_tclass_flowlabel = htonl(attr->sl << 28);
701 	if (attr->is_global) {
702 		ah->av.g_slid   |= 0x80;
703 		ah->av.gid_index = attr->grh.sgid_index;
704 		ah->av.hop_limit = attr->grh.hop_limit;
705 		ah->av.sl_tclass_flowlabel |=
706 			htonl((attr->grh.traffic_class << 20) |
707 				    attr->grh.flow_label);
708 		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
709 	}
710 
711 	if (ibv_query_port(pd->context, attr->port_num, &port_attr))
712 		goto err;
713 
714 	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
715 		if (ibv_resolve_eth_gid(pd, attr->port_num,
716 					(union ibv_gid *)ah->av.dgid,
717 					attr->grh.sgid_index,
718 					ah->mac, &ah->vlan,
719 					&ah->tagged, &is_mcast))
720 			goto err;
721 
722 		if (is_mcast) {
723 			ah->av.dlid = htons(0xc000);
724 			ah->av.port_pd |= htonl(1 << 31);
725 		}
726 		if (ah->tagged) {
727 			ah->av.port_pd |= htonl(1 << 29);
728 			ah->vlan |= (attr->sl & 7) << 13;
729 		}
730 	}
731 
732 
733 	return &ah->ibv_ah;
734 err:
735 	free(ah);
736 	return NULL;
737 }
738 
/*
 * Destroy an address handle.  No command is issued here; the containing
 * mlx4_ah is simply freed.  Always returns 0.
 */
int mlx4_destroy_ah(struct ibv_ah *ah)
{
	struct mlx4_ah *mah = to_mah(ah);

	free(mah);

	return 0;
}
745 
746 #ifdef HAVE_IBV_XRC_OPS
mlx4_create_xrc_srq(struct ibv_pd * pd,struct ibv_xrc_domain * xrc_domain,struct ibv_cq * xrc_cq,struct ibv_srq_init_attr * attr)747 struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
748 				    struct ibv_xrc_domain *xrc_domain,
749 				    struct ibv_cq *xrc_cq,
750 				    struct ibv_srq_init_attr *attr)
751 {
752 	struct mlx4_create_xrc_srq  cmd;
753 	struct mlx4_create_srq_resp resp;
754 	struct mlx4_srq		   *srq;
755 	int			    ret;
756 
757 	/* Sanity check SRQ size before proceeding */
758 	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
759 		return NULL;
760 
761 	srq = malloc(sizeof *srq);
762 	if (!srq)
763 		return NULL;
764 
765 	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
766 		goto err;
767 
768 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
769 	srq->max_gs  = attr->attr.max_sge;
770 	srq->counter = 0;
771 
772 	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
773 		goto err;
774 
775 	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
776 	if (!srq->db)
777 		goto err_free;
778 
779 	*srq->db = 0;
780 
781 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
782 	cmd.db_addr  = (uintptr_t) srq->db;
783 
784 	ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr,
785 				     xrc_domain->handle,
786 				     xrc_cq->handle,
787 				     &cmd.ibv_cmd, sizeof cmd,
788 				     &resp.ibv_resp, sizeof resp);
789 	if (ret)
790 		goto err_db;
791 
792 	srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn;
793 
794 	ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq);
795 	if (ret)
796 		goto err_destroy;
797 
798 	return &srq->ibv_srq;
799 
800 err_destroy:
801 	ibv_cmd_destroy_srq(&srq->ibv_srq);
802 
803 err_db:
804 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
805 
806 err_free:
807 	free(srq->wrid);
808 	mlx4_free_buf(&srq->buf);
809 
810 err:
811 	free(srq);
812 
813 	return NULL;
814 }
815 
mlx4_open_xrc_domain(struct ibv_context * context,int fd,int oflag)816 struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
817 					    int fd, int oflag)
818 {
819 	int ret;
820 	struct mlx4_open_xrc_domain_resp resp;
821 	struct mlx4_xrc_domain *xrcd;
822 
823 	xrcd = malloc(sizeof *xrcd);
824 	if (!xrcd)
825 		return NULL;
826 
827 	ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd,
828 				      &resp.ibv_resp, sizeof resp);
829 	if (ret) {
830 		free(xrcd);
831 		return NULL;
832 	}
833 
834 	xrcd->xrcdn = resp.xrcdn;
835 	return &xrcd->ibv_xrcd;
836 }
837 
/*
 * Close an XRC domain.  @d is freed only when ibv_cmd_close_xrc_domain()
 * succeeds; the command's return value is passed back either way.
 *
 * NOTE(review): @d itself is handed to free(), not its containing
 * mlx4_xrc_domain; presumably ibv_xrcd is the first member of that
 * struct -- confirm against its definition.
 */
int mlx4_close_xrc_domain(struct ibv_xrc_domain *d)
{
	int rc = ibv_cmd_close_xrc_domain(d);

	if (rc == 0)
		free(d);

	return rc;
}
846 
/*
 * Create an XRC receive QP: forwards to ibv_cmd_create_xrc_rcv_qp() and
 * returns its result unchanged.
 */
int mlx4_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
			   uint32_t *xrc_qp_num)
{
	int rc;

	rc = ibv_cmd_create_xrc_rcv_qp(init_attr, xrc_qp_num);

	return rc;
}
853 
/*
 * Modify an XRC receive QP: forwards to ibv_cmd_modify_xrc_rcv_qp() and
 * returns its result unchanged.
 */
int mlx4_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			   uint32_t xrc_qp_num,
			   struct ibv_qp_attr *attr,
			   int attr_mask)
{
	int rc;

	rc = ibv_cmd_modify_xrc_rcv_qp(xrc_domain, xrc_qp_num,
				       attr, attr_mask);

	return rc;
}
862 
mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain * xrc_domain,uint32_t xrc_qp_num,struct ibv_qp_attr * attr,int attr_mask,struct ibv_qp_init_attr * init_attr)863 int mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
864 			  uint32_t xrc_qp_num,
865 			  struct ibv_qp_attr *attr,
866 			  int attr_mask,
867 			  struct ibv_qp_init_attr *init_attr)
868 {
869 	int ret;
870 
871 	ret = ibv_cmd_query_xrc_rcv_qp(xrc_domain, xrc_qp_num,
872 				       attr, attr_mask, init_attr);
873 	if (ret)
874 		return ret;
875 
876 	init_attr->cap.max_send_wr = init_attr->cap.max_send_sge = 1;
877 	init_attr->cap.max_recv_sge = init_attr->cap.max_recv_wr = 0;
878 	init_attr->cap.max_inline_data = 0;
879 	init_attr->recv_cq = init_attr->send_cq = NULL;
880 	init_attr->srq = NULL;
881 	init_attr->xrc_domain = xrc_domain;
882 	init_attr->qp_type = IBV_QPT_XRC;
883 	init_attr->qp_context = NULL;
884 	attr->cap = init_attr->cap;
885 
886 	return 0;
887 }
888 
/*
 * Register with an XRC receive QP: forwards to ibv_cmd_reg_xrc_rcv_qp()
 * and returns its result unchanged.
 */
int mlx4_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			uint32_t xrc_qp_num)
{
	int rc;

	rc = ibv_cmd_reg_xrc_rcv_qp(xrc_domain, xrc_qp_num);

	return rc;
}
894 
/*
 * Unregister from an XRC receive QP: forwards to
 * ibv_cmd_unreg_xrc_rcv_qp() and returns its result unchanged.
 */
int mlx4_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			  uint32_t xrc_qp_num)
{
	int rc;

	rc = ibv_cmd_unreg_xrc_rcv_qp(xrc_domain, xrc_qp_num);

	return rc;
}
900 
901 #endif
902