/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

enum {
	MLX5_UMR_ALIGN	= 2048
};

static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

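/*
 * Completion callback for the asynchronous create_mkey commands issued by
 * add_keys().  On success the new MR gets a fresh mkey variant, is added to
 * its cache bucket and inserted into the device mkey radix tree; on failure
 * the MR is freed and cache filling is throttled for one second via the
 * delay timer.
 */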
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
				&mr->mmr);
	if (err)
		pr_err("Error inserting to mr tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);
}

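/*
 * Asynchronously create up to @num UMR-capable mkeys for cache bucket @c.
 * At most MAX_PENDING_REG_MR create commands may be outstanding per bucket;
 * beyond that -EAGAIN is returned and the caller is expected to retry.
 * Completions are handled in reg_mr_callback().
 */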
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = 1 << 6; /* free */
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

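/*
 * Destroy up to @num cached mkeys from bucket @c, stopping early if the
 * bucket runs empty.
 */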
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (count > sizeof(lbuf) - 1)
		return -EINVAL;

	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	lbuf[count] = 0;
	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (count > sizeof(lbuf) - 1)
		return -EINVAL;

	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	lbuf[count] = 0;
	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

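/*
 * Per-bucket maintenance: grow a bucket one mkey at a time while it holds
 * fewer than twice its limit (unless filling is delayed), and shrink it when
 * it holds more than twice its limit, no bucket is still being filled and
 * nothing was added during the last 300 seconds.
 */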
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

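/*
 * Take an MR from the cache bucket matching @order, falling back to higher
 * order buckets if the exact one is empty.  Buckets found empty (or left
 * below their limit) get their refill work queued.  Returns NULL on a miss.
 */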
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

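/*
 * Return an MR to its cache bucket and queue the shrink work if the bucket
 * now holds more than twice its limit.
 */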
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

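/* Drain cache bucket @c, destroying every mkey it holds.  Used on teardown. */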
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

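/*
 * Set up the MR cache: one bucket per order in [2, MAX_MR_CACHE_ENTRIES + 1],
 * each with its own list, lock and (delayed) work item, all served by a
 * single-threaded workqueue.  Initial filling of each bucket is kicked off
 * here.
 */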
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

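/* Regions of up to 2^17 pages can be registered through the UMR cache. */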
static int use_umr(int order)
{
	return order <= 17;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

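/*
 * Register a user memory region with an MR taken from the cache: the page
 * list is DMA mapped and handed to the device through a UMR work request
 * posted on the dedicated UMR QP, waiting synchronously for its completion.
 * Returns ERR_PTR(-EAGAIN) when no suitable cached MR is available.
 */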
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size = sizeof(u64) * npages;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr->pas) {
		err = -ENOMEM;
		goto free_mr;
	}

	mlx5_ib_populate_pas(dev, umem, page_shift,
			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
				 DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->dma)) {
		err = -ENOMEM;
		goto free_pas;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
			 page_shift, virt_addr, len, access_flags);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmr.iova = virt_addr;
	mr->mmr.size = len;
	mr->mmr.pd = to_mpd(pd)->pdn;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);

free_pas:
	kfree(mr->pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

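/*
 * Slow path registration: build a full create_mkey command carrying the page
 * list inline, for regions that cannot be served from the UMR cache.
 */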
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

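/*
 * Entry point for user memory registration: pin the pages, pick the best
 * page size for the region, then register either through the UMR cache
 * (reg_umr) or, when the cache cannot serve the request, with a regular
 * create_mkey command (reg_create).
 */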
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return ERR_CAST(umem);
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev->priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

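/*
 * Invalidate a cache-allocated mkey by posting a UMR unregister work request
 * and waiting for its completion.
 */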
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev->priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

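/*
 * Create a free (unpopulated) mkey meant to be configured later through UMR.
 * When IB_MR_SIGNATURE_EN is requested, KLM access mode is used and a pair
 * of PSVs (memory and wire) is created for signature offload.
 */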
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
				struct ib_mr_init_attr *mr_init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int access_mode, err;
	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	access_mode = MLX5_ACCESS_MODE_MTT;

	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
		u32 psv_index[2];

		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
							   MLX5_MKEY_BSF_EN);
		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	}

	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);

	return err;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key) {
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		} else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}