mthca_mr.c revision 2a1d9b7f09aaaacf235656cb32a40ba2c79590b3
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
 */

#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"

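/*
 * An MTT is a power-of-two run of translation-table segments handed
 * out by one of the buddy allocators; first_seg is the index of the
 * first segment of the run.
 */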
struct mthca_mtt {
	struct mthca_buddy *buddy;
	int                 order;
	u32                 first_seg;
};

/*
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mthca_mpt_entry {
	u32 flags;
	u32 page_size;
	u32 key;
	u32 pd;
	u64 start;
	u64 length;
	u32 lkey;
	u32 window_count;
	u32 window_count_limit;
	u64 mtt_seg;
	u32 mtt_sz;		/* Arbel only */
	u32 reserved[2];
} __attribute__((packed));

#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO           (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)
#define MTHCA_MPT_FLAG_REGION        (1 <<  8)

#define MTHCA_MTT_FLAG_PRESENT       1

#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00

/*
 * Buddy allocator for MTT segments (currently not very efficient
 * since it doesn't keep a free list and just searches linearly
 * through the bitmaps)
 */

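/*
 * Allocate 2^order contiguous segments: scan the free bitmaps from
 * the requested order upwards, split a larger free block if needed
 * (marking the buddy of each split half free), and return the first
 * segment of the block, or -1 if nothing suitable is free.
 */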
static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
	int o;
	int m;
	u32 seg;

	spin_lock(&buddy->lock);

	for (o = order; o <= buddy->max_order; ++o) {
		m = 1 << (buddy->max_order - o);
		seg = find_first_bit(buddy->bits[o], m);
		if (seg < m)
			goto found;
	}

	spin_unlock(&buddy->lock);
	return -1;

 found:
	clear_bit(seg, buddy->bits[o]);

	while (o > order) {
		--o;
		seg <<= 1;
		set_bit(seg ^ 1, buddy->bits[o]);
	}

	spin_unlock(&buddy->lock);

	seg <<= order;

	return seg;
}

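/*
 * Free 2^order segments starting at seg: walk up the orders,
 * coalescing with the buddy block whenever it is also free, then mark
 * the resulting block free at the final order.
 */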
static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
{
	seg >>= order;

	spin_lock(&buddy->lock);

	while (test_bit(seg ^ 1, buddy->bits[order])) {
		clear_bit(seg ^ 1, buddy->bits[order]);
		seg >>= 1;
		++order;
	}

	set_bit(seg, buddy->bits[order]);

	spin_unlock(&buddy->lock);
}

static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
	int i, s;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);

	buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
			      GFP_KERNEL);
	if (!buddy->bits)
		goto err_out;

	memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));

	for (i = 0; i <= buddy->max_order; ++i) {
		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
		buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
		if (!buddy->bits[i])
			goto err_out_free;
		bitmap_zero(buddy->bits[i],
			    1 << (buddy->max_order - i));
	}

	set_bit(0, buddy->bits[buddy->max_order]);

	return 0;

err_out_free:
	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

	kfree(buddy->bits);

err_out:
	return -ENOMEM;
}

static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
{
	int i;

	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

	kfree(buddy->bits);
}

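/*
 * Allocate a power-of-two range of MTT segments from the given buddy
 * and, on mem-free HCAs, make sure the ICM backing that range is
 * mapped.  Returns the first segment, or -1 on failure.
 */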
static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
				 struct mthca_buddy *buddy)
{
	u32 seg = mthca_buddy_alloc(buddy, order);

	if (seg == -1)
		return -1;

	if (mthca_is_memfree(dev))
		if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
					  seg + (1 << order) - 1)) {
			mthca_buddy_free(buddy, seg, order);
			seg = -1;
		}

	return seg;
}

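/*
 * Allocate an MTT big enough for 'size' page entries, rounded up to a
 * power-of-two number of segments (each segment holds
 * MTHCA_MTT_SEG_SIZE / 8 entries).
 */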
static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
					   struct mthca_buddy *buddy)
{
	struct mthca_mtt *mtt;
	int i;

	if (size <= 0)
		return ERR_PTR(-EINVAL);

	mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	mtt->buddy = buddy;
	mtt->order = 0;
	for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
		++mtt->order;

	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
	if (mtt->first_seg == -1) {
		kfree(mtt);
		return ERR_PTR(-ENOMEM);
	}

	return mtt;
}

struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
{
	return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
}

void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
{
	if (!mtt)
		return;

	mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);

	mthca_table_put_range(dev, dev->mr_table.mtt_table,
			      mtt->first_seg,
			      mtt->first_seg + (1 << mtt->order) - 1);

	kfree(mtt);
}

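/*
 * Write list_len MTT entries starting at start_index within the
 * region's MTT.  The WRITE_MTT mailbox holds the 64-bit address of
 * the first target entry, one reserved word, and then at most
 * MTHCA_MAILBOX_SIZE / 8 - 2 entries, so longer lists are sent in
 * several firmware commands.
 */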
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
		    int start_index, u64 *buffer_list, int list_len)
{
	struct mthca_mailbox *mailbox;
	u64 *mtt_entry;
	int err = 0;
	u8 status;
	int i;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	mtt_entry = mailbox->buf;

	while (list_len > 0) {
		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
					   mtt->first_seg * MTHCA_MTT_SEG_SIZE +
					   start_index * 8);
		mtt_entry[1] = 0;
		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
			mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
						       MTHCA_MTT_FLAG_PRESENT);

		/*
		 * If we have an odd number of entries to write, add
		 * one more dummy entry for firmware efficiency.
		 */
		if (i & 1)
			mtt_entry[i + 2] = 0;

		err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
		if (err) {
			mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
			goto out;
		}
		if (status) {
			mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
				   status);
			err = -EINVAL;
			goto out;
		}

		list_len    -= i;
		start_index += i;
		buffer_list += i;
	}

out:
	mthca_free_mailbox(dev, mailbox);
	return err;
}

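/*
 * A memory key encodes the index of its MPT entry.  Tavor uses the
 * index directly, while mem-free HCAs (Arbel) rotate it left by 8
 * bits, so the two forms have to be converted when talking to the
 * hardware.
 */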
static inline u32 tavor_hw_index_to_key(u32 ind)
{
	return ind;
}

static inline u32 tavor_key_to_hw_index(u32 key)
{
	return key;
}

static inline u32 arbel_hw_index_to_key(u32 ind)
{
	return (ind >> 24) | (ind << 8);
}

static inline u32 arbel_key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
	if (mthca_is_memfree(dev))
		return arbel_hw_index_to_key(ind);
	else
		return tavor_hw_index_to_key(ind);
}

static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
	if (mthca_is_memfree(dev))
		return arbel_key_to_hw_index(key);
	else
		return tavor_key_to_hw_index(key);
}

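/*
 * Build an MPT entry for a new memory region and pass ownership to
 * the HCA with SW2HW_MPT.  A region with no MTT (mr->mtt == NULL) is
 * marked physical, i.e. it maps a flat range with no translation.
 */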
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
	struct mthca_mailbox *mailbox;
	struct mthca_mpt_entry *mpt_entry;
	u32 key;
	int i;
	int err;
	u8 status;

	might_sleep();

	WARN_ON(buffer_size_shift >= 32);

	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;
	}

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto err_out_table;
	}
	mpt_entry = mailbox->buf;

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_REGION      |
				       access);
	if (!mr->mtt)
		mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);

	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	mpt_entry->start     = cpu_to_be64(iova);
	mpt_entry->length    = cpu_to_be64(total_size);

	memset(&mpt_entry->lkey, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));

	if (mr->mtt)
		mpt_entry->mtt_seg =
			cpu_to_be64(dev->mr_table.mtt_base +
				    mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);

	if (0) {
		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	err = mthca_SW2HW_MPT(dev, mailbox,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_mailbox;
	} else if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_mailbox;
	}

	mthca_free_mailbox(dev, mailbox);
	return err;

err_out_mailbox:
	mthca_free_mailbox(dev, mailbox);

err_out_table:
	mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	return err;
}

int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
			   u32 access, struct mthca_mr *mr)
{
	mr->mtt = NULL;
	return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
}

int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
			u64 *buffer_list, int buffer_size_shift,
			int list_len, u64 iova, u64 total_size,
			u32 access, struct mthca_mr *mr)
{
	int err;

	mr->mtt = mthca_alloc_mtt(dev, list_len);
	if (IS_ERR(mr->mtt))
		return PTR_ERR(mr->mtt);

	err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
	if (err) {
		mthca_free_mtt(dev, mr->mtt);
		return err;
	}

	err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
			     total_size, access, mr);
	if (err)
		mthca_free_mtt(dev, mr->mtt);

	return err;
}

/* Free mr or fmr */
static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
{
	mthca_table_put(dev, dev->mr_table.mpt_table,
			arbel_key_to_hw_index(lkey));

	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
}

void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
	int err;
	u8 status;

	might_sleep();

	err = mthca_HW2SW_MPT(dev, NULL,
			      key_to_hw_index(dev, mr->ibmr.lkey) &
			      (dev->limits.num_mpts - 1),
			      &status);
	if (err)
		mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
	else if (status)
		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
			   status);

	mthca_free_region(dev, mr->ibmr.lkey);
	mthca_free_mtt(dev, mr->mtt);
}

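/*
 * FMRs keep pointers to their MPT and MTT entries so they can be
 * remapped without firmware commands: on Tavor these point into the
 * HCA's tables through the ioremaps set up in mthca_init_mr_table(),
 * and on mem-free HCAs they point at the entries in the ICM tables in
 * host memory.
 */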
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
		    u32 access, struct mthca_fmr *mr)
{
	struct mthca_mpt_entry *mpt_entry;
	struct mthca_mailbox *mailbox;
	u64 mtt_seg;
	u32 key, idx;
	u8 status;
	int list_len = mr->attr.max_pages;
	int err = -ENOMEM;
	int i;

	might_sleep();

	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
		return -EINVAL;

	/* For Arbel, all MTTs must fit in the same page. */
	if (mthca_is_memfree(dev) &&
	    mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
		return -EINVAL;

	mr->maps = 0;

	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;

	idx = key & (dev->limits.num_mpts - 1);
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;

		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
		BUG_ON(!mr->mem.arbel.mpt);
	} else
		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
			sizeof *(mr->mem.tavor.mpt) * idx;

	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
	if (IS_ERR(mr->mtt))
		goto err_out_table;

	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;

	if (mthca_is_memfree(dev)) {
		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
						      mr->mtt->first_seg);
		BUG_ON(!mr->mem.arbel.mtts);
	} else
		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		goto err_out_free_mtt;

	mpt_entry = mailbox->buf;

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_REGION      |
				       access);

	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	memset(&mpt_entry->start, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);

	if (0) {
		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	err = mthca_SW2HW_MPT(dev, mailbox,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_mailbox_free;
	}
	if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_mailbox_free;
	}

	mthca_free_mailbox(dev, mailbox);
	return 0;

err_out_mailbox_free:
	mthca_free_mailbox(dev, mailbox);

err_out_free_mtt:
	mthca_free_mtt(dev, mr->mtt);

err_out_table:
	mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	return err;
}

int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (fmr->maps)
		return -EBUSY;

	mthca_free_region(dev, fmr->ibmr.lkey);
	mthca_free_mtt(dev, fmr->mtt);

	return 0;
}

static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
				  int list_len, u64 iova)
{
	int i, page_mask;

	if (list_len > fmr->attr.max_pages)
		return -EINVAL;

	page_mask = (1 << fmr->attr.page_size) - 1;

	/* We are getting page lists, so va must be page aligned. */
	if (iova & page_mask)
		return -EINVAL;

	/* Trust the user not to pass misaligned data in page_list */
	if (0)
		for (i = 0; i < list_len; ++i) {
			if (page_list[i] & page_mask)
				return -EINVAL;
		}

	if (fmr->maps >= fmr->attr.max_maps)
		return -EINVAL;

	return 0;
}

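/*
 * Remap a Tavor FMR: take the MPT out of hardware ownership, write
 * the new page list through the FMR MTT ioremap, update the key,
 * length and start address, and hand the MPT back to the hardware.
 * The key is bumped by num_mpts on every map so that keys from a
 * previous mapping no longer match the MPT entry.
 */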
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	struct mthca_mpt_entry mpt_entry;
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);

	for (i = 0; i < list_len; ++i) {
		__be64 mtt_entry = cpu_to_be64(page_list[i] |
					       MTHCA_MTT_FLAG_PRESENT);
		mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
	}

	mpt_entry.lkey   = cpu_to_be32(key);
	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
	mpt_entry.start  = cpu_to_be64(iova);

	writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
	memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
		    offsetof(struct mthca_mpt_entry, window_count) -
		    offsetof(struct mthca_mpt_entry, start));

	writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);

	return 0;
}

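/*
 * The mem-free remap path does the same thing, but the MPT and MTT
 * live in host memory (ICM), so they are updated with ordinary stores
 * separated by memory barriers instead of MMIO writes.
 */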
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;

	wmb();

	for (i = 0; i < list_len; ++i)
		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
						     MTHCA_MTT_FLAG_PRESENT);

	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);

	wmb();

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;

	wmb();

	return 0;
}

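/*
 * Unmapping an FMR resets the key to its base value and flips the MPT
 * back to software ownership; the MTT contents are simply overwritten
 * by the next map.
 */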
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	u32 key;

	if (!fmr->maps)
		return;

	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
	key &= dev->limits.num_mpts - 1;
	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

	fmr->maps = 0;

	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
}

void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	u32 key;

	if (!fmr->maps)
		return;

	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
	key &= dev->limits.num_mpts - 1;
	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

	fmr->maps = 0;

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
}

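/*
 * Set up the MR machinery: the MPT index allocator, the MTT buddy
 * allocator(s) and, when FMRs are supported, ioremaps of the start of
 * the MPT and MTT tables (used by the Tavor FMR path) plus a separate
 * buddy allocator for FMR MTTs so that they stay inside that window.
 */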
int __devinit mthca_init_mr_table(struct mthca_dev *dev)
{
	int err, i;

	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
			       dev->limits.num_mpts,
			       ~0, dev->limits.reserved_mrws);
	if (err)
		return err;

	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
		dev->limits.fmr_reserved_mtts = 0;
	else
		dev->mthca_flags |= MTHCA_FLAG_FMR;

	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
			       fls(dev->limits.num_mtt_segs - 1));

	if (err)
		goto err_mtt_buddy;

	dev->mr_table.tavor_fmr.mpt_base = NULL;
	dev->mr_table.tavor_fmr.mtt_base = NULL;

	if (dev->limits.fmr_reserved_mtts) {
		i = fls(dev->limits.fmr_reserved_mtts - 1);

		if (i >= 31) {
			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
			err = -EINVAL;
			goto err_fmr_mpt;
		}

		dev->mr_table.tavor_fmr.mpt_base =
			ioremap(dev->mr_table.mpt_base,
				(1 << i) * sizeof (struct mthca_mpt_entry));

		if (!dev->mr_table.tavor_fmr.mpt_base) {
			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mpt;
		}

		dev->mr_table.tavor_fmr.mtt_base =
			ioremap(dev->mr_table.mtt_base,
				(1 << i) * MTHCA_MTT_SEG_SIZE);
		if (!dev->mr_table.tavor_fmr.mtt_base) {
			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mtt;
		}

		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
		if (err)
			goto err_fmr_mtt_buddy;

		/* Keep regular MRs away from the MTT segments reserved for FMRs */
		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
		if (err)
			goto err_reserve_fmr;

		dev->mr_table.fmr_mtt_buddy =
			&dev->mr_table.tavor_fmr.mtt_buddy;
	} else
		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;

	/* FMR table is always the first, take reserved MTTs out of there */
	if (dev->limits.reserved_mtts) {
		i = fls(dev->limits.reserved_mtts - 1);

		if (mthca_alloc_mtt_range(dev, i,
					  dev->mr_table.fmr_mtt_buddy) == -1) {
			mthca_warn(dev, "MTT table of order %d is too small.\n",
				  dev->mr_table.fmr_mtt_buddy->max_order);
			err = -ENOMEM;
			goto err_reserve_mtts;
		}
	}

	return 0;

err_reserve_mtts:
err_reserve_fmr:
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

err_fmr_mtt_buddy:
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);

err_fmr_mtt:
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

err_fmr_mpt:
	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

err_mtt_buddy:
	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

	return err;
}

void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
{
	/* XXX check if any MRs are still allocated? */
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}