mthca_mr.c revision a03a5a67b243e9a24805ee18272ad25e5b2ca92c
1/*
2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#include <linux/slab.h>
36#include <linux/init.h>
37#include <linux/errno.h>
38
39#include "mthca_dev.h"
40#include "mthca_cmd.h"
41#include "mthca_memfree.h"
42
43/*
44 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
45 */
/* All multi-byte fields are written big-endian (cpu_to_be*) before being
 * handed to the HCA. */
struct mthca_mpt_entry {
	u32 flags;		/* MTHCA_MPT_FLAG_* ORed with IB access flags */
	u32 page_size;		/* log2 of buffer size, minus 12 */
	u32 key;
	u32 pd;			/* protection domain number */
	u64 start;		/* starting virtual address (iova) of region */
	u64 length;		/* region length in bytes */
	u32 lkey;
	u32 window_count;
	u32 window_count_limit;
	u64 mtt_seg;		/* device address of the region's first MTT segment */
	u32 mtt_sz;		/* Arbel only */
	u32 reserved[2];
} __attribute__((packed));
60
61#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
62#define MTHCA_MPT_FLAG_MIO           (1 << 17)
63#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
64#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)
65#define MTHCA_MPT_FLAG_REGION        (1 <<  8)
66
67#define MTHCA_MTT_FLAG_PRESENT       1
68
69#define MTHCA_MPT_STATUS_SW 0xF0
70#define MTHCA_MPT_STATUS_HW 0x00
71
72/*
73 * Buddy allocator for MTT segments (currently not very efficient
74 * since it doesn't keep a free list and just searches linearly
75 * through the bitmaps)
76 */
77
/*
 * Allocate a block of 2^order segments from the buddy allocator.
 * Returns the first segment number of the block, or (u32) -1 if no
 * free block of sufficient order exists.
 */
static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
	int o;
	int m;
	u32 seg;

	spin_lock(&buddy->lock);

	/* Scan upward from the requested order for any free block. */
	for (o = order; o <= buddy->max_order; ++o) {
		m = 1 << (buddy->max_order - o);
		seg = find_first_bit(buddy->bits[o], m);
		if (seg < m)
			goto found;
	}

	spin_unlock(&buddy->lock);
	return -1;

 found:
	clear_bit(seg, buddy->bits[o]);

	/*
	 * If the block found is larger than requested, split it down to
	 * the right order, marking each unused buddy half as free.
	 */
	while (o > order) {
		--o;
		seg <<= 1;
		set_bit(seg ^ 1, buddy->bits[o]);
	}

	spin_unlock(&buddy->lock);

	/* Convert the block index at this order into a segment number. */
	seg <<= order;

	return seg;
}
111
112static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
113{
114	seg >>= order;
115
116	spin_lock(&buddy->lock);
117
118	while (test_bit(seg ^ 1, buddy->bits[order])) {
119		clear_bit(seg ^ 1, buddy->bits[order]);
120		seg >>= 1;
121		++order;
122	}
123
124	set_bit(seg, buddy->bits[order]);
125
126	spin_unlock(&buddy->lock);
127}
128
/*
 * Set up a buddy allocator covering 2^max_order segments: one free
 * bitmap per order, with the single top-order block initially free.
 * Returns 0 on success or -ENOMEM.
 */
static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
	int i, s;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);

	buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
			      GFP_KERNEL);
	if (!buddy->bits)
		goto err_out;

	/* Zero the pointer array so the error path can kfree() blindly. */
	memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));

	/* One bitmap per order; order i tracks 2^(max_order - i) blocks. */
	for (i = 0; i <= buddy->max_order; ++i) {
		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
		buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
		if (!buddy->bits[i])
			goto err_out_free;
		bitmap_zero(buddy->bits[i],
			    1 << (buddy->max_order - i));
	}

	/* Initially the whole range is one free block of maximum order. */
	set_bit(0, buddy->bits[buddy->max_order]);

	return 0;

err_out_free:
	/* Unallocated slots are NULL (memset above); kfree(NULL) is a no-op. */
	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

	kfree(buddy->bits);

err_out:
	return -ENOMEM;
}
165
166static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
167{
168	int i;
169
170	for (i = 0; i <= buddy->max_order; ++i)
171		kfree(buddy->bits[i]);
172
173	kfree(buddy->bits);
174}
175
176static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
177			   struct mthca_buddy *buddy)
178{
179	u32 seg = mthca_buddy_alloc(buddy, order);
180
181	if (seg == -1)
182		return -1;
183
184	if (mthca_is_memfree(dev))
185		if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
186					  seg + (1 << order) - 1)) {
187			mthca_buddy_free(buddy, seg, order);
188			seg = -1;
189		}
190
191	return seg;
192}
193
/*
 * Return 2^order MTT segments starting at seg to the buddy allocator
 * and drop the reference on the backing ICM range.
 */
static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
			   struct mthca_buddy* buddy)
{
	mthca_buddy_free(buddy, seg, order);
	mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
			      seg + (1 << order) - 1);
}
201
202static inline u32 tavor_hw_index_to_key(u32 ind)
203{
204	return ind;
205}
206
207static inline u32 tavor_key_to_hw_index(u32 key)
208{
209	return key;
210}
211
212static inline u32 arbel_hw_index_to_key(u32 ind)
213{
214	return (ind >> 24) | (ind << 8);
215}
216
217static inline u32 arbel_key_to_hw_index(u32 key)
218{
219	return (key << 24) | (key >> 8);
220}
221
222static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
223{
224	if (mthca_is_memfree(dev))
225		return arbel_hw_index_to_key(ind);
226	else
227		return tavor_hw_index_to_key(ind);
228}
229
230static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
231{
232	if (mthca_is_memfree(dev))
233		return arbel_key_to_hw_index(key);
234	else
235		return tavor_key_to_hw_index(key);
236}
237
/*
 * Allocate a "no translation" MR: a physically-addressed region with
 * start 0 and length ~0, i.e. covering all of memory, with no MTTs.
 * Returns 0 on success or a negative errno.
 */
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
			   u32 access, struct mthca_mr *mr)
{
	void *mailbox = NULL;
	struct mthca_mpt_entry *mpt_entry;
	u32 key;
	int err;
	u8 status;

	might_sleep();

	/* order == -1 marks the MR as having no MTT allocation to free. */
	mr->order = -1;
	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	/* Mem-free HCAs keep the MPT table in ICM; pin the entry's page. */
	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;
	}

	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
			  GFP_KERNEL);
	if (!mailbox) {
		err = -ENOMEM;
		goto err_out_table;
	}
	mpt_entry = MAILBOX_ALIGN(mailbox);

	/* PHYSICAL plus start 0 / length ~0 makes the region span memory. */
	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_PHYSICAL    |
				       MTHCA_MPT_FLAG_REGION      |
				       access);
	mpt_entry->page_size = 0;
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	mpt_entry->start     = 0;
	mpt_entry->length    = ~0ULL;

	/* Zero everything from lkey onward (window counts, mtt_seg, ...). */
	memset(&mpt_entry->lkey, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));

	/* Hand ownership of the MPT entry to the firmware. */
	err = mthca_SW2HW_MPT(dev, mpt_entry,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_table;
	} else if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_table;
	}

	kfree(mailbox);
	return err;

err_out_table:
	/* NOTE(review): reached even on non-mem-free HCAs where
	 * mthca_table_get() was not called -- presumably a no-op there;
	 * confirm in mthca_memfree.c. */
	mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	kfree(mailbox);	/* kfree(NULL) is safe on the early error path */
	return err;
}
307
308int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
309			u64 *buffer_list, int buffer_size_shift,
310			int list_len, u64 iova, u64 total_size,
311			u32 access, struct mthca_mr *mr)
312{
313	void *mailbox;
314	u64 *mtt_entry;
315	struct mthca_mpt_entry *mpt_entry;
316	u32 key;
317	int err = -ENOMEM;
318	u8 status;
319	int i;
320
321	might_sleep();
322	WARN_ON(buffer_size_shift >= 32);
323
324	key = mthca_alloc(&dev->mr_table.mpt_alloc);
325	if (key == -1)
326		return -ENOMEM;
327	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
328
329	if (mthca_is_memfree(dev)) {
330		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
331		if (err)
332			goto err_out_mpt_free;
333	}
334
335	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
336	     i < list_len;
337	     i <<= 1, ++mr->order)
338		; /* nothing */
339
340	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
341				       	&dev->mr_table.mtt_buddy);
342	if (mr->first_seg == -1)
343		goto err_out_table;
344
345	/*
346	 * If list_len is odd, we add one more dummy entry for
347	 * firmware efficiency.
348	 */
349	mailbox = kmalloc(max(sizeof *mpt_entry,
350			      (size_t) 8 * (list_len + (list_len & 1) + 2)) +
351			  MTHCA_CMD_MAILBOX_EXTRA,
352			  GFP_KERNEL);
353	if (!mailbox)
354		goto err_out_free_mtt;
355
356	mtt_entry = MAILBOX_ALIGN(mailbox);
357
358	mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
359				   mr->first_seg * MTHCA_MTT_SEG_SIZE);
360	mtt_entry[1] = 0;
361	for (i = 0; i < list_len; ++i)
362		mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
363					       MTHCA_MTT_FLAG_PRESENT);
364	if (list_len & 1) {
365		mtt_entry[i + 2] = 0;
366		++list_len;
367	}
368
369	if (0) {
370		mthca_dbg(dev, "Dumping MPT entry\n");
371		for (i = 0; i < list_len + 2; ++i)
372			printk(KERN_ERR "[%2d] %016llx\n",
373			       i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
374	}
375
376	err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
377	if (err) {
378		mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
379		goto err_out_mailbox_free;
380	}
381	if (status) {
382		mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
383			   status);
384		err = -EINVAL;
385		goto err_out_mailbox_free;
386	}
387
388	mpt_entry = MAILBOX_ALIGN(mailbox);
389
390	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
391				       MTHCA_MPT_FLAG_MIO         |
392				       MTHCA_MPT_FLAG_REGION      |
393				       access);
394
395	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
396	mpt_entry->key       = cpu_to_be32(key);
397	mpt_entry->pd        = cpu_to_be32(pd);
398	mpt_entry->start     = cpu_to_be64(iova);
399	mpt_entry->length    = cpu_to_be64(total_size);
400	memset(&mpt_entry->lkey, 0,
401	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
402	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base +
403					   mr->first_seg * MTHCA_MTT_SEG_SIZE);
404
405	if (0) {
406		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
407		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
408			if (i % 4 == 0)
409				printk("[%02x] ", i * 4);
410			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
411			if ((i + 1) % 4 == 0)
412				printk("\n");
413		}
414	}
415
416	err = mthca_SW2HW_MPT(dev, mpt_entry,
417			      key & (dev->limits.num_mpts - 1),
418			      &status);
419	if (err)
420		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
421	else if (status) {
422		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
423			   status);
424		err = -EINVAL;
425	}
426
427	kfree(mailbox);
428	return err;
429
430err_out_mailbox_free:
431	kfree(mailbox);
432
433err_out_free_mtt:
434	mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
435
436err_out_table:
437	mthca_table_put(dev, dev->mr_table.mpt_table, key);
438
439err_out_mpt_free:
440	mthca_free(&dev->mr_table.mpt_alloc, key);
441	return err;
442}
443
444/* Free mr or fmr */
445static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
446			      u32 first_seg, struct mthca_buddy *buddy)
447{
448	if (order >= 0)
449		mthca_free_mtt(dev, first_seg, order, buddy);
450
451	mthca_table_put(dev, dev->mr_table.mpt_table,
452			arbel_key_to_hw_index(lkey));
453
454	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
455}
456
/*
 * Destroy a regular MR: move the MPT entry back to software ownership
 * with HW2SW_MPT, then release its MTTs, ICM reference, and key.
 */
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
	int err;
	u8 status;

	might_sleep();

	err = mthca_HW2SW_MPT(dev, NULL,
			      key_to_hw_index(dev, mr->ibmr.lkey) &
			      (dev->limits.num_mpts - 1),
			      &status);
	/* Failure here is only logged; teardown proceeds regardless. */
	if (err)
		mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
	else if (status)
		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
			   status);

	mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
			  &dev->mr_table.mtt_buddy);
}
477
478int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
479		    u32 access, struct mthca_fmr *mr)
480{
481	struct mthca_mpt_entry *mpt_entry;
482	void *mailbox;
483	u64 mtt_seg;
484	u32 key, idx;
485	u8 status;
486	int list_len = mr->attr.max_pages;
487	int err = -ENOMEM;
488	int i;
489
490	might_sleep();
491
492	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
493		return -EINVAL;
494
495	/* For Arbel, all MTTs must fit in the same page. */
496	if (mthca_is_memfree(dev) &&
497	    mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
498		return -EINVAL;
499
500	mr->maps = 0;
501
502	key = mthca_alloc(&dev->mr_table.mpt_alloc);
503	if (key == -1)
504		return -ENOMEM;
505
506	idx = key & (dev->limits.num_mpts - 1);
507	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
508
509	if (mthca_is_memfree(dev)) {
510		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
511		if (err)
512			goto err_out_mpt_free;
513
514		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
515		BUG_ON(!mr->mem.arbel.mpt);
516	} else
517		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
518		       	sizeof *(mr->mem.tavor.mpt) * idx;
519
520	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
521	     i < list_len;
522	     i <<= 1, ++mr->order)
523		; /* nothing */
524
525	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
526				       	dev->mr_table.fmr_mtt_buddy);
527	if (mr->first_seg == -1)
528		goto err_out_table;
529
530	mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
531
532	if (mthca_is_memfree(dev)) {
533		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
534						      mr->first_seg);
535		BUG_ON(!mr->mem.arbel.mtts);
536	} else
537		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
538
539	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
540			  GFP_KERNEL);
541	if (!mailbox)
542		goto err_out_free_mtt;
543
544	mpt_entry = MAILBOX_ALIGN(mailbox);
545
546	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
547				       MTHCA_MPT_FLAG_MIO         |
548				       MTHCA_MPT_FLAG_REGION      |
549				       access);
550
551	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
552	mpt_entry->key       = cpu_to_be32(key);
553	mpt_entry->pd        = cpu_to_be32(pd);
554	memset(&mpt_entry->start, 0,
555	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
556	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
557
558	if (0) {
559		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
560		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
561			if (i % 4 == 0)
562				printk("[%02x] ", i * 4);
563			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
564			if ((i + 1) % 4 == 0)
565				printk("\n");
566		}
567	}
568
569	err = mthca_SW2HW_MPT(dev, mpt_entry,
570			      key & (dev->limits.num_mpts - 1),
571			      &status);
572	if (err) {
573		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
574		goto err_out_mailbox_free;
575	}
576	if (status) {
577		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
578			   status);
579		err = -EINVAL;
580		goto err_out_mailbox_free;
581	}
582
583	kfree(mailbox);
584	return 0;
585
586err_out_mailbox_free:
587	kfree(mailbox);
588
589err_out_free_mtt:
590	mthca_free_mtt(dev, mr->first_seg, mr->order,
591		       dev->mr_table.fmr_mtt_buddy);
592
593err_out_table:
594	mthca_table_put(dev, dev->mr_table.mpt_table, key);
595
596err_out_mpt_free:
597	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
598	return err;
599}
600
601int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
602{
603	if (fmr->maps)
604		return -EBUSY;
605
606	mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
607			  dev->mr_table.fmr_mtt_buddy);
608	return 0;
609}
610
611static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
612				  int list_len, u64 iova)
613{
614	int i, page_mask;
615
616	if (list_len > fmr->attr.max_pages)
617		return -EINVAL;
618
619	page_mask = (1 << fmr->attr.page_size) - 1;
620
621	/* We are getting page lists, so va must be page aligned. */
622	if (iova & page_mask)
623		return -EINVAL;
624
625	/* Trust the user not to pass misaligned data in page_list */
626	if (0)
627		for (i = 0; i < list_len; ++i) {
628			if (page_list[i] & ~page_mask)
629				return -EINVAL;
630		}
631
632	if (fmr->maps >= fmr->attr.max_maps)
633		return -EINVAL;
634
635	return 0;
636}
637
638
/*
 * Map a page list into a Tavor FMR by writing the MTT entries and MPT
 * fields directly through the device's PCI BAR, with no firmware
 * command.  The key is advanced by num_mpts on each map -- presumably
 * so that stale keys from a previous mapping no longer match; confirm
 * against the Mellanox PRM.
 */
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	struct mthca_mpt_entry mpt_entry;
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

	/* Advance the key to its next value for this mapping. */
	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

	/* Take the MPT out of hardware ownership while we update it. */
	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);

	for (i = 0; i < list_len; ++i) {
		__be64 mtt_entry = cpu_to_be64(page_list[i] |
					       MTHCA_MTT_FLAG_PRESENT);
		mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
	}

	mpt_entry.lkey   = cpu_to_be32(key);
	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
	mpt_entry.start  = cpu_to_be64(iova);

	/* Write the new key, then the start..lkey fields of the MPT. */
	writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
	memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
		    offsetof(struct mthca_mpt_entry, window_count) -
		    offsetof(struct mthca_mpt_entry, start));

	/* Give the MPT back to hardware to activate the mapping. */
	writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);

	return 0;
}
679
/*
 * Map a page list into an Arbel (mem-free) FMR by updating the MPT
 * and MTT entries in ICM directly.  Ordering is enforced with wmb():
 * the MPT must be in software ownership before the entries change,
 * and fully updated before it is handed back to hardware.
 */
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

	/* Advance the key to its next value for this mapping --
	 * presumably to invalidate stale keys; confirm against PRM. */
	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

	/* First byte of the MPT entry holds the ownership status. */
	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;

	wmb();

	for (i = 0; i < list_len; ++i)
		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
						     MTHCA_MTT_FLAG_PRESENT);

	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);

	wmb();

	/* All fields visible; return ownership to the hardware. */
	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;

	wmb();

	return 0;
}
719
720void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
721{
722	u32 key;
723
724	if (!fmr->maps)
725		return;
726
727	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
728	key &= dev->limits.num_mpts - 1;
729	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
730
731	fmr->maps = 0;
732
733	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
734}
735
736void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
737{
738	u32 key;
739
740	if (!fmr->maps)
741		return;
742
743	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
744	key &= dev->limits.num_mpts - 1;
745	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
746
747	fmr->maps = 0;
748
749	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
750}
751
/*
 * Set up the MR table at driver init: the MPT key allocator, the MTT
 * buddy allocator(s), and -- when FMRs are supported -- ioremapped
 * access to the MPT/MTT tables plus a dedicated FMR MTT buddy.
 * Returns 0 on success or a negative errno.
 */
int __devinit mthca_init_mr_table(struct mthca_dev *dev)
{
	int err, i;

	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
			       dev->limits.num_mpts,
			       ~0, dev->limits.reserved_mrws);
	if (err)
		return err;

	/* A Tavor with hidden DDR cannot support FMRs; disable them. */
	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
		dev->limits.fmr_reserved_mtts = 0;
	else
		dev->mthca_flags |= MTHCA_FLAG_FMR;

	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
			       fls(dev->limits.num_mtt_segs - 1));

	if (err)
		goto err_mtt_buddy;

	dev->mr_table.tavor_fmr.mpt_base = NULL;
	dev->mr_table.tavor_fmr.mtt_base = NULL;

	if (dev->limits.fmr_reserved_mtts) {
		i = fls(dev->limits.fmr_reserved_mtts - 1);

		if (i >= 31) {
			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
			err = -EINVAL;
			goto err_fmr_mpt;
		}

		/* Map the MPT entries that FMRs will update directly. */
		dev->mr_table.tavor_fmr.mpt_base =
		       	ioremap(dev->mr_table.mpt_base,
				(1 << i) * sizeof (struct mthca_mpt_entry));

		if (!dev->mr_table.tavor_fmr.mpt_base) {
			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mpt;
		}

		/* Map the MTT segments reserved for FMR use. */
		dev->mr_table.tavor_fmr.mtt_base =
			ioremap(dev->mr_table.mtt_base,
				(1 << i) * MTHCA_MTT_SEG_SIZE);
		if (!dev->mr_table.tavor_fmr.mtt_base) {
			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mtt;
		}

		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
		if (err)
			goto err_fmr_mtt_buddy;

		/* Prevent regular MRs from using FMR keys */
		/*
		 * NOTE(review): mthca_buddy_alloc() returns a segment
		 * number, not an errno; treating any nonzero value as
		 * failure works only if this first reservation returns
		 * seg 0 -- confirm.
		 */
		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
		if (err)
			goto err_reserve_fmr;

		dev->mr_table.fmr_mtt_buddy =
		       	&dev->mr_table.tavor_fmr.mtt_buddy;
	} else
		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;

	/* FMR table is always the first, take reserved MTTs out of there */
	if (dev->limits.reserved_mtts) {
		i = fls(dev->limits.reserved_mtts - 1);

		if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
			mthca_warn(dev, "MTT table of order %d is too small.\n",
				  dev->mr_table.fmr_mtt_buddy->max_order);
			err = -ENOMEM;
			goto err_reserve_mtts;
		}
	}

	return 0;

err_reserve_mtts:
err_reserve_fmr:
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

err_fmr_mtt_buddy:
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);

err_fmr_mtt:
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

err_fmr_mpt:
	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

err_mtt_buddy:
	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

	return err;
}
854
/*
 * Tear down everything mthca_init_mr_table() set up: the buddy
 * allocators, the FMR ioremaps (if any), and the MPT key allocator.
 */
void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
{
	/* XXX check if any MRs are still allocated? */
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

	/* Bases are NULL unless FMR support mapped them at init. */
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}
870