mthca_memfree.c revision f02b16bea2d8411b21a531fc381e066985895387
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include <linux/mm.h>

#include "mthca_memfree.h"
#include "mthca_dev.h"
#include "mthca_cmd.h"

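/*
 * Mem-free HCAs keep little or no context memory on board; the driver
 * supplies it from host memory instead, handing the pages to the
 * firmware with the MAP_ICM family of commands.  This file manages
 * those ICM allocations, plus the UAR context (UARC) pages that hold
 * doorbell records.
 */
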
/*
 * We allocate in as big chunks as we can, up to a maximum of 256 KB
 * per chunk.
 */
enum {
	MTHCA_ICM_ALLOC_SIZE   = 1 << 18,
	MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};

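/*
 * Table of userspace doorbell pages for one user context.  Each entry
 * tracks the user virtual address a page was registered at, the
 * pinned page as a single-entry scatterlist, and how many doorbell
 * records are currently allocated in it.  The zero-length page[]
 * array is sized when the table is allocated in
 * mthca_init_user_db_tab().
 */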
struct mthca_user_db_table {
	struct semaphore mutex;
	struct {
		u64                uvirt;
		struct scatterlist mem;
		int                refcount;
	}                page[0];
};

void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
{
	struct mthca_icm_chunk *chunk, *tmp;
	int i;

	if (!icm)
		return;

	list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
		if (chunk->nsg > 0)
			pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
				     PCI_DMA_BIDIRECTIONAL);

		for (i = 0; i < chunk->npages; ++i)
			__free_pages(chunk->mem[i].page,
				     get_order(chunk->mem[i].length));

		kfree(chunk);
	}

	kfree(icm);
}

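/*
 * Allocate npages worth of ICM, starting with the highest page order
 * that still fits (capped by MTHCA_ICM_ALLOC_SIZE) and dropping to
 * smaller orders as higher-order allocations fail.  Pages are
 * collected into chunks of up to MTHCA_ICM_CHUNK_LEN scatterlist
 * entries, and each chunk is DMA-mapped once it fills up (or when the
 * allocation completes).  On failure everything allocated so far is
 * released through mthca_free_icm() and NULL is returned.
 */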
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
				  unsigned int gfp_mask)
{
	struct mthca_icm *icm;
	struct mthca_icm_chunk *chunk = NULL;
	int cur_order;

	icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
	if (!icm)
		return icm;

	icm->refcount = 0;
	INIT_LIST_HEAD(&icm->chunk_list);

	cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);

	while (npages > 0) {
		if (!chunk) {
			chunk = kmalloc(sizeof *chunk,
					gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
			if (!chunk)
				goto fail;

			chunk->npages = 0;
			chunk->nsg    = 0;
			list_add_tail(&chunk->list, &icm->chunk_list);
		}

		while (1 << cur_order > npages)
			--cur_order;

		chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
		if (chunk->mem[chunk->npages].page) {
			chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
			chunk->mem[chunk->npages].offset = 0;

			if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) {
				chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
							chunk->npages,
							PCI_DMA_BIDIRECTIONAL);

				if (chunk->nsg <= 0)
					goto fail;

				chunk = NULL;
			}

			npages -= 1 << cur_order;
		} else {
			--cur_order;
			if (cur_order < 0)
				goto fail;
		}
	}

	if (chunk) {
		chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
					chunk->npages,
					PCI_DMA_BIDIRECTIONAL);

		if (chunk->nsg <= 0)
			goto fail;
	}

	return icm;

fail:
	mthca_free_icm(dev, icm);
	return NULL;
}

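/*
 * Take a reference on the ICM chunk backing object 'obj', allocating
 * and mapping the chunk on first use.  Objects are fixed-size, so the
 * chunk index is simply
 *
 *	i = (obj & (num_obj - 1)) * obj_size / MTHCA_TABLE_CHUNK_SIZE
 *
 * For example, with a (hypothetical) 64-byte object, each 256 KB
 * chunk covers 4096 consecutive objects.  The chunk is mapped at ICM
 * virtual address table->virt + i * MTHCA_TABLE_CHUNK_SIZE.
 */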
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
	int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
	int ret = 0;
	u8 status;

	down(&table->mutex);

	if (table->icm[i]) {
		++table->icm[i]->refcount;
		goto out;
	}

	table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
					(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
					__GFP_NOWARN);
	if (!table->icm[i]) {
		ret = -ENOMEM;
		goto out;
	}

	if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
			  &status) || status) {
		mthca_free_icm(dev, table->icm[i]);
		table->icm[i] = NULL;
		ret = -ENOMEM;
		goto out;
	}

	++table->icm[i]->refcount;

out:
	up(&table->mutex);
	return ret;
}

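/*
 * Drop a reference on the ICM chunk backing object 'obj'.  When the
 * last reference goes away, the chunk is unmapped from the HCA (the
 * size is passed to UNMAP_ICM as a count of 4 KB pages, hence the
 * >> 12) and its pages are freed.
 */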
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
	int i;
	u8 status;

	if (!mthca_is_memfree(dev))
		return;

	i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;

	down(&table->mutex);

	if (--table->icm[i]->refcount == 0) {
		mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
				MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
		mthca_free_icm(dev, table->icm[i]);
		table->icm[i] = NULL;
	}

	up(&table->mutex);
}

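/*
 * Translate an object index into a kernel virtual address by walking
 * the scatterlists of the ICM chunk that holds it.  This only works
 * for tables allocated from lowmem (without __GFP_HIGHMEM), so
 * highmem tables simply return NULL.
 */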
void *mthca_table_find(struct mthca_icm_table *table, int obj)
{
	int idx, offset, i;
	struct mthca_icm_chunk *chunk;
	struct mthca_icm *icm;
	struct page *page = NULL;

	if (!table->lowmem)
		return NULL;

	down(&table->mutex);

	idx = (obj & (table->num_obj - 1)) * table->obj_size;
	icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
	offset = idx % MTHCA_TABLE_CHUNK_SIZE;

	if (!icm)
		goto out;

	list_for_each_entry(chunk, &icm->chunk_list, list) {
		for (i = 0; i < chunk->npages; ++i) {
			if (chunk->mem[i].length >= offset) {
				page = chunk->mem[i].page;
				break;
			}
			offset -= chunk->mem[i].length;
		}
	}

out:
	up(&table->mutex);
	return page ? lowmem_page_address(page) + offset : NULL;
}

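/*
 * Reference every ICM chunk covering objects start..end, stepping by
 * the number of objects per chunk.  If any chunk cannot be allocated,
 * the references taken so far are dropped before the error is
 * returned.
 */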
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
			  int start, int end)
{
	int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
	int i, err;

	for (i = start; i <= end; i += inc) {
		err = mthca_table_get(dev, table, i);
		if (err)
			goto fail;
	}

	return 0;

fail:
	while (i > start) {
		i -= inc;
		mthca_table_put(dev, table, i);
	}

	return err;
}

void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
			   int start, int end)
{
	int i;

	if (!mthca_is_memfree(dev))
		return;

	for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
		mthca_table_put(dev, table, i);
}

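/*
 * Create a table of 'nobj' objects of 'obj_size' bytes living at ICM
 * virtual address 'virt'.  Only the chunks covering the first
 * 'reserved' objects (those owned by the firmware) are allocated and
 * mapped here; they get an extra reference so they are never freed.
 * All other chunks are populated on demand by mthca_table_get().
 */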
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
					      u64 virt, int obj_size,
					      int nobj, int reserved,
					      int use_lowmem)
{
	struct mthca_icm_table *table;
	int num_icm;
	unsigned chunk_size;
	int i;
	u8 status;

	num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE;

	table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
	if (!table)
		return NULL;

	table->virt     = virt;
	table->num_icm  = num_icm;
	table->num_obj  = nobj;
	table->obj_size = obj_size;
	table->lowmem   = use_lowmem;
	init_MUTEX(&table->mutex);

	for (i = 0; i < num_icm; ++i)
		table->icm[i] = NULL;

	for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
		chunk_size = MTHCA_TABLE_CHUNK_SIZE;
		if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
			chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;

		table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
						(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
						__GFP_NOWARN);
		if (!table->icm[i])
			goto err;
		if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
				  &status) || status) {
			mthca_free_icm(dev, table->icm[i]);
			table->icm[i] = NULL;
			goto err;
		}

		/*
		 * Add a reference to this ICM chunk so that it never
		 * gets freed (since it contains reserved firmware objects).
		 */
		++table->icm[i]->refcount;
	}

	return table;

err:
	for (i = 0; i < num_icm; ++i)
		if (table->icm[i]) {
			mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
					MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
			mthca_free_icm(dev, table->icm[i]);
		}

	kfree(table);

	return NULL;
}

void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
{
	int i;
	u8 status;

	for (i = 0; i < table->num_icm; ++i)
		if (table->icm[i]) {
			mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
					MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
			mthca_free_icm(dev, table->icm[i]);
		}

	kfree(table);
}

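/*
 * ICM virtual address of doorbell page 'page' within the UAR context
 * (UARC) region of the given UAR: each UAR index owns uarc_size bytes
 * of UARC, split into 4 KB pages of doorbell records.
 */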
static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
	return dev->uar_table.uarc_base +
		uar->index * dev->uar_table.uarc_size +
		page * 4096;
}

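/*
 * Map the userspace page holding doorbell record 'index' into the UAR
 * context: pin the page with get_user_pages(), DMA-map it as a
 * single-entry scatterlist, and hand it to the firmware with
 * MAP_ICM_page.  Pages are refcounted per doorbell record; a page
 * must be 4 KB aligned, and all records on it must be registered with
 * the same user virtual address.
 */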
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
		      struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
	int ret = 0;
	u8 status;
	int i;

	if (!mthca_is_memfree(dev))
		return 0;

	if (index < 0 || index > dev->uar_table.uarc_size / 8)
		return -EINVAL;

	down(&db_tab->mutex);

	i = index / MTHCA_DB_REC_PER_PAGE;

	if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE)       ||
	    (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
	    (uaddr & 4095)) {
		ret = -EINVAL;
		goto out;
	}

	if (db_tab->page[i].refcount) {
		++db_tab->page[i].refcount;
		goto out;
	}

	ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
			     &db_tab->page[i].mem.page, NULL);
	if (ret < 0)
		goto out;

	db_tab->page[i].mem.length = 4096;
	db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;

	ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
	if (ret < 0) {
		put_page(db_tab->page[i].mem.page);
		goto out;
	}

	ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
				 mthca_uarc_virt(dev, uar, i), &status);
	if (!ret && status)
		ret = -EINVAL;
	if (ret) {
		pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
		put_page(db_tab->page[i].mem.page);
		goto out;
	}

	db_tab->page[i].uvirt    = uaddr;
	db_tab->page[i].refcount = 1;

out:
	up(&db_tab->mutex);
	return ret;
}

void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
			 struct mthca_user_db_table *db_tab, int index)
{
	if (!mthca_is_memfree(dev))
		return;

	/*
	 * To make our bookkeeping simpler, we don't unmap DB
	 * pages until we clean up the whole db table.
	 */

	down(&db_tab->mutex);

	--db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;

	up(&db_tab->mutex);
}

struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
{
	struct mthca_user_db_table *db_tab;
	int npages;
	int i;

	if (!mthca_is_memfree(dev))
		return NULL;

	npages = dev->uar_table.uarc_size / 4096;
	db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
	if (!db_tab)
		return ERR_PTR(-ENOMEM);

	init_MUTEX(&db_tab->mutex);
	for (i = 0; i < npages; ++i) {
		db_tab->page[i].refcount = 0;
		db_tab->page[i].uvirt    = 0;
	}

	return db_tab;
}

void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
			       struct mthca_user_db_table *db_tab)
{
	int i;
	u8 status;

	if (!mthca_is_memfree(dev))
		return;

	for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
		if (db_tab->page[i].uvirt) {
			mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
			pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
			put_page(db_tab->page[i].mem.page);
		}
	}
}

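/*
 * Allocate a kernel doorbell record of the given type for queue
 * number 'qn', returning its index (and a pointer to the record in
 * *db).  Doorbell pages are carved out of the driver's own UAR
 * context: CQ arm and send queue records fill pages upward from the
 * start of the UARC (group 1, bounded by max_group1), while CQ
 * set_ci, receive queue and SRQ records fill pages downward from the
 * end (group 2, bounded by min_group2).  A record goes into a
 * partially used page when one exists; otherwise a fresh 4 KB page is
 * allocated with dma_alloc_coherent() and mapped into the UARC with
 * MAP_ICM_page, growing the group boundary if necessary.  Each page
 * holds MTHCA_DB_REC_PER_PAGE 8-byte records tracked by a bitmap, and
 * the record itself encodes (qn << 8) | (type << 5).
 */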
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
{
	int group;
	int start, end, dir;
	int i, j;
	struct mthca_db_page *page;
	int ret = 0;
	u8 status;

	down(&dev->db_tab->mutex);

	switch (type) {
	case MTHCA_DB_TYPE_CQ_ARM:
	case MTHCA_DB_TYPE_SQ:
		group = 0;
		start = 0;
		end   = dev->db_tab->max_group1;
		dir   = 1;
		break;

	case MTHCA_DB_TYPE_CQ_SET_CI:
	case MTHCA_DB_TYPE_RQ:
	case MTHCA_DB_TYPE_SRQ:
		group = 1;
		start = dev->db_tab->npages - 1;
		end   = dev->db_tab->min_group2;
		dir   = -1;
		break;

	default:
		ret = -EINVAL;
		goto out;
	}

	for (i = start; i != end; i += dir)
		if (dev->db_tab->page[i].db_rec &&
		    !bitmap_full(dev->db_tab->page[i].used,
				 MTHCA_DB_REC_PER_PAGE)) {
			page = dev->db_tab->page + i;
			goto found;
		}

	for (i = start; i != end; i += dir)
		if (!dev->db_tab->page[i].db_rec) {
			page = dev->db_tab->page + i;
			goto alloc;
		}

	if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
		ret = -ENOMEM;
		goto out;
	}

	if (group == 0)
		++dev->db_tab->max_group1;
	else
		--dev->db_tab->min_group2;

	page = dev->db_tab->page + end;

alloc:
	page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
					  &page->mapping, GFP_KERNEL);
	if (!page->db_rec) {
		ret = -ENOMEM;
		goto out;
	}
	memset(page->db_rec, 0, 4096);

	ret = mthca_MAP_ICM_page(dev, page->mapping,
				 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
	if (!ret && status)
		ret = -EINVAL;
	if (ret) {
		dma_free_coherent(&dev->pdev->dev, 4096,
				  page->db_rec, page->mapping);
		goto out;
	}

	bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);

found:
	j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
	set_bit(j, page->used);

	if (group == 1)
		j = MTHCA_DB_REC_PER_PAGE - 1 - j;

	ret = i * MTHCA_DB_REC_PER_PAGE + j;

	page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));

	*db = (__be32 *) &page->db_rec[j];

out:
	up(&dev->db_tab->mutex);

	return ret;
}

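/*
 * Release doorbell record 'db_index'.  The containing page is
 * unmapped and freed only when it becomes completely empty and is
 * either the topmost group 1 page or a group 2 page (i >=
 * max_group1 - 1); other empty pages stay mapped until
 * mthca_cleanup_db_tab() sweeps them up.
 */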
void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
{
	int i, j;
	struct mthca_db_page *page;
	u8 status;

	i = db_index / MTHCA_DB_REC_PER_PAGE;
	j = db_index % MTHCA_DB_REC_PER_PAGE;

	page = dev->db_tab->page + i;

	down(&dev->db_tab->mutex);

	page->db_rec[j] = 0;
	if (i >= dev->db_tab->min_group2)
		j = MTHCA_DB_REC_PER_PAGE - 1 - j;
	clear_bit(j, page->used);

	if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
	    i >= dev->db_tab->max_group1 - 1) {
		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);

		dma_free_coherent(&dev->pdev->dev, 4096,
				  page->db_rec, page->mapping);
		page->db_rec = NULL;

		if (i == dev->db_tab->max_group1) {
			--dev->db_tab->max_group1;
			/* XXX may be able to unmap more pages now */
		}
		if (i == dev->db_tab->min_group2)
			++dev->db_tab->min_group2;
	}

	up(&dev->db_tab->mutex);
}

int mthca_init_db_tab(struct mthca_dev *dev)
{
	int i;

	if (!mthca_is_memfree(dev))
		return 0;

	dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
	if (!dev->db_tab)
		return -ENOMEM;

	init_MUTEX(&dev->db_tab->mutex);

	dev->db_tab->npages     = dev->uar_table.uarc_size / 4096;
	dev->db_tab->max_group1 = 0;
	dev->db_tab->min_group2 = dev->db_tab->npages - 1;

	dev->db_tab->page = kmalloc(dev->db_tab->npages *
				    sizeof *dev->db_tab->page,
				    GFP_KERNEL);
	if (!dev->db_tab->page) {
		kfree(dev->db_tab);
		return -ENOMEM;
	}

	for (i = 0; i < dev->db_tab->npages; ++i)
		dev->db_tab->page[i].db_rec = NULL;

	return 0;
}

void mthca_cleanup_db_tab(struct mthca_dev *dev)
{
	int i;
	u8 status;

	if (!mthca_is_memfree(dev))
		return;

	/*
	 * Because we don't always free our UARC pages when they
	 * become empty (to keep mthca_free_db() simpler), we need to
	 * sweep through the doorbell pages and free any leftover
	 * pages now.
	 */
	for (i = 0; i < dev->db_tab->npages; ++i) {
		if (!dev->db_tab->page[i].db_rec)
			continue;

		if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
			mthca_warn(dev, "Kernel UARC page %d not empty\n", i);

		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);

		dma_free_coherent(&dev->pdev->dev, 4096,
				  dev->db_tab->page[i].db_rec,
				  dev->db_tab->page[i].mapping);
	}

	kfree(dev->db_tab->page);
	kfree(dev->db_tab);
}