pagealloc.c revision dabed0e6319a1900c8738676bd197f06b5b5b6cc
/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <asm-generic/kmap_types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"

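/*
 * Firmware page management.
 *
 * The firmware keeps part of its context in host memory pages that the
 * driver donates via the MANAGE_PAGES command.  The number of pages
 * needed is discovered with QUERY_PAGES during startup; later requests
 * arrive as events and are funneled through
 * mlx5_core_req_pages_handler() onto a dedicated workqueue.  Every
 * page given to the firmware is DMA mapped and tracked in an rb-tree
 * keyed by its DMA address, so it can be unmapped and freed once the
 * firmware hands it back.
 */
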
/* Opcode modifiers for MLX5_CMD_OP_MANAGE_PAGES */
enum {
	MLX5_PAGES_CANT_GIVE	= 0,
	MLX5_PAGES_GIVE		= 1,
	MLX5_PAGES_TAKE		= 2
};

/* Opcode modifiers for MLX5_CMD_OP_QUERY_PAGES */
enum {
	MLX5_BOOT_PAGES		= 1,
	MLX5_INIT_PAGES		= 2,
	MLX5_POST_INIT_PAGES	= 3
};

/* Deferred page request, queued from the event handler */
struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u32	func_id;
	s32	npages;		/* > 0: give to FW, < 0: reclaim from FW */
	struct work_struct work;
};

/* rb-tree node tracking one page handed to the firmware */
struct fw_page {
	struct rb_node	rb_node;
	u64		addr;		/* DMA address, the tree key */
	struct page	*page;
	u16		func_id;
};

/* Mailbox layouts for the QUERY_PAGES and MANAGE_PAGES commands */
struct mlx5_query_pages_inbox {
	struct mlx5_inbox_hdr	hdr;
	u8			rsvd[8];
};

struct mlx5_query_pages_outbox {
	struct mlx5_outbox_hdr	hdr;
	__be16			rsvd;
	__be16			func_id;
	__be32			num_pages;
};

struct mlx5_manage_pages_inbox {
	struct mlx5_inbox_hdr	hdr;
	__be16			rsvd;
	__be16			func_id;
	__be32			num_entries;
	__be64			pas[0];
};

struct mlx5_manage_pages_outbox {
	struct mlx5_outbox_hdr	hdr;
	__be32			num_entries;
	u8			rsvd[4];
	__be64			pas[0];
};

enum {
	MAX_RECLAIM_TIME_MSECS	= 5000,
};

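/*
 * Start tracking a page that is about to be given to the firmware.
 * Note that the comparisons are inverted relative to the common
 * rb-tree idiom (smaller addresses descend to the right);
 * remove_page() walks the tree with the same inverted comparison, so
 * lookups are consistent.
 */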
static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page,
		       u16 func_id)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct fw_page *nfp;
	struct fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct fw_page, rb_node);
		if (tfp->addr < addr)
			new = &parent->rb_left;
		else if (tfp->addr > addr)
			new = &parent->rb_right;
		else
			return -EEXIST;
	}

	nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
	if (!nfp)
		return -ENOMEM;

	nfp->addr = addr;
	nfp->page = page;
	nfp->func_id = func_id;

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);

	return 0;
}

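/*
 * Stop tracking the page mapped at addr and return it to the caller,
 * or NULL if no page is tracked at that DMA address.  The caller is
 * responsible for unmapping and freeing the page.
 */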
static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct page *result = NULL;
	struct fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct fw_page, rb_node);
		if (tfp->addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, root);
			result = tfp->page;
			kfree(tfp);
			break;
		}
	}

	return result;
}

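/*
 * Ask the firmware how many pages it needs for the given init stage
 * (boot or post-boot init) and which function they should be
 * accounted to.
 */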
static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	struct mlx5_query_pages_inbox	in;
	struct mlx5_query_pages_outbox	out;
	int err;

	memset(&in, 0, sizeof(in));
	memset(&out, 0, sizeof(out));
	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
	in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) :
			      cpu_to_be16(MLX5_INIT_PAGES);

	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
	if (err)
		return err;

	if (out.hdr.status)
		return mlx5_cmd_status_to_err(&out.hdr);

	*npages = be32_to_cpu(out.num_pages);
	*func_id = be16_to_cpu(out.func_id);

	return err;
}

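/*
 * Allocate npages pages, DMA map them and hand their addresses to the
 * firmware with MANAGE_PAGES/GIVE.  On failure every page allocated
 * so far is unwound, and, when notify_fail is set, the firmware is
 * told with a CANT_GIVE command that the request cannot be satisfied.
 */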
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	struct mlx5_manage_pages_inbox *in;
	struct mlx5_manage_pages_outbox out;
	struct page *page;
	int inlen;
	u64 addr;
	int err;
	int i;

	inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		return -ENOMEM;
	}
	memset(&out, 0, sizeof(out));

	for (i = 0; i < npages; i++) {
		page = alloc_page(GFP_HIGHUSER);
		if (!page) {
			err = -ENOMEM;
			mlx5_core_warn(dev, "failed to allocate page\n");
			goto out_alloc;
		}
		addr = dma_map_page(&dev->pdev->dev, page, 0,
				    PAGE_SIZE, DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&dev->pdev->dev, addr)) {
			mlx5_core_warn(dev, "failed dma mapping page\n");
			__free_page(page);
			err = -ENOMEM;
			goto out_alloc;
		}
		err = insert_page(dev, addr, page, func_id);
		if (err) {
			mlx5_core_err(dev, "failed to track allocated page\n");
			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
			__free_page(page);
			err = -ENOMEM;
			goto out_alloc;
		}
		in->pas[i] = cpu_to_be64(addr);
	}

	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
	in->func_id = cpu_to_be16(func_id);
	in->num_entries = cpu_to_be32(npages);
	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
	mlx5_core_dbg(dev, "err %d\n", err);
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;

	if (out.hdr.status) {
		err = mlx5_cmd_status_to_err(&out.hdr);
		if (err) {
			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
				       func_id, npages, out.hdr.status);
			goto out_alloc;
		}
	}

	goto out_free;

out_alloc:
	/*
	 * Unwind before reusing "in" for the notify command below; the
	 * loop still needs the addresses stored in in->pas[].
	 */
	for (i--; i >= 0; i--) {
		addr = be64_to_cpu(in->pas[i]);
		page = remove_page(dev, addr);
		if (!page) {
			mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
				      addr);
			continue;
		}
		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
			       DMA_BIDIRECTIONAL);
		__free_page(page);
	}

	if (notify_fail) {
		memset(in, 0, inlen);
		memset(&out, 0, sizeof(out));
		in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
		in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
		if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
	}

out_free:
	mlx5_vfree(in);
	return err;
}

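/*
 * Ask the firmware to return up to npages pages with
 * MANAGE_PAGES/TAKE.  The outbox reports how many pages the firmware
 * actually released; each one is looked up in the tree, unmapped and
 * freed.  If nclaimed is non-NULL it is set to that count.
 */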
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	struct mlx5_manage_pages_inbox   in;
	struct mlx5_manage_pages_outbox *out;
	struct page *page;
	int num_claimed;
	int outlen;
	u64 addr;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	memset(&in, 0, sizeof(in));
	outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
	in.func_id = cpu_to_be16(func_id);
	in.num_entries = cpu_to_be32(npages);
	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}
	dev->priv.fw_pages -= npages;

	if (out->hdr.status) {
		err = mlx5_cmd_status_to_err(&out->hdr);
		goto out_free;
	}

	num_claimed = be32_to_cpu(out->num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	for (i = 0; i < num_claimed; i++) {
		addr = be64_to_cpu(out->pas[i]);
		page = remove_page(dev, addr);
		if (!page) {
			mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n",
				       addr);
		} else {
			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
			__free_page(page);
		}
	}

out_free:
	mlx5_vfree(out);
	return err;
}

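/*
 * Work item handler for one deferred firmware page request: a
 * negative count means the firmware is returning pages, a positive
 * one that it wants more.
 */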
static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req,
						  work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n", req->npages < 0 ?
			       "reclaim" : "give", err);

	kfree(req);
}

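/*
 * Called from the event handler, possibly in atomic context (hence
 * GFP_ATOMIC), so the request is only queued here and served later by
 * pages_work_handler().
 */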
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	queue_work(dev->priv.pg_wq, &req->work);
}

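/*
 * Synchronously give the firmware the pages it asks for at startup.
 * notify_fail is 0 here: an error is propagated to the caller instead
 * of being reported back to the firmware.
 */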
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
{
	u16 uninitialized_var(func_id);
	s32 uninitialized_var(npages);
	int err;

	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
	if (err)
		return err;

	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
		      npages, boot ? "boot" : "init", func_id);

	return give_pages(dev, func_id, npages, 0);
}

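/*
 * Estimate how many 8-byte page addresses fit in a single reclaim
 * command (the inline input area plus one mailbox block, minus the
 * fixed outbox header), so reclaim can be batched one command mailbox
 * at a time.
 */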
static int optimal_reclaimed_pages(void)
{
	struct mlx5_cmd_prot_block *block;
	struct mlx5_cmd_layout *lay;
	int ret;

	ret = (sizeof(lay->in) + sizeof(block->data) -
	       sizeof(struct mlx5_manage_pages_outbox)) / 8;

	return ret;
}

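/*
 * Pull back every page the firmware still holds, one batch per
 * command, until the tracking tree is empty.  The watchdog is
 * restarted whenever pages actually come back, and the loop gives up
 * after MAX_RECLAIM_TIME_MSECS without progress.
 */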
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
{
	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	struct fw_page *fwp;
	struct rb_node *p;
	int nclaimed = 0;
	int err;

	do {
		p = rb_first(&dev->priv.page_root);
		if (p) {
			fwp = rb_entry(p, struct fw_page, rb_node);
			err = reclaim_pages(dev, fwp->func_id,
					    optimal_reclaimed_pages(),
					    &nclaimed);
			if (err) {
				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
					       err);
				return err;
			}
			if (nclaimed)
				end = jiffies +
					msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
		}
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
			break;
		}
	} while (p);

	return 0;
}

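/*
 * mlx5_pagealloc_init()/cleanup() manage the page tracking tree;
 * mlx5_pagealloc_start()/stop() bring the page request workqueue up
 * and down.
 */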
void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
	dev->priv.page_root = RB_ROOT;
}

void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
	/* nothing */
}

int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
	if (!dev->priv.pg_wq)
		return -ENOMEM;

	return 0;
}

void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.pg_wq);
}