1/*
2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34#include <linux/mlx4/cq.h>
35#include <linux/slab.h>
36#include <linux/mlx4/qp.h>
37#include <linux/skbuff.h>
38#include <linux/if_ether.h>
39#include <linux/if_vlan.h>
40#include <linux/vmalloc.h>
41
42#include "mlx4_en.h"
43
44
/* Allocate the i-th fragment of an RX descriptor and map it for DMA.
 *
 * Buffers are sub-allocated from large pages held by the per-ring page
 * allocator (ring_alloc[i]).  While the allocator page still has room,
 * the fragment takes the next stride and an extra page reference; once
 * the last usable offset is reached, the current page is handed out
 * whole (its remaining reference goes with the fragment) and a fresh
 * page refills the allocator slot.
 *
 * Returns 0 on success or -ENOMEM if a new page could not be allocated.
 */
static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_desc *rx_desc,
			      struct page_frag *skb_frags,
			      struct mlx4_en_rx_alloc *ring_alloc,
			      int i)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
	struct page *page;
	dma_addr_t dma;

	if (page_alloc->offset == frag_info->last_offset) {
		/* Allocator page exhausted - allocate new page */
		page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
		if (!page)
			return -ENOMEM;

		/* Give the old page to this fragment, install the new one */
		skb_frags[i].page = page_alloc->page;
		skb_frags[i].offset = page_alloc->offset;
		page_alloc->page = page;
		page_alloc->offset = frag_info->frag_align;
	} else {
		page = page_alloc->page;
		get_page(page);	/* fragment holds its own reference */

		skb_frags[i].page = page;
		skb_frags[i].offset = page_alloc->offset;
		page_alloc->offset += frag_info->frag_stride;
	}
	/* NOTE(review): the mapping result is not checked with
	 * pci_dma_mapping_error(); a failed mapping would hand a bad
	 * address to the HW - consider adding a check. */
	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
			     skb_frags[i].offset, frag_info->frag_size,
			     PCI_DMA_FROMDEVICE);
	rx_desc->data[i].addr = cpu_to_be64(dma);
	return 0;
}
81
82static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
83				  struct mlx4_en_rx_ring *ring)
84{
85	struct mlx4_en_rx_alloc *page_alloc;
86	int i;
87
88	for (i = 0; i < priv->num_frags; i++) {
89		page_alloc = &ring->page_alloc[i];
90		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
91					       MLX4_EN_ALLOC_ORDER);
92		if (!page_alloc->page)
93			goto out;
94
95		page_alloc->offset = priv->frag_info[i].frag_align;
96		en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
97		       i, page_alloc->page);
98	}
99	return 0;
100
101out:
102	while (i--) {
103		page_alloc = &ring->page_alloc[i];
104		put_page(page_alloc->page);
105		page_alloc->page = NULL;
106	}
107	return -ENOMEM;
108}
109
110static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
111				      struct mlx4_en_rx_ring *ring)
112{
113	struct mlx4_en_rx_alloc *page_alloc;
114	int i;
115
116	for (i = 0; i < priv->num_frags; i++) {
117		page_alloc = &ring->page_alloc[i];
118		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
119		       i, page_count(page_alloc->page));
120
121		put_page(page_alloc->page);
122		page_alloc->page = NULL;
123	}
124}
125
126
/* Write the static parts of RX descriptor 'index': per-fragment sizes,
 * byte counts and lkeys, plus padding entries for unused scatter slots.
 *
 * NOTE(review): ring->rx_info entries are accessed here through
 * struct skb_frag_struct while the rest of this file treats them as
 * struct page_frag; the 'size' fields of the two layouts appear to
 * coincide on this kernel - confirm before changing either type. */
static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int possible_frags;
	int i;

	/* Set size and memtype fields */
	for (i = 0; i < priv->num_frags; i++) {
		skb_frag_size_set(&skb_frags[i], priv->frag_info[i].frag_size);
		rx_desc->data[i].byte_count =
			cpu_to_be32(priv->frag_info[i].frag_size);
		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
	}

	/* If the number of used fragments does not fill up the ring stride,
	 * remaining (unused) fragments must be padded with null address/size
	 * and a special memory key */
	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
	for (i = priv->num_frags; i < possible_frags; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}
154
155
/* Populate all fragments of RX descriptor 'index' with mapped buffers.
 *
 * On failure the fragments already allocated are unmapped and their
 * pages released; the unmap length comes from skb_frags[i].size, which
 * was filled in by mlx4_en_init_rx_desc().
 *
 * Returns 0 on success, -ENOMEM if any fragment allocation failed.
 */
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct page_frag *skb_frags = ring->rx_info +
				      (index << priv->log_rx_info);
	int i;

	for (i = 0; i < priv->num_frags; i++)
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
			goto err;

	return 0;

err:
	/* Roll back the fragments that were already mapped */
	while (i--) {
		dma_addr_t dma = be64_to_cpu(rx_desc->data[i].addr);
		pci_unmap_single(priv->mdev->pdev, dma, skb_frags[i].size,
				 PCI_DMA_FROMDEVICE);
		put_page(skb_frags[i].page);
	}
	return -ENOMEM;
}
179
/* Ring the RX doorbell: publish the low 16 bits of the producer index
 * so the HW can use the newly posted receive buffers. */
static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}
184
185static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
186				 struct mlx4_en_rx_ring *ring,
187				 int index)
188{
189	struct mlx4_en_dev *mdev = priv->mdev;
190	struct page_frag *skb_frags;
191	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
192	dma_addr_t dma;
193	int nr;
194
195	skb_frags = ring->rx_info + (index << priv->log_rx_info);
196	for (nr = 0; nr < priv->num_frags; nr++) {
197		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
198		dma = be64_to_cpu(rx_desc->data[nr].addr);
199
200		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
201		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
202				 PCI_DMA_FROMDEVICE);
203		put_page(skb_frags[nr].page);
204	}
205}
206
207static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
208{
209	struct mlx4_en_rx_ring *ring;
210	int ring_ind;
211	int buf_ind;
212	int new_size;
213
214	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
215		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
216			ring = &priv->rx_ring[ring_ind];
217
218			if (mlx4_en_prepare_rx_desc(priv, ring,
219						    ring->actual_size)) {
220				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
221					en_err(priv, "Failed to allocate "
222						     "enough rx buffers\n");
223					return -ENOMEM;
224				} else {
225					new_size = rounddown_pow_of_two(ring->actual_size);
226					en_warn(priv, "Only %d buffers allocated "
227						      "reducing ring size to %d",
228						ring->actual_size, new_size);
229					goto reduce_rings;
230				}
231			}
232			ring->actual_size++;
233			ring->prod++;
234		}
235	}
236	return 0;
237
238reduce_rings:
239	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
240		ring = &priv->rx_ring[ring_ind];
241		while (ring->actual_size > new_size) {
242			ring->actual_size--;
243			ring->prod--;
244			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
245		}
246	}
247
248	return 0;
249}
250
251static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
252				struct mlx4_en_rx_ring *ring)
253{
254	int index;
255
256	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
257	       ring->cons, ring->prod);
258
259	/* Unmap and free Rx buffers */
260	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
261	while (ring->cons != ring->prod) {
262		index = ring->cons & ring->size_mask;
263		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
264		mlx4_en_free_rx_desc(priv, ring, index);
265		++ring->cons;
266	}
267}
268
/* Allocate the software and HW resources of one RX ring: the rx_info
 * shadow array (per-descriptor fragment bookkeeping) and the HW
 * work-queue buffer, mapped for CPU access.
 *
 * NOTE(review): rx_info is sized using sizeof(struct skb_frag_struct)
 * while entries are accessed as struct page_frag elsewhere in this
 * file; the allocation is at least as large either way - confirm
 * before reusing this sizing.
 *
 * Returns 0 on success or a negative errno (everything unwound). */
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;
	int tmp;


	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	/* extra TXBB_SIZE leaves headroom; see mlx4_en_activate_rx_rings() */
	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					sizeof(struct skb_frag_struct));
	ring->rx_info = vmalloc(tmp);
	if (!ring->rx_info) {
		en_err(priv, "Failed allocating rx_info ring\n");
		return -ENOMEM;
	}
	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
		 ring->rx_info, tmp);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto err_ring;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;

	return 0;

err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_ring:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	return err;
}
316
/* Bring all RX rings to an operational state: reset indices, write all
 * descriptors, set up the per-ring page allocators and post the
 * initial receive buffers.
 *
 * On failure every ring that was touched is unwound (posted buffers
 * freed, TXBB offset reverted, allocator destroyed).
 *
 * Returns 0 on success or a negative errno. */
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int i;
	int ring_ind;
	int err;
	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
					DS_SIZE * priv->num_frags);

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;

		ring->stride = stride;
		/* skip the reserved headroom accounted for in buf_size by
		 * mlx4_en_create_rx_ring() */
		if (ring->stride <= TXBB_SIZE)
			ring->buf += TXBB_SIZE;

		ring->log_stride = ffs(ring->stride) - 1;
		ring->buf_size = ring->size * ring->stride;

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);

		/* Initialize page allocators */
		err = mlx4_en_init_allocator(priv, ring);
		if (err) {
			en_err(priv, "Failed initializing ring allocator\n");
			if (ring->stride <= TXBB_SIZE)
				ring->buf -= TXBB_SIZE;
			/* current ring has no allocator to destroy */
			ring_ind--;
			goto err_allocator;
		}
	}
	err = mlx4_en_fill_rx_buffers(priv);
	if (err)
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->size_mask = ring->actual_size - 1;
		mlx4_en_update_rx_prod_db(ring);
	}

	return 0;

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;
err_allocator:
	while (ring_ind >= 0) {
		if (priv->rx_ring[ring_ind].stride <= TXBB_SIZE)
			priv->rx_ring[ring_ind].buf -= TXBB_SIZE;
		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
		ring_ind--;
	}
	return err;
}
385
386void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
387			     struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
388{
389	struct mlx4_en_dev *mdev = priv->mdev;
390
391	mlx4_en_unmap_buffer(&ring->wqres.buf);
392	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
393	vfree(ring->rx_info);
394	ring->rx_info = NULL;
395}
396
/* Undo mlx4_en_activate_rx_rings() for one ring: free posted buffers,
 * revert the TXBB headroom offset, then drop the allocator pages.
 * Order matters: buffers must be freed before the allocator goes. */
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	mlx4_en_free_rx_buf(priv, ring);
	if (ring->stride <= TXBB_SIZE)
		ring->buf -= TXBB_SIZE;
	mlx4_en_destroy_allocator(priv, ring);
}
405
406
/* Unmap a completed descriptor and free unused pages.
 *
 * Moves the fragments covering 'length' bytes of packet data into the
 * skb's frag array while posting a replacement buffer for each one in
 * the HW descriptor.  Returns the number of fragments attached, or 0
 * if a replacement allocation failed (caller must drop the packet;
 * already-attached fragments are released here). */
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    struct mlx4_en_rx_desc *rx_desc,
				    struct page_frag *skb_frags,
				    struct sk_buff *skb,
				    struct mlx4_en_rx_alloc *page_alloc,
				    int length)
{
	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;

	/* Collect used fragments while replacing them in the HW descriptors */
	for (nr = 0; nr < priv->num_frags; nr++) {
		frag_info = &priv->frag_info[nr];
		/* stop once the packet data is fully covered */
		if (length <= frag_info->frag_prefix_size)
			break;

		/* Save page reference in skb */
		__skb_frag_set_page(&skb_frags_rx[nr], skb_frags[nr].page);
		skb_frag_size_set(&skb_frags_rx[nr], skb_frags[nr].size);
		skb_frags_rx[nr].page_offset = skb_frags[nr].offset;
		skb->truesize += frag_info->frag_stride;
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		/* Allocate a replacement page */
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
			goto fail;

		/* Unmap buffer */
		pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]),
				 PCI_DMA_FROMDEVICE);
	}
	/* Adjust size of last fragment to match actual length */
	if (nr > 0)
		skb_frag_size_set(&skb_frags_rx[nr - 1],
			length - priv->frag_info[nr - 1].frag_prefix_size);
	return nr;

fail:
	/* Drop all accumulated fragments (which have already been replaced in
	 * the descriptor) of this packet; remaining fragments are reused... */
	while (nr > 0) {
		nr--;
		__skb_frag_unref(&skb_frags_rx[nr]);
	}
	return 0;
}
457
458
/* Build an skb for a received packet of 'length' bytes.
 *
 * Short packets (<= SMALL_PACKET_SIZE) are copied entirely into the
 * linear part of a fresh skb so the RX buffer can be reused in place;
 * longer packets get their fragments attached to the skb (replacement
 * buffers are posted to the HW) with only the headers copied linearly.
 *
 * Returns the skb, or NULL on allocation/replenish failure. */
static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_desc *rx_desc,
				      struct page_frag *skb_frags,
				      struct mlx4_en_rx_alloc *page_alloc,
				      unsigned int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct sk_buff *skb;
	void *va;
	int used_frags;
	dma_addr_t dma;

	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
	if (!skb) {
		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
		return NULL;
	}
	skb->dev = priv->dev;
	skb_reserve(skb, NET_IP_ALIGN);
	skb->len = length;

	/* Get pointer to first fragment so we could copy the headers into the
	 * (linear part of the) skb */
	va = page_address(skb_frags[0].page) + skb_frags[0].offset;

	if (length <= SMALL_PACKET_SIZE) {
		/* We are copying all relevant data to the skb - temporarily
		 * synch buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		/* hand the buffer back to the device - it stays posted */
		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
					   DMA_FROM_DEVICE);
		skb->tail += length;
	} else {

		/* Move relevant fragments to skb */
		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
						      skb, page_alloc, length);
		if (unlikely(!used_frags)) {
			kfree_skb(skb);
			return NULL;
		}
		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, HEADER_COPY_SIZE);
		skb->tail += HEADER_COPY_SIZE;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;

		/* Adjust size of first fragment */
		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], HEADER_COPY_SIZE);
		skb->data_len = length - HEADER_COPY_SIZE;
	}
	return skb;
}
518
519static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
520{
521	int i;
522	int offset = ETH_HLEN;
523
524	for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
525		if (*(skb->data + offset) != (unsigned char) (i & 0xff))
526			goto out_loopback;
527	}
528	/* Loopback found */
529	priv->loopback_ok = 1;
530
531out_loopback:
532	dev_kfree_skb_any(skb);
533}
534
/* Poll up to 'budget' completions from an RX CQ (runs in NAPI context).
 *
 * For each good completion the packet is either handed to GRO (when
 * enabled and the HW checksum is OK) or built into an skb and passed up
 * the stack; error/bad-FCS/self-MAC frames are dropped.  Consumed
 * descriptors are replenished in place, and the consumer index and RX
 * producer doorbell are updated once at the end.
 *
 * Returns the number of completions processed. */
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct page_frag *skb_frags;
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;
	struct ethhdr *ethh;
	u64 s_mac;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			en_err(priv, "CQE completed in error - vendor "
				  "syndrom:%d syndrom:%d\n",
				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				  ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/* Get pointer to first fragment since we haven't skb yet and
		 * cast it to ethhdr struct */
		ethh = (struct ethhdr *)(page_address(skb_frags[0].page) +
					 skb_frags[0].offset);
		s_mac = mlx4_en_mac_to_u64(ethh->h_source);

		/* If source MAC is equal to our own MAC and not performing
		 * the selftest or flb disabled - drop the packet */
		if (s_mac == priv->mac &&
			(!(dev->features & NETIF_F_LOOPBACK) ||
			 !priv->validate_loopback))
			goto next;

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		/* fcs_del is ETH_FCS_LEN when FCS stripping was disabled in
		 * mlx4_en_config_rss_qp(), 0 otherwise */
		length -= ring->fcs_del;
		ring->bytes += length;
		ring->packets++;

		if (likely(dev->features & NETIF_F_RXCSUM)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				ring->csum_ok++;
				/* This packet is eligible for LRO if it is:
				 * - DIX Ethernet (type interpretation)
				 * - TCP/IP (v4)
				 * - without IP options
				 * - not an IP fragment */
				if (dev->features & NETIF_F_GRO) {
					struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
					if (!gro_skb)
						goto next;

					nr = mlx4_en_complete_rx_desc(
						priv, rx_desc,
						skb_frags, gro_skb,
						ring->page_alloc, length);
					if (!nr)
						goto next;

					skb_shinfo(gro_skb)->nr_frags = nr;
					gro_skb->len = length;
					gro_skb->data_len = length;
					gro_skb->ip_summed = CHECKSUM_UNNECESSARY;

					if (cqe->vlan_my_qpn &
					    cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) {
						u16 vid = be16_to_cpu(cqe->sl_vid);

						__vlan_hwaccel_put_tag(gro_skb, vid);
					}

					if (dev->features & NETIF_F_RXHASH)
						gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

					skb_record_rx_queue(gro_skb, cq->ring);
					napi_gro_frags(&cq->napi);

					goto next;
				}

				/* LRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
			} else {
				ip_summed = CHECKSUM_NONE;
				ring->csum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			ring->csum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

                if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, skb);
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		if (dev->features & NETIF_F_RXHASH)
			skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

		if (be32_to_cpu(cqe->vlan_my_qpn) &
		    MLX4_CQE_VLAN_PRESENT_MASK)
			__vlan_hwaccel_put_tag(skb, be16_to_cpu(cqe->sl_vid));

		/* Push it up the stack */
		netif_receive_skb(skb);

next:
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget -
			 * flush only pending LRO sessions */
			goto out;
		}
	}

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were realocated in place */
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}
706
707
708void mlx4_en_rx_irq(struct mlx4_cq *mcq)
709{
710	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
711	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
712
713	if (priv->port_up)
714		napi_schedule(&cq->napi);
715	else
716		mlx4_en_arm_cq(priv, cq);
717}
718
719/* Rx CQ polling - called by NAPI */
720int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
721{
722	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
723	struct net_device *dev = cq->dev;
724	struct mlx4_en_priv *priv = netdev_priv(dev);
725	int done;
726
727	done = mlx4_en_process_rx_cq(dev, cq, budget);
728
729	/* If we used up all the quota - we're probably not done yet... */
730	if (done == budget)
731		INC_PERF_COUNTER(priv->pstats.napi_quota);
732	else {
733		/* Done for now */
734		napi_complete(napi);
735		mlx4_en_arm_cq(priv, cq);
736	}
737	return done;
738}
739
740
741/* Calculate the last offset position that accommodates a full fragment
742 * (assuming fagment size = stride-align) */
743static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
744{
745	u16 res = MLX4_EN_ALLOC_SIZE % stride;
746	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;
747
748	en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
749			    "res:%d offset:%d\n", stride, align, res, offset);
750	return offset;
751}
752
753
/* Preferred fragment sizes (bytes) used, in order, to split an MTU
 * across RX buffer fragments; consumed by mlx4_en_calc_rx_buf(). */
static int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ3
};
760
761void mlx4_en_calc_rx_buf(struct net_device *dev)
762{
763	struct mlx4_en_priv *priv = netdev_priv(dev);
764	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
765	int buf_size = 0;
766	int i = 0;
767
768	while (buf_size < eff_mtu) {
769		priv->frag_info[i].frag_size =
770			(eff_mtu > buf_size + frag_sizes[i]) ?
771				frag_sizes[i] : eff_mtu - buf_size;
772		priv->frag_info[i].frag_prefix_size = buf_size;
773		if (!i)	{
774			priv->frag_info[i].frag_align = NET_IP_ALIGN;
775			priv->frag_info[i].frag_stride =
776				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
777		} else {
778			priv->frag_info[i].frag_align = 0;
779			priv->frag_info[i].frag_stride =
780				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
781		}
782		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
783						priv, priv->frag_info[i].frag_stride,
784						priv->frag_info[i].frag_align);
785		buf_size += priv->frag_info[i].frag_size;
786		i++;
787	}
788
789	priv->num_frags = i;
790	priv->rx_skb_size = eff_mtu;
791	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));
792
793	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
794		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
795	for (i = 0; i < priv->num_frags; i++) {
796		en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
797				"stride:%d last_offset:%d\n", i,
798				priv->frag_info[i].frag_size,
799				priv->frag_info[i].frag_prefix_size,
800				priv->frag_info[i].frag_align,
801				priv->frag_info[i].frag_stride,
802				priv->frag_info[i].last_offset);
803	}
804}
805
806/* RSS related functions */
807
808static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
809				 struct mlx4_en_rx_ring *ring,
810				 enum mlx4_qp_state *state,
811				 struct mlx4_qp *qp)
812{
813	struct mlx4_en_dev *mdev = priv->mdev;
814	struct mlx4_qp_context *context;
815	int err = 0;
816
817	context = kmalloc(sizeof *context , GFP_KERNEL);
818	if (!context) {
819		en_err(priv, "Failed to allocate qp context\n");
820		return -ENOMEM;
821	}
822
823	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
824	if (err) {
825		en_err(priv, "Failed to allocate qp #%x\n", qpn);
826		goto out;
827	}
828	qp->event = mlx4_en_sqp_event;
829
830	memset(context, 0, sizeof *context);
831	mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
832				qpn, ring->cqn, context);
833	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
834
835	/* Cancel FCS removal if FW allows */
836	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
837		context->param3 |= cpu_to_be32(1 << 29);
838		ring->fcs_del = ETH_FCS_LEN;
839	} else
840		ring->fcs_del = 0;
841
842	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
843	if (err) {
844		mlx4_qp_remove(mdev->dev, qp);
845		mlx4_qp_free(mdev->dev, qp);
846	}
847	mlx4_en_update_rx_prod_db(ring);
848out:
849	kfree(context);
850	return err;
851}
852
/* Allocate rx qp's and configure them according to rss map.
 *
 * Reserves a QP range, brings up one QP per RX ring, then configures
 * the RSS indirection QP whose context carries the hash key, the RSS
 * flags and the base/default QPNs.  On failure everything brought up
 * so far is torn down in reverse order.
 *
 * Returns 0 on success or a negative errno. */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_rss_context *rss_context;
	int rss_rings;
	void *ptr;
	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
			MLX4_RSS_TCP_IPV6);
	int i, qpn;
	int err = 0;
	int good_qps = 0;
	/* fixed Toeplitz hash key (10 dwords) */
	static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC,
				0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD,
				0x593D56D9, 0xF3253C06, 0x2ADC1FFC};

	en_dbg(DRV, priv, "Configuring rss steering\n");
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
	}

	for (i = 0; i < priv->rx_ring_num; i++) {
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, &priv->rx_ring[i],
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto rss_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0].cqn, &context);

	/* 0 or out-of-range profile value means "use all rings" */
	if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
		rss_rings = priv->rx_ring_num;
	else
		rss_rings = priv->prof->rss_rings;

	/* The RSS context lives at a fixed offset inside the QP context */
	ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
					+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
	rss_context = ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	if (priv->mdev->profile.udp_rss) {
		rss_mask |=  MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
		rss_context->base_qpn_udp = rss_context->default_qpn;
	}
	rss_context->flags = rss_mask;
	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
	for (i = 0; i < 10; i++)
		rss_context->rss_key[i] = rsskey[i];

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
	return err;
}
943
944void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
945{
946	struct mlx4_en_dev *mdev = priv->mdev;
947	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
948	int i;
949
950	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
951		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
952	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
953	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
954
955	for (i = 0; i < priv->rx_ring_num; i++) {
956		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
957			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
958		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
959		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
960	}
961	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
962}
963
964
965
966
967
968