1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 *     Redistribution and use in source and binary forms, with or
12 *     without modification, are permitted provided that the following
13 *     conditions are met:
14 *
15 *      - Redistributions of source code must retain the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer.
18 *
19 *      - Redistributions in binary form must reproduce the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer in the documentation and/or other materials
22 *        provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34/*
35 * This file is conditionally built on x86_64 only.  Otherwise weak symbol
36 * versions of the functions exported from here are used.
37 */
38
39#include <linux/pci.h>
40#include <asm/mtrr.h>
41#include <asm/processor.h>
42
43#include "ipath_kernel.h"
44
45/**
46 * ipath_enable_wc - enable write combining for MMIO writes to the device
47 * @dd: infinipath device
48 *
49 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
50 * write combining.
51 */
52int ipath_enable_wc(struct ipath_devdata *dd)
53{
54	int ret = 0;
55	u64 pioaddr, piolen;
56	unsigned bits;
57	const unsigned long addr = pci_resource_start(dd->pcidev, 0);
58	const size_t len = pci_resource_len(dd->pcidev, 0);
59
60	/*
61	 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
62	 * chip.  Linux (possibly the hardware) requires it to be on a power
63	 * of 2 address matching the length (which has to be a power of 2).
64	 * For rev1, that means the base address, for rev2, it will be just
65	 * the PIO buffers themselves.
66	 * For chips with two sets of buffers, the calculations are
67	 * somewhat more complicated; we need to sum, and the piobufbase
68	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
69	 * The buffers are still packed, so a single range covers both.
70	 */
71	if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
72		unsigned long pio2kbase, pio4kbase;
73		pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
74		pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
75		if (pio2kbase < pio4kbase) { /* all, for now */
76			pioaddr = addr + pio2kbase;
77			piolen = pio4kbase - pio2kbase +
78				dd->ipath_piobcnt4k * dd->ipath_4kalign;
79		} else {
80			pioaddr = addr + pio4kbase;
81			piolen = pio2kbase - pio4kbase +
82				dd->ipath_piobcnt2k * dd->ipath_palign;
83		}
84	} else {  /* single buffer size (2K, currently) */
85		pioaddr = addr + dd->ipath_piobufbase;
86		piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
87			dd->ipath_piobcnt4k * dd->ipath_4kalign;
88	}
89
90	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
91		/* do nothing */ ;
92
93	if (piolen != (1ULL << bits)) {
94		piolen >>= bits;
95		while (piolen >>= 1)
96			bits++;
97		piolen = 1ULL << (bits + 1);
98	}
99	if (pioaddr & (piolen - 1)) {
100		u64 atmp;
101		ipath_dbg("pioaddr %llx not on right boundary for size "
102			  "%llx, fixing\n",
103			  (unsigned long long) pioaddr,
104			  (unsigned long long) piolen);
105		atmp = pioaddr & ~(piolen - 1);
106		if (atmp < addr || (atmp + piolen) > (addr + len)) {
107			ipath_dev_err(dd, "No way to align address/size "
108				      "(%llx/%llx), no WC mtrr\n",
109				      (unsigned long long) atmp,
110				      (unsigned long long) piolen << 1);
111			ret = -ENODEV;
112		} else {
113			ipath_dbg("changing WC base from %llx to %llx, "
114				  "len from %llx to %llx\n",
115				  (unsigned long long) pioaddr,
116				  (unsigned long long) atmp,
117				  (unsigned long long) piolen,
118				  (unsigned long long) piolen << 1);
119			pioaddr = atmp;
120			piolen <<= 1;
121		}
122	}
123
124	if (!ret) {
125		int cookie;
126		ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC "
127			   "(addr %llx, len=0x%llx)\n",
128			   (unsigned long long) pioaddr,
129			   (unsigned long long) piolen);
130		cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
131		if (cookie < 0) {
132			{
133				dev_info(&dd->pcidev->dev,
134					 "mtrr_add()  WC for PIO bufs "
135					 "failed (%d)\n",
136					 cookie);
137				ret = -EINVAL;
138			}
139		} else {
140			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
141				   "cookie is %d\n", cookie);
142			dd->ipath_wc_cookie = cookie;
143			dd->ipath_wc_base = (unsigned long) pioaddr;
144			dd->ipath_wc_len = (unsigned long) piolen;
145		}
146	}
147
148	return ret;
149}
150
151/**
152 * ipath_disable_wc - disable write combining for MMIO writes to the device
153 * @dd: infinipath device
154 */
155void ipath_disable_wc(struct ipath_devdata *dd)
156{
157	if (dd->ipath_wc_cookie) {
158		int r;
159		ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
160		r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
161			     dd->ipath_wc_len);
162		if (r < 0)
163			dev_info(&dd->pcidev->dev,
164				 "mtrr_del(%lx, %lx, %lx) failed: %d\n",
165				 dd->ipath_wc_cookie, dd->ipath_wc_base,
166				 dd->ipath_wc_len, r);
167		dd->ipath_wc_cookie = 0; /* even on failure */
168	}
169}
170
171/**
172 * ipath_unordered_wc - indicate whether write combining is ordered
173 *
174 * Because our performance depends on our ability to do write combining mmio
175 * writes in the most efficient way, we need to know if we are on an Intel
176 * or AMD x86_64 processor.  AMD x86_64 processors flush WC buffers out in
177 * the order completed, and so no special flushing is required to get
178 * correct ordering.  Intel processors, however, will flush write buffers
179 * out in "random" orders, and so explicit ordering is needed at times.
180 */
181int ipath_unordered_wc(void)
182{
183	return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
184}
185