/*
 * AMD NUMA support.
 * Discover the memory map and associated nodes.
 *
 * This version reads it directly from the AMD northbridge.
 *
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 */
91da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/kernel.h>
101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/init.h>
111da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/string.h>
121da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/module.h>
131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/nodemask.h>
14a9ce6bc15100023b411f8117e53a016d61889800Yinghai Lu#include <linux/memblock.h>
152706a0bf7b02693ed88752df877f10c2206292ffTejun Heo#include <linux/bootmem.h>
16a9ce6bc15100023b411f8117e53a016d61889800Yinghai Lu
171da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/io.h>
181da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <linux/pci_ids.h>
19cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu#include <linux/acpi.h>
201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/types.h>
211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/mmzone.h>
221da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/proto.h>
231da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/e820.h>
241da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/pci-direct.h>
251da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds#include <asm/numa.h>
26cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu#include <asm/mpspec.h>
27cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu#include <asm/apic.h>
2823ac4ae827e6264e21b898f2cd3f601450aa02a6Andreas Herrmann#include <asm/amd_nb.h>
291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
30f51bf3073a145a5b3263fd882c52d6ec04b687daDavid Rientjesstatic unsigned char __initdata nodeids[8];
318ee2debce32412118cf8c239e0026ace56ea1425David Rientjes
321da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvaldsstatic __init int find_northbridge(void)
331da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds{
343aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	int num;
351da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
363aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	for (num = 0; num < 32; num++) {
371da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		u32 header;
383aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
393aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		header = read_pci_config(0, num, 0, 0x00);
40bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947Joachim Deguara		if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
41bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947Joachim Deguara			header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
42bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947Joachim Deguara			header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
433aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			continue;
441da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
453aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		header = read_pci_config(0, num, 1, 0x00);
46bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947Joachim Deguara		if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
47bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947Joachim Deguara			header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
48bb4a1d644a84e7e9d1d1fa9d1c7d1017b02e0947Joachim Deguara			header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
493aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			continue;
503aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		return num;
513aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	}
521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
53940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heo	return -ENOENT;
541da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds}
551da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
56cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lustatic __init void early_get_boot_cpu_id(void)
57cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu{
58cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu	/*
59f6e9456c9272bb570df6e217cdbe007e270b1c4eRobert Richter	 * need to get the APIC ID of the BSP so can use that to
60eec1d4fa00c6552ae2fdf71d59f1eded7c88dd89Hans Rosenfeld	 * create apicid_to_node in amd_scan_nodes()
61cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu	 */
62a4caa18efe468acb3522e30763de57a67b3e438bYinghai Lu#ifdef CONFIG_X86_MPPARSE
63cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu	/*
64cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu	 * get boot-time SMP configuration:
65cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu	 */
66cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu	if (smp_found_config)
67cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82aYinghai Lu		early_get_smp_config();
68a4caa18efe468acb3522e30763de57a67b3e438bYinghai Lu#endif
698ee2debce32412118cf8c239e0026ace56ea1425David Rientjes}
708ee2debce32412118cf8c239e0026ace56ea1425David Rientjes
71940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heoint __init amd_numa_init(void)
728ee2debce32412118cf8c239e0026ace56ea1425David Rientjes{
732706a0bf7b02693ed88752df877f10c2206292ffTejun Heo	u64 start = PFN_PHYS(0);
742706a0bf7b02693ed88752df877f10c2206292ffTejun Heo	u64 end = PFN_PHYS(max_pfn);
758ee2debce32412118cf8c239e0026ace56ea1425David Rientjes	unsigned numnodes;
762706a0bf7b02693ed88752df877f10c2206292ffTejun Heo	u64 prevbase;
7745fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	int i, j, nb;
78d34c08958fa36c7a8c3f8d9c0ebe6ec1ab744a68Thomas Gleixner	u32 nodeid, reg;
7945fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	unsigned int bits, cores, apicid_base;
801da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
810637a70a5db98182d9ad3d6ae1ee30acf20afde9Andi Kleen	if (!early_pci_allowed())
82940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heo		return -EINVAL;
830637a70a5db98182d9ad3d6ae1ee30acf20afde9Andi Kleen
843aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	nb = find_northbridge();
853aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	if (nb < 0)
861da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		return nb;
871da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
881af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes	pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
891da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
903aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	reg = read_pci_config(0, nb, 0, 0x60);
911da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	numnodes = ((reg >> 4) & 0xF) + 1;
923bea9c9793a17053e05d970e5d90d48fc9fce07dAndi Kleen	if (numnodes <= 1)
93940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heo		return -ENOENT;
941da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
958ee2debce32412118cf8c239e0026ace56ea1425David Rientjes	pr_info("Number of physical nodes %d\n", numnodes);
961da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
971da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds	prevbase = 0;
983aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	for (i = 0; i < 8; i++) {
992706a0bf7b02693ed88752df877f10c2206292ffTejun Heo		u64 base, limit;
1003aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
1011da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		base = read_pci_config(0, nb, 1, 0x40 + i*8);
1021da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
1031da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
104f51bf3073a145a5b3263fd882c52d6ec04b687daDavid Rientjes		nodeids[i] = nodeid = limit & 7;
1053aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if ((base & 3) == 0) {
1061da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			if (i < numnodes)
1071af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes				pr_info("Skipping disabled node %d\n", i);
1081da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			continue;
1093aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		}
1101da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		if (nodeid >= numnodes) {
1112706a0bf7b02693ed88752df877f10c2206292ffTejun Heo			pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
1121af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes				base, limit);
1131da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			continue;
1143aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		}
1151da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1163aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if (!limit) {
1172706a0bf7b02693ed88752df877f10c2206292ffTejun Heo			pr_info("Skipping node entry %d (base %Lx)\n",
1181af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes				i, base);
1191da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			continue;
1201da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
1211da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		if ((base >> 8) & 3 || (limit >> 8) & 3) {
1222706a0bf7b02693ed88752df877f10c2206292ffTejun Heo			pr_err("Node %d using interleaving mode %Lx/%Lx\n",
1231af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes			       nodeid, (base >> 8) & 3, (limit >> 8) & 3);
124940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heo			return -EINVAL;
1253aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		}
1264697bdcc945c094d2c8a4876a24faeaf31a283e0Tejun Heo		if (node_isset(nodeid, numa_nodes_parsed)) {
1271af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes			pr_info("Node %d already present, skipping\n",
1281af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes				nodeid);
1291da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			continue;
1301da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
1311da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1323aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		limit >>= 16;
1333aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		limit <<= 24;
1341da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		limit |= (1<<24)-1;
135ffd10a2b77bca50dd05ba26acd5a6e68bcc8f61fMagnus Damm		limit++;
1361da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1378ee2debce32412118cf8c239e0026ace56ea1425David Rientjes		if (limit > end)
1388ee2debce32412118cf8c239e0026ace56ea1425David Rientjes			limit = end;
1391da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		if (limit <= base)
1403aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			continue;
1413aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
1421da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		base >>= 16;
1433aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		base <<= 24;
1443aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
1453aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if (base < start)
1463aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			base = start;
1473aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if (limit > end)
1483aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			limit = end;
1493aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if (limit == base) {
1501af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes			pr_err("Empty node %d\n", nodeid);
1513aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			continue;
1521da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
1533aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if (limit < base) {
1542706a0bf7b02693ed88752df877f10c2206292ffTejun Heo			pr_err("Node %d bogus settings %Lx-%Lx.\n",
1553aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			       nodeid, base, limit);
1561da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds			continue;
1573aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		}
1583aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
1591da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		/* Could sort here, but pun for now. Should not happen anyroads. */
1603aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra		if (prevbase > base) {
1612706a0bf7b02693ed88752df877f10c2206292ffTejun Heo			pr_err("Node map not sorted %Lx,%Lx\n",
1623aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra			       prevbase, base);
163940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heo			return -EINVAL;
1641da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		}
1653aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
1662706a0bf7b02693ed88752df877f10c2206292ffTejun Heo		pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
1671af5ba514f0c2f2e2af965a4ffa5e8ab269271b9David Rientjes			nodeid, base, limit);
1683aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra
1691da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds		prevbase = base;
17091556237ec872e1029e3036174bae3b1a8df65ebTejun Heo		numa_add_memblk(nodeid, base, limit);
17192d4a4371eeb89e1e12b9ebbed0956f499b6c2c0Tejun Heo		node_set(nodeid, numa_nodes_parsed);
1723aa88cdf6bcc9e510c0707581131b821a7d3b7cbCarlos R. Mafra	}
1731da177e4c3f41524e886b7f1b8a0c1fc7321cacLinus Torvalds
1744697bdcc945c094d2c8a4876a24faeaf31a283e0Tejun Heo	if (!nodes_weight(numa_nodes_parsed))
175940fed2e79a15cf0d006c860d7811adbe5c19882Tejun Heo		return -ENOENT;
17645fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo
17745fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	/*
17845fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	 * We seem to have valid NUMA configuration.  Map apicids to nodes
17945fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	 * using the coreid bits from early_identify_cpu.
18045fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	 */
18145fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	bits = boot_cpu_data.x86_coreid_bits;
18245fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	cores = 1 << bits;
18345fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	apicid_base = 0;
18445fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo
18545fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	/* get the APIC ID of the BSP early for systems with apicid lifting */
18645fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	early_get_boot_cpu_id();
18745fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	if (boot_cpu_physical_apicid > 0) {
18845fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo		pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
18945fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo		apicid_base = boot_cpu_physical_apicid;
19045fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo	}
19145fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo
19292d4a4371eeb89e1e12b9ebbed0956f499b6c2c0Tejun Heo	for_each_node_mask(i, numa_nodes_parsed)
19345fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo		for (j = apicid_base; j < cores + apicid_base; j++)
19445fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo			set_apicid_to_node((i << bits) + j, i);
19545fe6c78c4ccc384044d1b4877eebe7acf359e76Tejun Heo
1968ee2debce32412118cf8c239e0026ace56ea1425David Rientjes	return 0;
1978ee2debce32412118cf8c239e0026ace56ea1425David Rientjes}
198