1#!/bin/sh
2##############################################################################
3#                                                                            #
4# Copyright (c) International Business Machines  Corp., 2007                 #
5#               Sivakumar Chinnaiah, Sivakumar.C@in.ibm.com                  #
6# Copyright (c) Linux Test Project, 2016                                     #
7#                                                                            #
8# This program is free software: you can redistribute it and/or modify       #
9# it under the terms of the GNU General Public License as published by       #
10# the Free Software Foundation, either version 3 of the License, or          #
11# (at your option) any later version.                                        #
12#                                                                            #
13# This program is distributed in the hope that it will be useful,            #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
16# GNU General Public License for more details.                               #
17#                                                                            #
18# You should have received a copy of the GNU General Public License          #
19# along with this program. If not, see <http://www.gnu.org/licenses/>.       #
20#                                                                            #
21##############################################################################
22#                                                                            #
23# Description:  Test Basic functionality of numactl command.                 #
24#               Test #1: Verifies cpunodebind and membind                    #
25#               Test #2: Verifies preferred node bind for memory allocation  #
26#               Test #3: Verifies share memory allocation on preferred node  #
27#               Test #4: Verifies memory interleave on all nodes             #
28#               Test #5: Verifies share memory interleave on all nodes       #
29#               Test #6: Verifies physcpubind                                #
30#               Test #7: Verifies localalloc                                 #
31#               Test #8: Verifies memhog                                     #
32#               Test #9: Verifies numa_node_size api                         #
33#               Test #10:Verifies Migratepages                               #
#               Test #11:Verifies hugepage allocated on specified node       #
35#               Test #12:Verifies THP memory allocated on preferred node     #
36#                                                                            #
37##############################################################################
38
# Settings for the test library sourced below (tst_test.sh); presumably it
# reads them to pick the setup hook, the test function prefix and count, and
# to check the prerequisites before any test case runs.
TST_CNT=12
TST_SETUP=setup
TST_TESTFUNC=test
TST_NEEDS_TMPDIR=1
TST_NEEDS_ROOT=1
# bc does the decimal arithmetic on numastat values in most test cases, so it
# has to be listed together with the other external tools.
TST_NEEDS_CMDS="numactl numastat awk bc"
45
46. tst_test.sh
47
#
# Prints the value of the given numa node from the "Total" row of the
# `numastat -p` output.
#
# The column is located through the "Node <N>" header row, so node numbers
# with gaps (e.g. nodes 0 and 8) still map to the right column. When no such
# header row is recognised, the historical "node number + 2" column is used.
# Prints 0 when no value could be extracted, so that callers always get a
# number to calculate with.
#
# $1 - Pid number.
# $2 - Node number.
#
extract_numastat_p()
{
	local pid=$1
	local node=$2

	numastat -p "$pid" | awk -v node="$node" '
		$1 == "Node" {
			seen = 0
			for (i = 1; i < NF; i++) {
				if ($i != "Node")
					continue
				seen++
				if ($(i + 1) == node)
					col = seen + 1
			}
		}
		/^Total/ {
			if (!col)
				col = node + 2
			val = $col
		}
		END {
			if (val == "")
				val = 0
			print val
		}
	'
}
61
#
# Polls /proc/<pid>/stat until the process reports state 'T' (stopped, see
# proc(5)). Checks up to 20 times with a 50ms pause after each miss and calls
# tst_brk TBROK when the state was never observed.
#
# $1 - Pid number.
#
wait_for_support_numa()
{
	local pid=$1
	local retries=20
	local state

	while [ "$retries" -gt 0 ]; do
		# The read fails and leaves $state empty once the process is gone;
		# that is handled like any other "not stopped yet" answer.
		state=$(awk '{print $3}' "/proc/$pid/stat" 2>/dev/null)

		if [ "$state" = 'T' ]; then
			return 0
		fi

		retries=$((retries-1))
		tst_sleep 50ms
	done

	tst_brk TBROK "Timeouted while waiting for support_numa ($pid)"
}
82
#
# Test setup: exports the size constants used by the test cases, collects the
# node list printed on the "nodebind" line of `numactl --show` and calls
# tst_brk TCONF when that list holds fewer than two nodes.
#
setup()
{
	MB=$((1024*1024))
	PAGE_SIZE=$(getconf PAGE_SIZE)
	# Second field of the "Hugepagesize:" line in /proc/meminfo.
	HPAGE_SIZE=$(awk '/Hugepagesize:/ {print $2}' /proc/meminfo)
	export MB PAGE_SIZE HPAGE_SIZE

	# Everything after the colon on the nodebind line is the node list.
	nodes_list=$(numactl --show | grep nodebind | cut -d ':' -f 2)

	# Let the shell split the list and count the resulting words.
	set -- $nodes_list
	total_nodes=$#

	tst_res TINFO "The system contains $total_nodes nodes: $nodes_list"
	if [ $total_nodes -gt 1 ]; then
		return
	fi

	tst_brk TCONF "your machine does not support numa policy
		or your machine is not a NUMA machine"
}
102
# Verification of memory allocated on a node
#
# For every node in $nodes_list: starts support_numa under numactl with both
# cpu and memory bound to that node, waits for it to reach the stopped state
# and requires the numastat figure for the node, scaled by $MB, to be at
# least $MB.
test1()
{
	Mem_curr=0

	for node in $nodes_list; do
		numactl --cpunodebind=$node --membind=$node support_numa alloc_1MB &
		pid=$!

		wait_for_support_numa $pid

		# A missing reading is counted as 0 instead of being handed to bc
		# as an empty operand.
		Mem_curr=$(extract_numastat_p $pid $node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		# The sample is taken, the stopped helper can be resumed.
		kill -CONT $pid >/dev/null 2>&1

		# Fail closed: only an explicit "0" (not less) from bc passes; an
		# empty answer used to slip through as a pass.
		if [ "$(echo "${Mem_curr:-0} < $MB" | bc)" != "0" ]; then
			tst_res TFAIL \
				"NUMA memory allocated in node$node is less than expected"
			return
		fi
	done

	tst_res TPASS "NUMA local node and memory affinity"
}
127
# Verification of memory allocated on preferred node
#
# For every node: runs support_numa with its cpus bound to the node and the
# following node of $nodes_list (the first one for the last node) as preferred
# memory node, then requires the numastat figure of the preferred node,
# scaled by $MB, to be at least $MB.
test2()
{
	local first_node

	Mem_curr=0

	# Keep the positional parameters one step ahead of $node: after the
	# shift, $1 is the next node, or unset once the list is exhausted.
	set -- $nodes_list
	first_node=$1

	for node in $nodes_list; do
		shift
		Preferred_node=${1:-$first_node}	# wrap around for the last node

		numactl --cpunodebind=$node --preferred=$Preferred_node support_numa alloc_1MB &
		pid=$!

		wait_for_support_numa $pid

		# A missing reading is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $Preferred_node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		kill -CONT $pid >/dev/null 2>&1

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $MB" | bc)" != "0" ]; then
			tst_res TFAIL \
				"NUMA memory allocated in node$Preferred_node is less than expected"
			return
		fi
	done

	tst_res TPASS "NUMA preferred node policy"
}
162
# Verification of shared memory allocated on preferred node
#
# Same walk over $nodes_list as the private-memory variant, but support_numa
# is started with alloc_1MB_shared: cpus are bound to the node, the following
# node (the first one for the last node) is the preferred memory node and
# must show at least $MB in numastat once scaled by $MB.
test3()
{
	local first_node

	Mem_curr=0

	# Keep the positional parameters one step ahead of $node: after the
	# shift, $1 is the next node, or unset once the list is exhausted.
	set -- $nodes_list
	first_node=$1

	for node in $nodes_list; do
		shift
		Preferred_node=${1:-$first_node}	# wrap around for the last node

		numactl --cpunodebind=$node --preferred=$Preferred_node support_numa alloc_1MB_shared &
		pid=$!

		wait_for_support_numa $pid

		# A missing reading is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $Preferred_node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		kill -CONT $pid >/dev/null 2>&1

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $MB" | bc)" != "0" ]; then
			tst_res TFAIL \
				"NUMA share memory allocated in node$Preferred_node is less than expected"
			return
		fi
	done

	tst_res TPASS "NUMA share memory allocated in preferred node"
}
198
# Verification of memory interleaved on all nodes
#
# Starts one support_numa under `numactl --interleave=all` and requires every
# node of $nodes_list to show at least $MB / $total_nodes in numastat (after
# scaling by $MB).
test4()
{
	local failed_node=

	Mem_curr=0
	# Memory will be allocated using round robin on nodes.
	Exp_incr=$(echo "$MB / $total_nodes" |bc)

	numactl --interleave=all support_numa alloc_1MB &
	pid=$!

	wait_for_support_numa $pid

	for node in $nodes_list; do
		# A missing reading is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $Exp_incr" | bc)" != "0" ]; then
			failed_node=$node
			break
		fi
	done

	# All samples are taken (or one failed); resume the helper exactly once.
	kill -CONT $pid >/dev/null 2>&1

	if [ -n "$failed_node" ]; then
		tst_res TFAIL \
			"NUMA interleave memory allocated in node$failed_node is less than expected"
		return
	fi

	tst_res TPASS "NUMA interleave policy"
}
225
# Verification of shared memory interleaved on all nodes
#
# Starts one support_numa (alloc_1MB_shared) under `numactl --interleave=all`
# and requires every node of $nodes_list to show at least $MB / $total_nodes
# in numastat (after scaling by $MB).
test5()
{
	local failed_node=

	Mem_curr=0
	# Memory will be allocated using round robin on nodes.
	Exp_incr=$(echo "$MB / $total_nodes" |bc)

	numactl --interleave=all support_numa alloc_1MB_shared &
	pid=$!

	wait_for_support_numa $pid

	for node in $nodes_list; do
		# A missing reading is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $Exp_incr" | bc)" != "0" ]; then
			failed_node=$node
			break
		fi
	done

	# All samples are taken (or one failed); resume the helper exactly once.
	kill -CONT $pid >/dev/null 2>&1

	if [ -n "$failed_node" ]; then
		tst_res TFAIL \
			"NUMA interleave share memory allocated in node$failed_node is less than expected"
		return
	fi

	tst_res TPASS "NUMA interleave policy on shared memory"
}
253
# Verification of physical cpu bind
#
# Starts `support_numa pause` under `numactl --physcpubind=<cpu>` and compares
# field 39 of /proc/<pid>/stat (the cpu the process last ran on, see proc(5))
# with the requested cpu. A process that disappears before it can be
# inspected is reported as a failure instead of slipping through as a pass.
test6()
{
	no_of_cpus=$(tst_ncpus)	#no. of cpu's exist
	# not sure whether cpu's can't be in odd number
	run_on_cpu=$(( (no_of_cpus + 1) / 2 ))
	running_on_cpu=

	numactl --physcpubind=$run_on_cpu support_numa pause & #just waits for sigint
	pid=$!

	# Poll while the command name still reads "(numactl)"; presumably it
	# changes once numactl has exec'ed support_numa. An empty $var means the
	# stat file could not be read, i.e. the process is already gone.
	var=$(awk '{ print $2 }' /proc/$pid/stat 2>/dev/null)
	while [ "$var" = '(numactl)' ]; do
		var=$(awk '{ print $2 }' /proc/$pid/stat 2>/dev/null)
		tst_sleep 100ms
	done

	# Warning !! 39 represents cpu number, on which process pid is currently running and
	# this may change if Some more fields are added in the middle, may be in future
	if [ -n "$var" ]; then
		running_on_cpu=$(awk '{ print $39; }' /proc/$pid/stat 2>/dev/null)
	fi

	if [ -z "$running_on_cpu" ]; then
		# Nothing left to signal, so there is no kill on this path.
		tst_res TFAIL "support_numa ($pid) exited before its cpu could be read"
		return
	fi

	# String comparison: anything that is not exactly the expected cpu
	# number fails, including unexpected non-numeric content.
	if [ "$running_on_cpu" != "$run_on_cpu" ]; then
		tst_res TFAIL \
			"Process running on cpu$running_on_cpu but expected to run on cpu$run_on_cpu"
		ROD kill -INT $pid
		return
	fi

	ROD kill -INT $pid

	tst_res TPASS "NUMA phycpubind policy"
}
285
# Verification of local node allocation
#
# For every node in $nodes_list: starts support_numa under numactl with the
# cpus bound to that node and --localalloc, waits for it to reach the stopped
# state and requires the numastat figure for the same node, scaled by $MB, to
# be at least $MB.
test7()
{
	Mem_curr=0

	for node in $nodes_list; do
		numactl --cpunodebind=$node --localalloc support_numa alloc_1MB &
		pid=$!

		wait_for_support_numa $pid

		# A missing reading is counted as 0 instead of being handed to bc
		# as an empty operand.
		Mem_curr=$(extract_numastat_p $pid $node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		# The sample is taken, the stopped helper can be resumed.
		kill -CONT $pid >/dev/null 2>&1

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $MB" | bc)" != "0" ]; then
			tst_res TFAIL \
				"NUMA localnode memory allocated in node$node is less than expected"
			return
		fi
	done

	tst_res TPASS "NUMA local node allocation"
}
310
# Verification of memhog with interleave policy
#
# Starts memhog under `numactl --interleave=all`, waits (up to 20 x 50ms) for
# the first byte in its log file and then requires every node of $nodes_list
# to show at least $MB / $total_nodes in numastat (after scaling by $MB).
# memhog is killed before the verdict is reported.
test8()
{
	local retries=20
	local failed_node=

	Mem_curr=0
	# Memory will be allocated using round robin on nodes.
	Exp_incr=$(echo "$MB / $total_nodes" |bc)

	# With this redirection order only stdout lands in the log file; stderr
	# keeps going to wherever the test's stdout points.
	numactl --interleave=all memhog -r1000000 1MB 2>&1 >ltp_numa_test8.log &
	pid=$!

	while [ $retries -gt 0 ]; do

		if grep -m1 -q '.' ltp_numa_test8.log; then
			break
		fi

		retries=$((retries-1))
		tst_sleep 50ms
	done

	for node in $nodes_list; do
		# A missing reading (e.g. memhog never came up) is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $Exp_incr" | bc)" != "0" ]; then
			failed_node=$node
			break
		fi
	done

	kill -KILL $pid >/dev/null 2>&1

	if [ -n "$failed_node" ]; then
		tst_res TFAIL \
			"NUMA interleave memhog in node$failed_node is less than expected"
		return
	fi

	tst_res TPASS "NUMA MEMHOG policy"
}
346
# Function:     hardware checking with numa_node_size api
#
# Description:  - Passes when the first line of the `numactl --hardware`
#                 output starts with "available: <count> nodes"; <count> is
#                 left in $RC. Anything else, including no output at all, is
#                 reported as a failure.
#
# Input:        - o/p of numactl --hardware command which is expected in the format
#                 shown below
#               available: 2 nodes (0-1)
#               node 0 size: 7808 MB
#               node 0 free: 7457 MB
#               node 1 size: 5807 MB
#               node 1 free: 5731 MB
#               node distances:
#               node   0   1
#                 0:  10  20
#                 1:  20  10
#
test9()
{
	local label count unit rest

	RC=0

	numactl --hardware > gavail_nodes

	# Split the first line into its leading three words; all of them stay
	# empty when the file has no content.
	read -r label count unit rest < gavail_nodes

	if [ "$label" = "available:" ] && [ "$unit" = "nodes" ]; then
		RC=$count
		tst_res TPASS "NUMA policy on lib NUMA_NODE_SIZE API"
	else
		tst_res TFAIL "Failed with numa policy"
	fi
}
381
# Verification of migratepages
#
# For every node: starts support_numa with that node as preferred memory
# node, asks migratepages to move the pages of the process to the following
# node of $nodes_list (the first one for the last node) and requires that
# target node to show at least $MB in numastat (after scaling by $MB).
test10()
{
	local first_node

	Mem_curr=0

	# Keep the positional parameters one step ahead of $node: after the
	# shift, $1 is the next node, or unset once the list is exhausted.
	set -- $nodes_list
	first_node=$1

	for node in $nodes_list; do
		shift
		Preferred_node=${1:-$first_node}	# migration target, wraps around

		numactl --preferred=$node support_numa alloc_1MB &
		pid=$!

		wait_for_support_numa $pid

		migratepages $pid $node $Preferred_node

		# A missing reading is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $Preferred_node)
		Mem_curr=$(echo "${Mem_curr:-0} * $MB" | bc)

		kill -CONT $pid >/dev/null 2>&1

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $MB" | bc)" != "0" ]; then
			tst_res TFAIL \
				"NUMA migratepages is not working fine"
			return
		fi
	done

	tst_res TPASS "NUMA MIGRATEPAGES policy"
}
417
# Verification of hugepage memory allocated on a node
#
# For every node: raises the node's nr_hugepages by one, runs support_numa
# (alloc_1huge_page) bound to the node, reads the node's column of the "Huge"
# row of `numastat -p` and requires it (times 1024) to be at least
# $HPAGE_SIZE. The original nr_hugepages value is written back before the
# verdict for the node is reported.
test11()
{
	local nr_file

	Mem_huge=0
	Sys_node=/sys/devices/system/node

	if [ ! -d "/sys/kernel/mm/hugepages/" ]; then
		tst_res TCONF "hugepage is not supported"
		return
	fi

	for node in $nodes_list; do
		nr_file=${Sys_node}/node${node}/hugepages/hugepages-${HPAGE_SIZE}kB/nr_hugepages

		Ori_hpgs=$(cat $nr_file)
		New_hpgs=$((Ori_hpgs + 1))
		echo $New_hpgs >$nr_file

		# String comparison, so an unreadable counter (empty value) also
		# ends up here instead of making test(1) error out.
		Chk_hpgs=$(cat $nr_file)
		if [ "$Chk_hpgs" != "$New_hpgs" ]; then
			tst_res TCONF "hugepage is not enough to test"
			return
		fi

		numactl --cpunodebind=$node --membind=$node support_numa alloc_1huge_page &
		pid=$!
		wait_for_support_numa $pid

		Mem_huge=$(numastat -p $pid | awk -v col=$((node+2)) '/^Huge/ {print $col}')
		# Drop the fractional part; a missing reading counts as 0 so the
		# arithmetic below cannot fail and skip the cleanup.
		Mem_huge=${Mem_huge%.*}
		Mem_huge=$((${Mem_huge:-0} * 1024))

		kill -CONT $pid >/dev/null 2>&1
		echo $Ori_hpgs >$nr_file

		if [ "$Mem_huge" -lt "$HPAGE_SIZE" ]; then
			tst_res TFAIL \
				"NUMA memory allocated in node$node is less than expected"
			return
		fi
	done

	tst_res TPASS "NUMA local node hugepage memory allocated"
}
461
# Verification of THP memory allocated on preferred node
#
# Skipped with TCONF unless transparent hugepages are set to "[always]".
# For every node: runs support_numa (alloc_2HPSZ_THP) with its cpus bound to
# the node and the following node of $nodes_list (the first one for the last
# node) as preferred memory node, then requires the numastat figure of the
# preferred node, times 1024, to be at least 2 * $HPAGE_SIZE.
test12()
{
	local first_node

	Mem_curr=0

	if ! grep -q '\[always\]' /sys/kernel/mm/transparent_hugepage/enabled; then
		tst_res TCONF "THP is not supported/enabled"
		return
	fi

	# Keep the positional parameters one step ahead of $node: after the
	# shift, $1 is the next node, or unset once the list is exhausted.
	set -- $nodes_list
	first_node=$1

	for node in $nodes_list; do
		shift
		Preferred_node=${1:-$first_node}	# wrap around for the last node

		numactl --cpunodebind=$node --preferred=$Preferred_node support_numa alloc_2HPSZ_THP &
		pid=$!

		wait_for_support_numa $pid

		# A missing reading is counted as 0.
		Mem_curr=$(extract_numastat_p $pid $Preferred_node)
		Mem_curr=$(echo "${Mem_curr:-0} * 1024" | bc)

		kill -CONT $pid >/dev/null 2>&1

		# Fail closed: only an explicit "0" (not less) from bc passes.
		if [ "$(echo "${Mem_curr:-0} < $HPAGE_SIZE * 2" | bc)" != "0" ]; then
			tst_res TFAIL \
				"NUMA memory allocated in node$Preferred_node is less than expected"
			return
		fi
	done

	tst_res TPASS "NUMA preferred node policy verified with THP enabled"
}
501
502tst_run
503