# Copyright 2007-2010 Google Inc.  Released under the GPL v2
__author__ = "duanes (Duane Sand), pdahl (Peter Dahl)"

# A basic cpuset/cgroup container manager for limiting memory use during tests
#   for use on kernels not running some site-specific container manager

import os, sys, re, glob, fcntl, logging
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error

SUPER_ROOT = ''      # root of all containers or cgroups
NO_LIMIT = (1 << 63) - 1   # containername/memory.limit_in_bytes if no limit

# propio service classes:
PROPIO_PRIO = 1
PROPIO_NORMAL = 2
PROPIO_IDLE = 3

# Module state filled in lazily by discover_container_style().
# super_root_path == '' means "not looked up yet"; None means that neither
# a cpuset nor a cgroup filesystem was found.
super_root_path = ''    # usually '/dev/cgroup'; '/dev/cpuset' on 2.6.18
cpuset_prefix = None    # usually 'cpuset.'; '' on 2.6.18
fake_numa_containers = False  # container mem via numa=fake mem nodes, else pages
mem_isolation_on = False
node_mbytes = 0         # mbytes in one typical mem node
root_container_bytes = 0  # squishy limit on effective size of root container


def discover_container_style():
    """Detect which container filesystem is active and cache the result.

    Sets the module globals super_root_path, cpuset_prefix,
    mem_isolation_on, fake_numa_containers, node_mbytes and
    root_container_bytes.  The lookup runs once; later calls return
    immediately because super_root_path is no longer ''.
    """
    global super_root_path, cpuset_prefix
    global mem_isolation_on, fake_numa_containers
    global node_mbytes, root_container_bytes
    if super_root_path != '':
        return  # already looked up
    if os.path.exists('/dev/cgroup/tasks'):
        # running on 2.6.26 or later kernel with containers on:
        super_root_path = '/dev/cgroup'
        cpuset_prefix = 'cpuset.'
        if get_boot_numa():
            mem_isolation_on = fake_numa_containers = True
        else:  # memcg containers IFF compiled-in & mounted & non-fakenuma boot
            fake_numa_containers = False
            mem_isolation_on = os.path.exists(
                    '/dev/cgroup/memory.limit_in_bytes')
            # TODO: handle possibility of where memcg is mounted as its own
            #       cgroup hierarchy, separate from cpuset??
    elif os.path.exists('/dev/cpuset/tasks'):
        # running on 2.6.18 kernel with containers on:
        super_root_path = '/dev/cpuset'
        cpuset_prefix = ''
        mem_isolation_on = fake_numa_containers = get_boot_numa() != ''
    else:
        # neither cpuset nor cgroup filesystem active:
        super_root_path = None
        cpuset_prefix = 'no_cpusets_or_cgroups_exist'
        mem_isolation_on = fake_numa_containers = False

    logging.debug('mem_isolation: %s', mem_isolation_on)
    logging.debug('fake_numa_containers: %s', fake_numa_containers)
    if fake_numa_containers:
        node_mbytes = int(mbytes_per_mem_node())
    elif mem_isolation_on:  # memcg-style containers
        # For now, limit total of all containers to using just 98% of system's
        #   visible total ram, to avoid oom events at system level, and avoid
        #   page reclaim overhead from going above kswapd highwater mark.
        # NOTE(review): the >> 2 / << 12 pair presumably converts kbytes to
        #   4 KiB pages and pages back to bytes -- confirm utils.memtotal()
        #   units.
        system_visible_pages = utils.memtotal() >> 2
        usable_pages = int(system_visible_pages * 0.98)
        root_container_bytes = usable_pages << 12
        logging.debug('root_container_bytes: %s',
                      utils.human_format(root_container_bytes))


def need_mem_containers():
    """Raise error.AutotestError unless mem-isolation containers are on."""
    discover_container_style()
    if not mem_isolation_on:
        raise error.AutotestError('Mem-isolation containers not enabled '
                                  'by latest reboot')


def need_fake_numa():
    """Raise error.AutotestError unless fake-numa containers are on."""
    discover_container_style()
    if not fake_numa_containers:
        raise error.AutotestError('fake=numa not enabled by latest reboot')


def full_path(container_name):
    """Return the filesystem path of a container under the super root."""
    discover_container_style()
    return os.path.join(super_root_path, container_name)


def unpath(container_path):
    """Inverse of full_path(): strip the super root and its trailing '/'."""
    # Make sure super_root_path holds the discovered root rather than the
    # '' placeholder before its length is used, as full_path() does.
    discover_container_style()
    return container_path[len(super_root_path)+1:]


def cpuset_attr(container_name, attr):
    """Return the path of a cpuset attribute file of a container."""
    discover_container_style()
    return os.path.join(super_root_path, container_name, cpuset_prefix+attr)


def io_attr(container_name, attr):
    """Return the path of an 'io.<attr>' file of a container."""
    discover_container_style()
    # current version assumes shared cgroup hierarchy
    return os.path.join(super_root_path, container_name, 'io.'+attr)


def tasks_path(container_name):
    """Return the path of a container's 'tasks' file."""
    return os.path.join(full_path(container_name), 'tasks')


def mems_path(container_name):
    """Return the path of a container's cpuset 'mems' file."""
    return cpuset_attr(container_name, 'mems')


def memory_path(container_name):
    """Return the '<container>/memory' path prefix of the memcg files.

    Callers append a suffix such as '.limit_in_bytes'.
    """
    # Same lazy discovery as full_path()/cpuset_attr(), so the result is
    # rooted at the real super root even when this is the first call made.
    discover_container_style()
    return os.path.join(super_root_path, container_name, 'memory')


def cpus_path(container_name):
    """Return the path of a container's cpuset 'cpus' file."""
    return cpuset_attr(container_name, 'cpus')


def container_exists(name):
    """Return True if name is not None and its tasks file exists."""
    return name is not None and os.path.exists(tasks_path(name))


def move_tasks_into_container(name, tasks):
    """Write each task id in tasks into the tasks file of container name.

    A failed write is re-raised only when utils.pid_is_alive() reports that
    the task still exists; otherwise the failure is ignored.
    """
    task_file = tasks_path(name)
    for task in tasks:
        try:
            logging.debug('moving task %s into container "%s"', task, name)
            utils.write_one_line(task_file, task)
        except Exception:
            if utils.pid_is_alive(task):
                raise   # task exists but couldn't move it
            # task is gone or zombie so ignore this exception


def move_self_into_container(name):
    """Move the current process into container name."""
    me = str(os.getpid())
    move_tasks_into_container(name, [me])
    logging.debug('running self (pid %s) in container "%s"', me, name)


def _avail_mbytes_via_nodes(parent):
    """Total mbytes of mem nodes available for new containers in parent."""
    free_nodes = available_exclusive_mem_nodes(parent)
    mbytes = nodes_avail_mbytes(free_nodes)
    # don't have exact model for how container mgr measures mem space
    # better here to underestimate than overestimate
    mbytes = max(mbytes - node_mbytes//2, 0)
    return mbytes


def _avail_bytes_via_pages(parent):
    """Return bytes of parent not yet given to existing child containers.

    Get memory bytes available to parent container which could
    be allocated exclusively to new child containers.
    This excludes mem previously allocated to existing children.
    """
    available = container_bytes(parent)
    mem_files_pattern = os.path.join(full_path(parent),
                                     '*', 'memory.limit_in_bytes')
    for mem_file in glob.glob(mem_files_pattern):
        child_container = unpath(os.path.dirname(mem_file))
        available -= container_bytes(child_container)
    return available


def avail_mbytes(parent=SUPER_ROOT):
    """Total mbytes available in parent, for exclusive use in new containers."""
    # fake_numa_containers is only meaningful after discovery has run.
    discover_container_style()
    if fake_numa_containers:
        return _avail_mbytes_via_nodes(parent)
    else:
        return _avail_bytes_via_pages(parent) >> 20


def delete_leftover_test_containers():
    """Release every container found directly under the super root."""
    # recover mems and cores tied up by containers of prior failed tests:
    for child in inner_containers_of(SUPER_ROOT):
        _release_container_nest(child)


def my_lock(lockname):
    """Take an exclusive flock on $AUTODIR/.cpuset.lock.<lockname>.

    Returns the open lock file, which must be passed to my_unlock().
    Raises KeyError if AUTODIR is not set in the environment.
    """
    # lockname is 'inner'
    lockdir = os.environ['AUTODIR']
    lockname = os.path.join(lockdir, '.cpuset.lock.'+lockname)
    lockfile = open(lockname, 'w')
    try:
        fcntl.flock(lockfile, fcntl.LOCK_EX)
    except Exception:
        lockfile.close()   # don't leak the descriptor if locking failed
        raise
    return lockfile


def my_unlock(lockfile):
    """Release the lock taken by my_lock() and close its file."""
    try:
        fcntl.flock(lockfile, fcntl.LOCK_UN)
    finally:
        lockfile.close()


# Convert '1-3,7,9-12' to set(1,2,3,7,9,10,11,12)
def rangelist_to_set(rangelist):
    """Parse a comma-separated list of ints and int ranges into a set.

    An empty or None rangelist gives an empty set.  Raises ValueError for
    an item that is neither 'N' nor 'N-M'.
    """
    result = set()
    if not rangelist:
        return result
    for x in rangelist.split(','):
        if re.match(r'^(\d+)$', x):
            result.add(int(x))
            continue
        m = re.match(r'^(\d+)-(\d+)$', x)
        if m:
            start = int(m.group(1))
            end = int(m.group(2))
            result.update(set(range(start, end+1)))
            continue
        msg = 'Cannot understand data input: %s %s' % (x, rangelist)
        raise ValueError(msg)
    return result


def my_container_name():
    """Return the container name of the current process.

    Get current process's inherited or self-built container name
    within /dev/cpuset or /dev/cgroup.  Is '' for root container.
    """
    name = utils.read_one_line('/proc/%i/cpuset' % os.getpid())
    return name[1:]   # strip leading /


def get_mem_nodes(container_name):
    """Return the set of mem nodes listed in a container's mems file.

    All mem nodes now available to a container, both exclusive & shared.
    Returns an empty set when the mems file does not exist.
    """
    file_name = mems_path(container_name)
    if os.path.exists(file_name):
        return rangelist_to_set(utils.read_one_line(file_name))
    else:
        return set()


def _busy_mem_nodes(parent_container):
    """Return the mem nodes used by existing children of parent_container.

    Get set of numa memory nodes now used (exclusively or shared)
    by existing children of parent container.
    """
    busy = set()
    mem_files_pattern = os.path.join(full_path(parent_container),
                                     '*', cpuset_prefix+'mems')
    for mem_file in glob.glob(mem_files_pattern):
        # child_container is a full path here, not a container name; it is
        # handed to get_mem_nodes() unchanged.
        child_container = os.path.dirname(mem_file)
        busy |= get_mem_nodes(child_container)
    return busy


def available_exclusive_mem_nodes(parent_container):
    """Return parent's mem nodes not used by any existing child.

    Get subset of numa memory nodes of parent container which could
    be allocated exclusively to new child containers.
    This excludes nodes now allocated to existing children.
    Raises error.AutotestError when fake-numa containers are not enabled.
    """
    need_fake_numa()
    available = get_mem_nodes(parent_container)
    available -= _busy_mem_nodes(parent_container)
    return available


def my_mem_nodes():
    """Get set of numa memory nodes owned by current process's container."""
    discover_container_style()
    if not mem_isolation_on:
        return set()    # as expected by vmstress
    return get_mem_nodes(my_container_name())


def my_available_exclusive_mem_nodes():
    """Return the current container's mem nodes free for new children.

    Get subset of numa memory nodes owned by current process's
    container, which could be allocated exclusively to new child
    containers.  This excludes any nodes now allocated
    to existing children.
    """
    return available_exclusive_mem_nodes(my_container_name())


def node_avail_kbytes(node):
    """Return the kbyte size assumed for a mem node (node is ignored)."""
    return node_mbytes << 10  # crude; fixed numa node size


def nodes_avail_mbytes(nodes):
    """Return nodes' combined user+avail size, in Mbytes."""
    return sum(node_avail_kbytes(n) for n in nodes) // 1024


def container_bytes(name):
    """Return the memory size of container name, in bytes.

    With fake-numa containers this is the combined size of the container's
    mem nodes.  Otherwise it is the first memory.limit_in_bytes below
    NO_LIMIT found walking from name up through its parents, or
    root_container_bytes when the walk reaches SUPER_ROOT with no limit set.
    """
    # fake_numa_containers and root_container_bytes are only meaningful
    # after discovery has run.
    discover_container_style()
    if fake_numa_containers:
        return nodes_avail_mbytes(get_mem_nodes(name)) << 20
    else:
        while True:
            limit_file = memory_path(name) + '.limit_in_bytes'
            limit = int(utils.read_one_line(limit_file))
            if limit < NO_LIMIT:
                return limit
            if name == SUPER_ROOT:
                return root_container_bytes
            name = os.path.dirname(name)


def container_mbytes(name):
    """Return the memory size of container name, in mbytes."""
    return container_bytes(name) >> 20


def mbytes_per_mem_node():
    """Get mbyte size of standard numa mem node, as float.

    (some nodes are bigger than this)
    Replaces utils.node_size().
    """
    numa = get_boot_numa()
    if numa.endswith('M'):
        return float(numa[:-1])  # mbyte size of fake nodes
    elif numa:
        nodecnt = int(numa)  # fake numa mem nodes for container isolation
    else:
        nodecnt = len(utils.numa_nodes())  # phys mem-controller nodes
    # Use guessed total physical mem size, not kernel's
    #   lesser 'available memory' after various system tables.
    return utils.rounded_memtotal() / (nodecnt * 1024.0)


def get_cpus(container_name):
    """Return the set of cpus listed in a container's cpus file.

    Returns an empty set when the cpus file does not exist.
    """
    file_name = cpus_path(container_name)
    if os.path.exists(file_name):
        return rangelist_to_set(utils.read_one_line(file_name))
    else:
        return set()


def get_tasks(container_name):
    """Return the lines of a container's tasks file, right-stripped.

    Returns [] when reading fails and the tasks file no longer exists;
    an IOError on a file that still exists is re-raised.
    """
    file_name = tasks_path(container_name)
    try:
        with open(file_name) as task_file:
            tasks = [x.rstrip() for x in task_file.readlines()]
    except IOError:
        if os.path.exists(file_name):
            raise
        tasks = []   # container doesn't exist anymore
    return tasks


def inner_containers_of(parent):
    """Return the names of the containers directly inside parent."""
    pattern = os.path.join(full_path(parent), '*/tasks')
    return [unpath(os.path.dirname(task_file))
            for task_file in glob.glob(pattern)]


def _release_container_nest(nest):
    """Destroy a container, and any nested sub-containers."""
    nest_path = full_path(nest)
    if os.path.exists(nest_path):

        # bottom-up walk of tree, releasing all nested sub-containers
        for child in inner_containers_of(nest):
            _release_container_nest(child)

        logging.debug("releasing container %s", nest)

        # Transfer any survivor tasks (e.g. self) to parent container
        parent = os.path.dirname(nest)
        move_tasks_into_container(parent, get_tasks(nest))

        # remove the now-empty outermost container of this nest
        if os.path.exists(nest_path):
            os.rmdir(nest_path)  # nested, or dead manager


def release_container(container_name=None):
    """Destroy a container; defaults to the current process's container."""
    my_container = my_container_name()
    if container_name is None:
        container_name = my_container
    _release_container_nest(container_name)
    displaced = my_container_name()
    if displaced != my_container:
        logging.debug('now running self (pid %d) in container "%s"',
                      os.getpid(), displaced)


def remove_empty_prio_classes(prios):
    """Remove prio classes whose set of allowed priorities is empty.

    e.g 'no:3;rt:;be:3;id:'  -->  'no:3;be:3'
    An entry with no ':' at all (including an empty entry) is dropped too.
    """
    kept = []
    for prio in prios.split(';'):
        fields = prio.split(':')
        # fields[1] is the priority list; it is missing when there is no ':'
        if len(fields) > 1 and fields[1]:
            kept.append(prio)
    return ';'.join(kept)


def all_drive_names():
    """Return the list of all disk drives sda,sdb,...

    Uses /sys/block/sd*, falling back to /sys/block/hd* when none match.
    """
    paths = glob.glob('/sys/block/sd*')
    if not paths:
        paths = glob.glob('/sys/block/hd*')
    return [os.path.basename(path) for path in paths]


def set_io_controls(container_name, disks=[], ioprio_classes=[PROPIO_NORMAL],
                    io_shares=[95], io_limits=[0]):
    """Set the propio controls for one container, for selected disks.

    Writes directly to /dev/cgroup/container_name/io.io_service_level
    without using containerd or container.py.
    See wiki ProportionalIOScheduler for definitions.

    ioprio_classes: list of service classes, one per disk
        using numeric propio service classes as used by kernel API, namely
            1: RT, Real Time, aka PROPIO_PRIO
            2: BE, Best Effort, aka PROPIO_NORMAL
            3: PROPIO_IDLE
    io_shares: list of disk-time-fractions, one per disk,
        as percentage integer 0..100
    io_limits: list of limit on/off, one per disk
        0: no limit, shares use of other containers' unused disk time
        1: limited, container's use of disk time is capped to given DTF
    ioprio_classes defaults to best-effort
    io_limit defaults to no limit, use slack time

    Raises error.AutotestError when the four lists differ in length.
    The default lists are never mutated, only rebound to new lists.
    """
    if not disks:  # defaults to all drives
        disks = all_drive_names()
        # replicate the first value of each control across every drive
        io_shares = [io_shares[0]] * len(disks)
        ioprio_classes = [ioprio_classes[0]] * len(disks)
        io_limits = [io_limits[0]] * len(disks)
    if not (len(disks) == len(ioprio_classes) and len(disks) == len(io_shares)
            and len(disks) == len(io_limits)):
        raise error.AutotestError('Unequal number of values for io controls')
    service_level = io_attr(container_name, 'io_service_level')
    if not os.path.exists(service_level):
        return  # kernel predates propio features
                #     or io cgroup is mounted separately from cpusets
    disk_infos = []
    for disk, ioclass, limit, share in zip(disks, ioprio_classes,
                                           io_limits, io_shares):
        parts = (disk, str(ioclass), str(limit), str(share))
        disk_info = ' '.join(parts)
        utils.write_one_line(service_level, disk_info)
        disk_infos.append(disk_info)
    logging.debug('set_io_controls of %s to %s',
                  container_name, ', '.join(disk_infos))


def abbrev_list(vals):
    """Condense unsigned (0,4,5,6,7,10) to '0,4-7,10'.

    Repeated values are collapsed before condensing.  An empty input
    gives ''.
    """
    ranges = []
    lower = 0
    upper = -2   # so that lower > upper until a first value has been seen
    # The trailing -1 sentinel never continues a run of unsigned values,
    # so it flushes the final range.
    for val in sorted(set(vals))+[-1]:
        if val != upper+1:
            if lower == upper:
                ranges.append(str(lower))
            elif lower <= upper:
                ranges.append('%d-%d' % (lower, upper))
            lower = val
        upper = val
    return ','.join(ranges)


def create_container_with_specific_mems_cpus(name, mems, cpus):
    """Create a fake-numa container owning the given mem nodes and cpus.

    Raises error.AutotestError when fake-numa containers are not enabled.
    """
    need_fake_numa()
    os.mkdir(full_path(name))
    utils.write_one_line(cpuset_attr(name, 'mem_hardwall'), '1')
    utils.write_one_line(mems_path(name), ','.join(map(str, mems)))
    utils.write_one_line(cpus_path(name), ','.join(map(str, cpus)))
    logging.debug('container %s has %d cpus and %d nodes totalling %s bytes',
                  name, len(cpus), len(get_mem_nodes(name)),
                  utils.human_format(container_bytes(name)))


def create_container_via_memcg(name, parent, bytes, cpus):
    """Create container via direct memcg cgroup writes.

    The new container gets the parent's mem nodes, a memory limit of
    `bytes` and the given cpus.
    """
    os.mkdir(full_path(name))
    nodes = utils.read_one_line(mems_path(parent))
    utils.write_one_line(mems_path(name), nodes)  # inherit parent's nodes
    utils.write_one_line(memory_path(name)+'.limit_in_bytes', str(bytes))
    utils.write_one_line(cpus_path(name), ','.join(map(str, cpus)))
    logging.debug('Created container %s directly via memcg,'
                  ' has %d cpus and %s bytes',
                  name, len(cpus), utils.human_format(container_bytes(name)))


def _create_fake_numa_container_directly(name, parent, mbytes, cpus):
    """Create a fake-numa container of about `mbytes`, nested under `parent`.

    Holds the 'inner' lock while choosing mem nodes and creating the
    container, to serialize the race between parallel tests.

    Raises error.AutotestError when the nodes available under `parent`
    cannot supply the requested amount of memory.
    """
    need_fake_numa()
    lockfile = my_lock('inner')   # serialize race between parallel tests
    try:
        # Pick specific mem nodes for new cpuset's exclusive use
        # For now, arbitrarily pick highest available node numbers
        needed_kbytes = mbytes * 1024
        nodes = sorted(available_exclusive_mem_nodes(parent))
        kbytes = 0
        nodecnt = 0
        # Walk from the highest-numbered node downward until enough
        # memory has been gathered or the candidates run out.
        while kbytes < needed_kbytes and nodecnt < len(nodes):
            nodecnt += 1
            kbytes += node_avail_kbytes(nodes[-nodecnt])
        if kbytes < needed_kbytes:
            parent_mbytes = container_mbytes(parent)
            if mbytes > parent_mbytes:
                raise error.AutotestError(
                      "New container's %d Mbytes exceeds "
                      "parent container's %d Mbyte size"
                      % (mbytes, parent_mbytes))
            else:
                raise error.AutotestError(
                      "Existing sibling containers hold "
                      "%d Mbytes needed by new container"
                      % ((needed_kbytes - kbytes)//1024))
        # NOTE(review): if nodecnt is still 0 here (needed_kbytes <= 0),
        # nodes[-0:] is nodes[0:], i.e. every available node -- confirm
        # whether that is intended before relying on mbytes == 0.
        mems = nodes[-nodecnt:]

        create_container_with_specific_mems_cpus(name, mems, cpus)
    finally:
        my_unlock(lockfile)


def create_container_directly(name, mbytes, cpus):
    """Create container `name`, using fake-numa nodes or memcg as configured.

    The parent container is the directory part of `name`.  For the memcg
    path the size is converted from megabytes to bytes.
    """
    parent = os.path.dirname(name)
    if fake_numa_containers:
        _create_fake_numa_container_directly(name, parent, mbytes, cpus)
    else:
        create_container_via_memcg(name, parent, mbytes << 20, cpus)


def create_container_with_mbytes_and_specific_cpus(name, mbytes,
            cpus=None, root=SUPER_ROOT, io=None, move_in=True, timeout=0):
    """\
    Create a cpuset container and move job's current pid into it
    Allocate the list "cpus" of cpus to that container

            name = arbitrary string tag
            mbytes = requested memory for job in megabytes
            cpus = list of cpu indices to associate with the cpuset
                  defaults to all cpus avail with given root
            root = the parent cpuset to nest this new set within
                   '': unnested top-level container
            io = arguments for proportional IO containers, passed to
                   set_io_controls as keyword arguments; None means none
            move_in = True: Move current process into the new container now.
            timeout = must be 0: persist until explicitly deleted.
                   (this function does not read the value)

    Returns the new container's name, joined onto root.
    Raises error.AutotestError if the parent container does not exist,
    if no cpus are given, or if the container already exists.
    """
    need_mem_containers()
    if not container_exists(root):
        raise error.AutotestError('Parent container "%s" does not exist'
                                   % root)
    if cpus is None:
        # default to biggest container we can make under root
        cpus = get_cpus(root)
    else:
        cpus = set(cpus)  # interface uses list
    if not cpus:
        raise error.AutotestError('Creating container with no cpus')
    name = os.path.join(root, name)  # path relative to super_root
    if os.path.exists(full_path(name)):
        raise error.AutotestError('Container %s already exists' % name)
    create_container_directly(name, mbytes, cpus)
    # io defaults to None rather than a shared {} literal
    set_io_controls(name, **(io or {}))
    if move_in:
        move_self_into_container(name)
    return name


def get_boot_numa():
    """Return the boot-time numa=fake=xyz option value for the current boot.

    Scans /proc/cmdline for an argument such as numa=fake=nnn or
    numa=fake=nnnM and returns the part after 'numa=fake='; returns ''
    when no such argument is present.
    """
    label = 'numa=fake='
    for arg in utils.read_one_line('/proc/cmdline').split():
        if arg.startswith(label):
            return arg[len(label):]
    return ''