1f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda/* 2f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved. 3f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 4f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Module Author: Kiyoshi Ueda 5f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 6f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * This file is released under the GPL. 7f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 8f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Throughput oriented path selector. 9f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 10f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 11f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#include "dm.h" 12f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#include "dm-path-selector.h" 13f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 145a0e3ad6af8660be21ca98a971cd00f331318c05Tejun Heo#include <linux/slab.h> 15056075c76417b112b4924e7b6386fdc6dfc9ac03Paul Gortmaker#include <linux/module.h> 165a0e3ad6af8660be21ca98a971cd00f331318c05Tejun Heo 17f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define DM_MSG_PREFIX "multipath service-time" 18f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MIN_IO 1 19f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MAX_RELATIVE_THROUGHPUT 100 20f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7 21f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT) 22f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_VERSION "0.2.0" 23f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 24f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastruct selector { 25f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct list_head valid_paths; 26f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct list_head failed_paths; 27f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda}; 28f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 29f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastruct path_info { 30f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct list_head list; 31f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct dm_path *path; 32f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned repeat_count; 33f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned relative_throughput; 34f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_t in_flight_size; /* Total size of in-flight I/Os */ 35f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda}; 36f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 37f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic struct selector *alloc_selector(void) 38f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 39f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); 40f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 41f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (s) { 42f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda INIT_LIST_HEAD(&s->valid_paths); 43f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda INIT_LIST_HEAD(&s->failed_paths); 44f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 45f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 46f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return s; 47f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 48f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 49f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_create(struct path_selector *ps, unsigned argc, char **argv) 50f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 51f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = alloc_selector(); 52f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 53f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!s) 54f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -ENOMEM; 55f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 56f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda ps->context = s; 57f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 58f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 59f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 60f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void free_paths(struct list_head *paths) 61f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 62f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi, *next; 63f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 64f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_for_each_entry_safe(pi, next, paths, list) { 65f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_del(&pi->list); 66f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda kfree(pi); 67f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 68f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 69f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 70f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void st_destroy(struct path_selector *ps) 71f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 72f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 73f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 74f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda free_paths(&s->valid_paths); 75f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda free_paths(&s->failed_paths); 76f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda kfree(s); 77f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda ps->context = NULL; 78f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 79f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 80f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_status(struct path_selector *ps, struct dm_path *path, 81f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda status_type_t type, char *result, unsigned maxlen) 82f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 83f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned sz = 0; 84f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi; 85f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 86f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!path) 87f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMEMIT("0 "); 88f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda else { 89f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi = path->pscontext; 90f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 91f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda switch (type) { 92f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda case STATUSTYPE_INFO: 93f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMEMIT("%d %u ", atomic_read(&pi->in_flight_size), 94f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->relative_throughput); 95f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda break; 96f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda case STATUSTYPE_TABLE: 97f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMEMIT("%u %u ", pi->repeat_count, 98f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->relative_throughput); 99f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda break; 100f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 101f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 102f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 103f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return sz; 104f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 105f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 106f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_add_path(struct path_selector *ps, struct dm_path *path, 107f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda int argc, char **argv, char **error) 108f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 109f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 110f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi; 111f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned repeat_count = ST_MIN_IO; 112f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned relative_throughput = 1; 11331998ef19385c944600d9a981b96252f98204beeMikulas Patocka char dummy; 114f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 115f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 116f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Arguments: [<repeat_count> [<relative_throughput>]] 117f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * <repeat_count>: The number of I/Os before switching path. 118f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * If not given, default (ST_MIN_IO) is used. 119f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * <relative_throughput>: The relative throughput value of 120f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * the path among all paths in the path-group. 121f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT> 122f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * If not given, minimum value '1' is used. 123f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * If '0' is given, the path isn't selected while 124f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * other paths having a positive value are 125f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * available. 126f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 127f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (argc > 2) { 128f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: incorrect number of arguments"; 129f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -EINVAL; 130f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 131f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 13231998ef19385c944600d9a981b96252f98204beeMikulas Patocka if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { 133f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: invalid repeat count"; 134f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -EINVAL; 135f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 136f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 137f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if ((argc == 2) && 13831998ef19385c944600d9a981b96252f98204beeMikulas Patocka (sscanf(argv[1], "%u%c", &relative_throughput, &dummy) != 1 || 139f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) { 140f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: invalid relative_throughput value"; 141f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -EINVAL; 142f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 143f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 144f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* allocate the path */ 145f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi = kmalloc(sizeof(*pi), GFP_KERNEL); 146f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!pi) { 147f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: Error allocating path context"; 148f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -ENOMEM; 149f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 150f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 151f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->path = path; 152f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->repeat_count = repeat_count; 153f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->relative_throughput = relative_throughput; 154f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_set(&pi->in_flight_size, 0); 155f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 156f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda path->pscontext = pi; 157f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 158f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_add_tail(&pi->list, &s->valid_paths); 159f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 160f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 161f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 162f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 163f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void st_fail_path(struct path_selector *ps, struct dm_path *path) 164f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 165f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 166f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 167f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 168f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_move(&pi->list, &s->failed_paths); 169f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 170f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 171f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_reinstate_path(struct path_selector *ps, struct dm_path *path) 172f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 173f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 174f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 175f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 176f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_move_tail(&pi->list, &s->valid_paths); 177f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 178f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 179f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 180f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 181f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda/* 182f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Compare the estimated service time of 2 paths, pi1 and pi2, 183f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * for the incoming I/O. 184f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 185f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Returns: 186f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * < 0 : pi1 is better 187f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 0 : no difference between pi1 and pi2 188f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * > 0 : pi2 is better 189f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 190f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Description: 191f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Basically, the service time is estimated by: 192f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput' 193f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * To reduce the calculation, some optimizations are made. 194f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (See comments inline) 195f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 196f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_compare_load(struct path_info *pi1, struct path_info *pi2, 197f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t incoming) 198f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 199f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t sz1, sz2, st1, st2; 200f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 201f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz1 = atomic_read(&pi1->in_flight_size); 202f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 = atomic_read(&pi2->in_flight_size); 203f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 204f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 205f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 1: Both have same throughput value. Choose less loaded path. 206f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 207f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (pi1->relative_throughput == pi2->relative_throughput) 208f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return sz1 - sz2; 209f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 210f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 211f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 2a: Both have same load. Choose higher throughput path. 212f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 2b: One path has no throughput value. Choose the other one. 213f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 214f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (sz1 == sz2 || 215f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda !pi1->relative_throughput || !pi2->relative_throughput) 216f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return pi2->relative_throughput - pi1->relative_throughput; 217f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 218f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 219f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 3: Calculate service time. Choose faster path. 220f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Service time using pi1: 221f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * st1 = (sz1 + incoming) / pi1->relative_throughput 222f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Service time using pi2: 223f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * st2 = (sz2 + incoming) / pi2->relative_throughput 224f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 225f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * To avoid the division, transform the expression to use 226f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * multiplication. 227f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Because ->relative_throughput > 0 here, if st1 < st2, 228f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * the expressions below are the same meaning: 229f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz1 + incoming) / pi1->relative_throughput < 230f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz2 + incoming) / pi2->relative_throughput 231f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz1 + incoming) * pi2->relative_throughput < 232f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz2 + incoming) * pi1->relative_throughput 233f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * So use the later one. 234f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 235f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz1 += incoming; 236f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 += incoming; 237f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE || 238f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 >= ST_MAX_INFLIGHT_SIZE)) { 239f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 240f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Size may be too big for multiplying pi->relative_throughput 241f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * and overflow. 242f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * To avoid the overflow and mis-selection, shift down both. 243f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 244f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; 245f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; 246f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 247f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda st1 = sz1 * pi2->relative_throughput; 248f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda st2 = sz2 * pi1->relative_throughput; 249f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (st1 != st2) 250f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return st1 - st2; 251f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 252f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 253f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 4: Service time is equal. Choose higher throughput path. 254f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 255f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return pi2->relative_throughput - pi1->relative_throughput; 256f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 257f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 258f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic struct dm_path *st_select_path(struct path_selector *ps, 259f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned *repeat_count, size_t nr_bytes) 260f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 261f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 262f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = NULL, *best = NULL; 263f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 264f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (list_empty(&s->valid_paths)) 265f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return NULL; 266f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 267f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* Change preferred (first in list) path to evenly balance. */ 268f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_move_tail(s->valid_paths.next, &s->valid_paths); 269f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 270f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_for_each_entry(pi, &s->valid_paths, list) 271f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!best || (st_compare_load(pi, best, nr_bytes) < 0)) 272f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda best = pi; 273f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 274f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!best) 275f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return NULL; 276f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 277f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *repeat_count = best->repeat_count; 278f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 279f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return best->path; 280f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 281f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 282f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_start_io(struct path_selector *ps, struct dm_path *path, 283f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t nr_bytes) 284f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 285f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 286f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 287f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_add(nr_bytes, &pi->in_flight_size); 288f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 289f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 290f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 291f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 292f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_end_io(struct path_selector *ps, struct dm_path *path, 293f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t nr_bytes) 294f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 295f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 296f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 297f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_sub(nr_bytes, &pi->in_flight_size); 298f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 299f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 300f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 301f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 302f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic struct path_selector_type st_ps = { 303f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .name = "service-time", 304f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .module = THIS_MODULE, 305f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .table_args = 2, 306f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .info_args = 2, 307f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .create = st_create, 308f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .destroy = st_destroy, 309f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .status = st_status, 310f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .add_path = st_add_path, 311f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .fail_path = st_fail_path, 312f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .reinstate_path = st_reinstate_path, 313f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .select_path = st_select_path, 314f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .start_io = st_start_io, 315f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .end_io = st_end_io, 316f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda}; 317f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 318f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int __init dm_st_init(void) 319f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 320f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda int r = dm_register_path_selector(&st_ps); 321f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 322f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (r < 0) 323f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMERR("register failed %d", r); 324f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 325f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMINFO("version " ST_VERSION " loaded"); 326f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 327f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return r; 328f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 329f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 330f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void __exit dm_st_exit(void) 331f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 332f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda int r = dm_unregister_path_selector(&st_ps); 333f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 334f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (r < 0) 335f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMERR("unregister failed %d", r); 336f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 337f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 338f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedamodule_init(dm_st_init); 339f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedamodule_exit(dm_st_exit); 340f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 341f392ba889b019602976082bfe7bf486c2594f85cKiyoshi UedaMODULE_DESCRIPTION(DM_NAME " throughput oriented path selector"); 342f392ba889b019602976082bfe7bf486c2594f85cKiyoshi UedaMODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>"); 343f392ba889b019602976082bfe7bf486c2594f85cKiyoshi UedaMODULE_LICENSE("GPL"); 344