dm-service-time.c revision 5a0e3ad6af8660be21ca98a971cd00f331318c05
1f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda/* 2f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved. 3f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 4f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Module Author: Kiyoshi Ueda 5f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 6f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * This file is released under the GPL. 7f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 8f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Throughput oriented path selector. 9f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 10f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 11f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#include "dm.h" 12f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#include "dm-path-selector.h" 13f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 145a0e3ad6af8660be21ca98a971cd00f331318c05Tejun Heo#include <linux/slab.h> 155a0e3ad6af8660be21ca98a971cd00f331318c05Tejun Heo 16f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define DM_MSG_PREFIX "multipath service-time" 17f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MIN_IO 1 18f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MAX_RELATIVE_THROUGHPUT 100 19f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7 20f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT) 21f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda#define ST_VERSION "0.2.0" 22f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 23f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastruct selector { 24f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct list_head valid_paths; 25f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct list_head failed_paths; 26f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda}; 27f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 28f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastruct path_info { 29f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct list_head list; 30f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct dm_path *path; 31f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned repeat_count; 32f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned relative_throughput; 33f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_t in_flight_size; /* Total size of in-flight I/Os */ 34f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda}; 35f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 36f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic struct selector *alloc_selector(void) 37f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 38f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); 39f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 40f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (s) { 41f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda INIT_LIST_HEAD(&s->valid_paths); 42f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda INIT_LIST_HEAD(&s->failed_paths); 43f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 44f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 45f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return s; 46f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 47f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 48f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_create(struct path_selector *ps, unsigned argc, char **argv) 49f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 50f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = alloc_selector(); 51f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 52f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!s) 53f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -ENOMEM; 54f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 55f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda ps->context = s; 56f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 57f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 58f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 59f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void free_paths(struct list_head *paths) 60f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 61f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi, *next; 62f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 63f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_for_each_entry_safe(pi, next, paths, list) { 64f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_del(&pi->list); 65f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda kfree(pi); 66f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 67f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 68f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 69f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void st_destroy(struct path_selector *ps) 70f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 71f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 72f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 73f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda free_paths(&s->valid_paths); 74f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda free_paths(&s->failed_paths); 75f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda kfree(s); 76f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda ps->context = NULL; 77f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 78f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 79f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_status(struct path_selector *ps, struct dm_path *path, 80f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda status_type_t type, char *result, unsigned maxlen) 81f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 82f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned sz = 0; 83f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi; 84f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 85f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!path) 86f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMEMIT("0 "); 87f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda else { 88f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi = path->pscontext; 89f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 90f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda switch (type) { 91f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda case STATUSTYPE_INFO: 92f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMEMIT("%d %u ", atomic_read(&pi->in_flight_size), 93f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->relative_throughput); 94f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda break; 95f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda case STATUSTYPE_TABLE: 96f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMEMIT("%u %u ", pi->repeat_count, 97f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->relative_throughput); 98f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda break; 99f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 100f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 101f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 102f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return sz; 103f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 104f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 105f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_add_path(struct path_selector *ps, struct dm_path *path, 106f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda int argc, char **argv, char **error) 107f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 108f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 109f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi; 110f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned repeat_count = ST_MIN_IO; 111f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned relative_throughput = 1; 112f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 113f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 114f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Arguments: [<repeat_count> [<relative_throughput>]] 115f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * <repeat_count>: The number of I/Os before switching path. 116f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * If not given, default (ST_MIN_IO) is used. 117f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * <relative_throughput>: The relative throughput value of 118f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * the path among all paths in the path-group. 119f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT> 120f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * If not given, minimum value '1' is used. 121f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * If '0' is given, the path isn't selected while 122f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * other paths having a positive value are 123f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * available. 124f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 125f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (argc > 2) { 126f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: incorrect number of arguments"; 127f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -EINVAL; 128f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 129f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 130f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) { 131f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: invalid repeat count"; 132f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -EINVAL; 133f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 134f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 135f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if ((argc == 2) && 136f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda (sscanf(argv[1], "%u", &relative_throughput) != 1 || 137f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) { 138f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: invalid relative_throughput value"; 139f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -EINVAL; 140f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 141f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 142f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* allocate the path */ 143f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi = kmalloc(sizeof(*pi), GFP_KERNEL); 144f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!pi) { 145f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *error = "service-time ps: Error allocating path context"; 146f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return -ENOMEM; 147f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 148f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 149f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->path = path; 150f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->repeat_count = repeat_count; 151f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda pi->relative_throughput = relative_throughput; 152f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_set(&pi->in_flight_size, 0); 153f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 154f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda path->pscontext = pi; 155f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 156f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_add_tail(&pi->list, &s->valid_paths); 157f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 158f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 159f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 160f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 161f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void st_fail_path(struct path_selector *ps, struct dm_path *path) 162f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 163f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 164f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 165f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 166f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_move(&pi->list, &s->failed_paths); 167f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 168f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 169f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_reinstate_path(struct path_selector *ps, struct dm_path *path) 170f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 171f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 172f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 173f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 174f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_move_tail(&pi->list, &s->valid_paths); 175f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 176f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 177f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 178f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 179f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda/* 180f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Compare the estimated service time of 2 paths, pi1 and pi2, 181f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * for the incoming I/O. 182f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 183f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Returns: 184f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * < 0 : pi1 is better 185f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 0 : no difference between pi1 and pi2 186f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * > 0 : pi2 is better 187f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 188f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Description: 189f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Basically, the service time is estimated by: 190f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput' 191f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * To reduce the calculation, some optimizations are made. 192f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (See comments inline) 193f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 194f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_compare_load(struct path_info *pi1, struct path_info *pi2, 195f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t incoming) 196f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 197f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t sz1, sz2, st1, st2; 198f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 199f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz1 = atomic_read(&pi1->in_flight_size); 200f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 = atomic_read(&pi2->in_flight_size); 201f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 202f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 203f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 1: Both have same throughput value. Choose less loaded path. 204f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 205f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (pi1->relative_throughput == pi2->relative_throughput) 206f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return sz1 - sz2; 207f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 208f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 209f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 2a: Both have same load. Choose higher throughput path. 210f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 2b: One path has no throughput value. Choose the other one. 211f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 212f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (sz1 == sz2 || 213f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda !pi1->relative_throughput || !pi2->relative_throughput) 214f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return pi2->relative_throughput - pi1->relative_throughput; 215f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 216f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 217f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 3: Calculate service time. Choose faster path. 218f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Service time using pi1: 219f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * st1 = (sz1 + incoming) / pi1->relative_throughput 220f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Service time using pi2: 221f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * st2 = (sz2 + incoming) / pi2->relative_throughput 222f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * 223f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * To avoid the division, transform the expression to use 224f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * multiplication. 225f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Because ->relative_throughput > 0 here, if st1 < st2, 226f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * the expressions below are the same meaning: 227f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz1 + incoming) / pi1->relative_throughput < 228f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz2 + incoming) / pi2->relative_throughput 229f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz1 + incoming) * pi2->relative_throughput < 230f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * (sz2 + incoming) * pi1->relative_throughput 231f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * So use the later one. 232f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 233f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz1 += incoming; 234f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 += incoming; 235f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE || 236f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 >= ST_MAX_INFLIGHT_SIZE)) { 237f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 238f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Size may be too big for multiplying pi->relative_throughput 239f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * and overflow. 240f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * To avoid the overflow and mis-selection, shift down both. 241f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 242f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; 243f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; 244f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda } 245f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda st1 = sz1 * pi2->relative_throughput; 246f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda st2 = sz2 * pi1->relative_throughput; 247f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (st1 != st2) 248f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return st1 - st2; 249f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 250f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* 251f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda * Case 4: Service time is equal. Choose higher throughput path. 252f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda */ 253f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return pi2->relative_throughput - pi1->relative_throughput; 254f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 255f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 256f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic struct dm_path *st_select_path(struct path_selector *ps, 257f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda unsigned *repeat_count, size_t nr_bytes) 258f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 259f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct selector *s = ps->context; 260f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = NULL, *best = NULL; 261f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 262f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (list_empty(&s->valid_paths)) 263f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return NULL; 264f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 265f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda /* Change preferred (first in list) path to evenly balance. */ 266f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_move_tail(s->valid_paths.next, &s->valid_paths); 267f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 268f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda list_for_each_entry(pi, &s->valid_paths, list) 269f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!best || (st_compare_load(pi, best, nr_bytes) < 0)) 270f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda best = pi; 271f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 272f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (!best) 273f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return NULL; 274f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 275f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda *repeat_count = best->repeat_count; 276f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 277f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return best->path; 278f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 279f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 280f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_start_io(struct path_selector *ps, struct dm_path *path, 281f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t nr_bytes) 282f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 283f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 284f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 285f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_add(nr_bytes, &pi->in_flight_size); 286f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 287f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 288f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 289f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 290f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int st_end_io(struct path_selector *ps, struct dm_path *path, 291f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda size_t nr_bytes) 292f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 293f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda struct path_info *pi = path->pscontext; 294f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 295f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda atomic_sub(nr_bytes, &pi->in_flight_size); 296f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 297f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return 0; 298f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 299f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 300f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic struct path_selector_type st_ps = { 301f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .name = "service-time", 302f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .module = THIS_MODULE, 303f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .table_args = 2, 304f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .info_args = 2, 305f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .create = st_create, 306f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .destroy = st_destroy, 307f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .status = st_status, 308f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .add_path = st_add_path, 309f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .fail_path = st_fail_path, 310f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .reinstate_path = st_reinstate_path, 311f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .select_path = st_select_path, 312f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .start_io = st_start_io, 313f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda .end_io = st_end_io, 314f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda}; 315f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 316f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic int __init dm_st_init(void) 317f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 318f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda int r = dm_register_path_selector(&st_ps); 319f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 320f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (r < 0) 321f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMERR("register failed %d", r); 322f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 323f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMINFO("version " ST_VERSION " loaded"); 324f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 325f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda return r; 326f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 327f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 328f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedastatic void __exit dm_st_exit(void) 329f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda{ 330f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda int r = dm_unregister_path_selector(&st_ps); 331f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 332f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda if (r < 0) 333f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda DMERR("unregister failed %d", r); 334f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda} 335f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 336f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedamodule_init(dm_st_init); 337f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Uedamodule_exit(dm_st_exit); 338f392ba889b019602976082bfe7bf486c2594f85cKiyoshi Ueda 339f392ba889b019602976082bfe7bf486c2594f85cKiyoshi UedaMODULE_DESCRIPTION(DM_NAME " throughput oriented path selector"); 340f392ba889b019602976082bfe7bf486c2594f85cKiyoshi UedaMODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>"); 341f392ba889b019602976082bfe7bf486c2594f85cKiyoshi UedaMODULE_LICENSE("GPL"); 342