1/* 2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Julian Anastasov <ja@ssi.bg> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> 13 * Network name space (netns) aware. 14 * 15 */ 16 17#define KMSG_COMPONENT "IPVS" 18#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 19 20#include <linux/in.h> 21#include <linux/ip.h> 22#include <linux/kernel.h> 23#include <linux/netfilter.h> 24#include <linux/netfilter_ipv4.h> 25#include <linux/udp.h> 26 27#include <net/ip_vs.h> 28#include <net/ip.h> 29#include <net/ip6_checksum.h> 30 31static int 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 33 int *verdict, struct ip_vs_conn **cpp, 34 struct ip_vs_iphdr *iph) 35{ 36 struct net *net; 37 struct ip_vs_service *svc; 38 struct udphdr _udph, *uh; 39 40 /* IPv6 fragments, only first fragment will hit this */ 41 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); 42 if (uh == NULL) { 43 *verdict = NF_DROP; 44 return 0; 45 } 46 net = skb_net(skb); 47 rcu_read_lock(); 48 svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, 49 &iph->daddr, uh->dest); 50 if (svc) { 51 int ignored; 52 53 if (ip_vs_todrop(net_ipvs(net))) { 54 /* 55 * It seems that we are very loaded. 56 * We have to drop this packet :( 57 */ 58 rcu_read_unlock(); 59 *verdict = NF_DROP; 60 return 0; 61 } 62 63 /* 64 * Let the virtual server select a real server for the 65 * incoming connection, and create a connection entry. 66 */ 67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); 68 if (!*cpp && ignored <= 0) { 69 if (!ignored) 70 *verdict = ip_vs_leave(svc, skb, pd, iph); 71 else 72 *verdict = NF_DROP; 73 rcu_read_unlock(); 74 return 0; 75 } 76 } 77 rcu_read_unlock(); 78 /* NF_ACCEPT */ 79 return 1; 80} 81 82 83static inline void 84udp_fast_csum_update(int af, struct udphdr *uhdr, 85 const union nf_inet_addr *oldip, 86 const union nf_inet_addr *newip, 87 __be16 oldport, __be16 newport) 88{ 89#ifdef CONFIG_IP_VS_IPV6 90 if (af == AF_INET6) 91 uhdr->check = 92 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 93 ip_vs_check_diff2(oldport, newport, 94 ~csum_unfold(uhdr->check)))); 95 else 96#endif 97 uhdr->check = 98 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 99 ip_vs_check_diff2(oldport, newport, 100 ~csum_unfold(uhdr->check)))); 101 if (!uhdr->check) 102 uhdr->check = CSUM_MANGLED_0; 103} 104 105static inline void 106udp_partial_csum_update(int af, struct udphdr *uhdr, 107 const union nf_inet_addr *oldip, 108 const union nf_inet_addr *newip, 109 __be16 oldlen, __be16 newlen) 110{ 111#ifdef CONFIG_IP_VS_IPV6 112 if (af == AF_INET6) 113 uhdr->check = 114 ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 115 ip_vs_check_diff2(oldlen, newlen, 116 csum_unfold(uhdr->check)))); 117 else 118#endif 119 uhdr->check = 120 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 121 ip_vs_check_diff2(oldlen, newlen, 122 csum_unfold(uhdr->check)))); 123} 124 125 126static int 127udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, 128 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) 129{ 130 struct udphdr *udph; 131 unsigned int udphoff = iph->len; 132 int oldlen; 133 int payload_csum = 0; 134 135#ifdef CONFIG_IP_VS_IPV6 136 if (cp->af == AF_INET6 && iph->fragoffs) 137 return 1; 138#endif 139 oldlen = skb->len - udphoff; 140 141 /* csum_check requires unshared skb */ 142 if (!skb_make_writable(skb, udphoff+sizeof(*udph))) 143 return 0; 144 145 if (unlikely(cp->app != NULL)) { 146 int ret; 147 148 /* Some checks before mangling */ 149 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 150 return 0; 151 152 /* 153 * Call application helper if needed 154 */ 155 if (!(ret = ip_vs_app_pkt_out(cp, skb))) 156 return 0; 157 /* ret=2: csum update is needed after payload mangling */ 158 if (ret == 1) 159 oldlen = skb->len - udphoff; 160 else 161 payload_csum = 1; 162 } 163 164 udph = (void *)skb_network_header(skb) + udphoff; 165 udph->source = cp->vport; 166 167 /* 168 * Adjust UDP checksums 169 */ 170 if (skb->ip_summed == CHECKSUM_PARTIAL) { 171 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 172 htons(oldlen), 173 htons(skb->len - udphoff)); 174 } else if (!payload_csum && (udph->check != 0)) { 175 /* Only port and addr are changed, do fast csum update */ 176 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 177 cp->dport, cp->vport); 178 if (skb->ip_summed == CHECKSUM_COMPLETE) 179 skb->ip_summed = (cp->app && pp->csum_check) ? 180 CHECKSUM_UNNECESSARY : CHECKSUM_NONE; 181 } else { 182 /* full checksum calculation */ 183 udph->check = 0; 184 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); 185#ifdef CONFIG_IP_VS_IPV6 186 if (cp->af == AF_INET6) 187 udph->check = csum_ipv6_magic(&cp->vaddr.in6, 188 &cp->caddr.in6, 189 skb->len - udphoff, 190 cp->protocol, skb->csum); 191 else 192#endif 193 udph->check = csum_tcpudp_magic(cp->vaddr.ip, 194 cp->caddr.ip, 195 skb->len - udphoff, 196 cp->protocol, 197 skb->csum); 198 if (udph->check == 0) 199 udph->check = CSUM_MANGLED_0; 200 skb->ip_summed = CHECKSUM_UNNECESSARY; 201 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 202 pp->name, udph->check, 203 (char*)&(udph->check) - (char*)udph); 204 } 205 return 1; 206} 207 208 209static int 210udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, 211 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) 212{ 213 struct udphdr *udph; 214 unsigned int udphoff = iph->len; 215 int oldlen; 216 int payload_csum = 0; 217 218#ifdef CONFIG_IP_VS_IPV6 219 if (cp->af == AF_INET6 && iph->fragoffs) 220 return 1; 221#endif 222 oldlen = skb->len - udphoff; 223 224 /* csum_check requires unshared skb */ 225 if (!skb_make_writable(skb, udphoff+sizeof(*udph))) 226 return 0; 227 228 if (unlikely(cp->app != NULL)) { 229 int ret; 230 231 /* Some checks before mangling */ 232 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 233 return 0; 234 235 /* 236 * Attempt ip_vs_app call. 237 * It will fix ip_vs_conn 238 */ 239 if (!(ret = ip_vs_app_pkt_in(cp, skb))) 240 return 0; 241 /* ret=2: csum update is needed after payload mangling */ 242 if (ret == 1) 243 oldlen = skb->len - udphoff; 244 else 245 payload_csum = 1; 246 } 247 248 udph = (void *)skb_network_header(skb) + udphoff; 249 udph->dest = cp->dport; 250 251 /* 252 * Adjust UDP checksums 253 */ 254 if (skb->ip_summed == CHECKSUM_PARTIAL) { 255 udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, 256 htons(oldlen), 257 htons(skb->len - udphoff)); 258 } else if (!payload_csum && (udph->check != 0)) { 259 /* Only port and addr are changed, do fast csum update */ 260 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, 261 cp->vport, cp->dport); 262 if (skb->ip_summed == CHECKSUM_COMPLETE) 263 skb->ip_summed = (cp->app && pp->csum_check) ? 264 CHECKSUM_UNNECESSARY : CHECKSUM_NONE; 265 } else { 266 /* full checksum calculation */ 267 udph->check = 0; 268 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); 269#ifdef CONFIG_IP_VS_IPV6 270 if (cp->af == AF_INET6) 271 udph->check = csum_ipv6_magic(&cp->caddr.in6, 272 &cp->daddr.in6, 273 skb->len - udphoff, 274 cp->protocol, skb->csum); 275 else 276#endif 277 udph->check = csum_tcpudp_magic(cp->caddr.ip, 278 cp->daddr.ip, 279 skb->len - udphoff, 280 cp->protocol, 281 skb->csum); 282 if (udph->check == 0) 283 udph->check = CSUM_MANGLED_0; 284 skb->ip_summed = CHECKSUM_UNNECESSARY; 285 } 286 return 1; 287} 288 289 290static int 291udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 292{ 293 struct udphdr _udph, *uh; 294 unsigned int udphoff; 295 296#ifdef CONFIG_IP_VS_IPV6 297 if (af == AF_INET6) 298 udphoff = sizeof(struct ipv6hdr); 299 else 300#endif 301 udphoff = ip_hdrlen(skb); 302 303 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); 304 if (uh == NULL) 305 return 0; 306 307 if (uh->check != 0) { 308 switch (skb->ip_summed) { 309 case CHECKSUM_NONE: 310 skb->csum = skb_checksum(skb, udphoff, 311 skb->len - udphoff, 0); 312 case CHECKSUM_COMPLETE: 313#ifdef CONFIG_IP_VS_IPV6 314 if (af == AF_INET6) { 315 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 316 &ipv6_hdr(skb)->daddr, 317 skb->len - udphoff, 318 ipv6_hdr(skb)->nexthdr, 319 skb->csum)) { 320 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 321 "Failed checksum for"); 322 return 0; 323 } 324 } else 325#endif 326 if (csum_tcpudp_magic(ip_hdr(skb)->saddr, 327 ip_hdr(skb)->daddr, 328 skb->len - udphoff, 329 ip_hdr(skb)->protocol, 330 skb->csum)) { 331 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 332 "Failed checksum for"); 333 return 0; 334 } 335 break; 336 default: 337 /* No need to checksum. */ 338 break; 339 } 340 } 341 return 1; 342} 343 344static inline __u16 udp_app_hashkey(__be16 port) 345{ 346 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) 347 & UDP_APP_TAB_MASK; 348} 349 350 351static int udp_register_app(struct net *net, struct ip_vs_app *inc) 352{ 353 struct ip_vs_app *i; 354 __u16 hash; 355 __be16 port = inc->port; 356 int ret = 0; 357 struct netns_ipvs *ipvs = net_ipvs(net); 358 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 359 360 hash = udp_app_hashkey(port); 361 362 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { 363 if (i->port == port) { 364 ret = -EEXIST; 365 goto out; 366 } 367 } 368 list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); 369 atomic_inc(&pd->appcnt); 370 371 out: 372 return ret; 373} 374 375 376static void 377udp_unregister_app(struct net *net, struct ip_vs_app *inc) 378{ 379 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 380 381 atomic_dec(&pd->appcnt); 382 list_del_rcu(&inc->p_list); 383} 384 385 386static int udp_app_conn_bind(struct ip_vs_conn *cp) 387{ 388 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 389 int hash; 390 struct ip_vs_app *inc; 391 int result = 0; 392 393 /* Default binding: bind app only for NAT */ 394 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 395 return 0; 396 397 /* Lookup application incarnations and bind the right one */ 398 hash = udp_app_hashkey(cp->vport); 399 400 rcu_read_lock(); 401 list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { 402 if (inc->port == cp->vport) { 403 if (unlikely(!ip_vs_app_inc_get(inc))) 404 break; 405 rcu_read_unlock(); 406 407 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 408 "%s:%u to app %s on port %u\n", 409 __func__, 410 IP_VS_DBG_ADDR(cp->af, &cp->caddr), 411 ntohs(cp->cport), 412 IP_VS_DBG_ADDR(cp->af, &cp->vaddr), 413 ntohs(cp->vport), 414 inc->name, ntohs(inc->port)); 415 416 cp->app = inc; 417 if (inc->init_conn) 418 result = inc->init_conn(inc, cp); 419 goto out; 420 } 421 } 422 rcu_read_unlock(); 423 424 out: 425 return result; 426} 427 428 429static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 430 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 431 [IP_VS_UDP_S_LAST] = 2*HZ, 432}; 433 434static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { 435 [IP_VS_UDP_S_NORMAL] = "UDP", 436 [IP_VS_UDP_S_LAST] = "BUG!", 437}; 438 439static const char * udp_state_name(int state) 440{ 441 if (state >= IP_VS_UDP_S_LAST) 442 return "ERR!"; 443 return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; 444} 445 446static void 447udp_state_transition(struct ip_vs_conn *cp, int direction, 448 const struct sk_buff *skb, 449 struct ip_vs_proto_data *pd) 450{ 451 if (unlikely(!pd)) { 452 pr_err("UDP no ns data\n"); 453 return; 454 } 455 456 cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; 457} 458 459static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) 460{ 461 struct netns_ipvs *ipvs = net_ipvs(net); 462 463 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); 464 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, 465 sizeof(udp_timeouts)); 466 if (!pd->timeout_table) 467 return -ENOMEM; 468 return 0; 469} 470 471static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) 472{ 473 kfree(pd->timeout_table); 474} 475 476 477struct ip_vs_protocol ip_vs_protocol_udp = { 478 .name = "UDP", 479 .protocol = IPPROTO_UDP, 480 .num_states = IP_VS_UDP_S_LAST, 481 .dont_defrag = 0, 482 .init = NULL, 483 .exit = NULL, 484 .init_netns = __udp_init, 485 .exit_netns = __udp_exit, 486 .conn_schedule = udp_conn_schedule, 487 .conn_in_get = ip_vs_conn_in_get_proto, 488 .conn_out_get = ip_vs_conn_out_get_proto, 489 .snat_handler = udp_snat_handler, 490 .dnat_handler = udp_dnat_handler, 491 .csum_check = udp_csum_check, 492 .state_transition = udp_state_transition, 493 .state_name = udp_state_name, 494 .register_app = udp_register_app, 495 .unregister_app = udp_unregister_app, 496 .app_conn_bind = udp_app_conn_bind, 497 .debug_packet = ip_vs_tcpudp_debug_packet, 498 .timeout_change = NULL, 499}; 500