1#include <linux/kernel.h> 2#include <linux/ip.h> 3#include <linux/sctp.h> 4#include <net/ip.h> 5#include <net/ip6_checksum.h> 6#include <linux/netfilter.h> 7#include <linux/netfilter_ipv4.h> 8#include <net/sctp/checksum.h> 9#include <net/ip_vs.h> 10 11static int 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 13 int *verdict, struct ip_vs_conn **cpp, 14 struct ip_vs_iphdr *iph) 15{ 16 struct net *net; 17 struct ip_vs_service *svc; 18 struct netns_ipvs *ipvs; 19 sctp_chunkhdr_t _schunkh, *sch; 20 sctp_sctphdr_t *sh, _sctph; 21 22 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); 23 if (sh == NULL) { 24 *verdict = NF_DROP; 25 return 0; 26 } 27 28 sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), 29 sizeof(_schunkh), &_schunkh); 30 if (sch == NULL) { 31 *verdict = NF_DROP; 32 return 0; 33 } 34 35 net = skb_net(skb); 36 ipvs = net_ipvs(net); 37 rcu_read_lock(); 38 if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && 39 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, 40 &iph->daddr, sh->dest))) { 41 int ignored; 42 43 if (ip_vs_todrop(ipvs)) { 44 /* 45 * It seems that we are very loaded. 46 * We have to drop this packet :( 47 */ 48 rcu_read_unlock(); 49 *verdict = NF_DROP; 50 return 0; 51 } 52 /* 53 * Let the virtual server select a real server for the 54 * incoming connection, and create a connection entry. 55 */ 56 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); 57 if (!*cpp && ignored <= 0) { 58 if (!ignored) 59 *verdict = ip_vs_leave(svc, skb, pd, iph); 60 else 61 *verdict = NF_DROP; 62 rcu_read_unlock(); 63 return 0; 64 } 65 } 66 rcu_read_unlock(); 67 /* NF_ACCEPT */ 68 return 1; 69} 70 71static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, 72 unsigned int sctphoff) 73{ 74 sctph->checksum = sctp_compute_cksum(skb, sctphoff); 75 skb->ip_summed = CHECKSUM_UNNECESSARY; 76} 77 78static int 79sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, 80 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) 81{ 82 sctp_sctphdr_t *sctph; 83 unsigned int sctphoff = iph->len; 84 bool payload_csum = false; 85 86#ifdef CONFIG_IP_VS_IPV6 87 if (cp->af == AF_INET6 && iph->fragoffs) 88 return 1; 89#endif 90 91 /* csum_check requires unshared skb */ 92 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 93 return 0; 94 95 if (unlikely(cp->app != NULL)) { 96 int ret; 97 98 /* Some checks before mangling */ 99 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 100 return 0; 101 102 /* Call application helper if needed */ 103 ret = ip_vs_app_pkt_out(cp, skb); 104 if (ret == 0) 105 return 0; 106 /* ret=2: csum update is needed after payload mangling */ 107 if (ret == 2) 108 payload_csum = true; 109 } 110 111 sctph = (void *) skb_network_header(skb) + sctphoff; 112 113 /* Only update csum if we really have to */ 114 if (sctph->source != cp->vport || payload_csum || 115 skb->ip_summed == CHECKSUM_PARTIAL) { 116 sctph->source = cp->vport; 117 sctp_nat_csum(skb, sctph, sctphoff); 118 } else { 119 skb->ip_summed = CHECKSUM_UNNECESSARY; 120 } 121 122 return 1; 123} 124 125static int 126sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, 127 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) 128{ 129 sctp_sctphdr_t *sctph; 130 unsigned int sctphoff = iph->len; 131 bool payload_csum = false; 132 133#ifdef CONFIG_IP_VS_IPV6 134 if (cp->af == AF_INET6 && iph->fragoffs) 135 return 1; 136#endif 137 138 /* csum_check requires unshared skb */ 139 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 140 return 0; 141 142 if (unlikely(cp->app != NULL)) { 143 int ret; 144 145 /* Some checks before mangling */ 146 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 147 return 0; 148 149 /* Call application helper if needed */ 150 ret = ip_vs_app_pkt_in(cp, skb); 151 if (ret == 0) 152 return 0; 153 /* ret=2: csum update is needed after payload mangling */ 154 if (ret == 2) 155 payload_csum = true; 156 } 157 158 sctph = (void *) skb_network_header(skb) + sctphoff; 159 160 /* Only update csum if we really have to */ 161 if (sctph->dest != cp->dport || payload_csum || 162 (skb->ip_summed == CHECKSUM_PARTIAL && 163 !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { 164 sctph->dest = cp->dport; 165 sctp_nat_csum(skb, sctph, sctphoff); 166 } else if (skb->ip_summed != CHECKSUM_PARTIAL) { 167 skb->ip_summed = CHECKSUM_UNNECESSARY; 168 } 169 170 return 1; 171} 172 173static int 174sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 175{ 176 unsigned int sctphoff; 177 struct sctphdr *sh, _sctph; 178 __le32 cmp, val; 179 180#ifdef CONFIG_IP_VS_IPV6 181 if (af == AF_INET6) 182 sctphoff = sizeof(struct ipv6hdr); 183 else 184#endif 185 sctphoff = ip_hdrlen(skb); 186 187 sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); 188 if (sh == NULL) 189 return 0; 190 191 cmp = sh->checksum; 192 val = sctp_compute_cksum(skb, sctphoff); 193 194 if (val != cmp) { 195 /* CRC failure, dump it. */ 196 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 197 "Failed checksum for"); 198 return 0; 199 } 200 return 1; 201} 202 203enum ipvs_sctp_event_t { 204 IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */ 205 IP_VS_SCTP_INIT, 206 IP_VS_SCTP_INIT_ACK, 207 IP_VS_SCTP_COOKIE_ECHO, 208 IP_VS_SCTP_COOKIE_ACK, 209 IP_VS_SCTP_SHUTDOWN, 210 IP_VS_SCTP_SHUTDOWN_ACK, 211 IP_VS_SCTP_SHUTDOWN_COMPLETE, 212 IP_VS_SCTP_ERROR, 213 IP_VS_SCTP_ABORT, 214 IP_VS_SCTP_EVENT_LAST 215}; 216 217/* RFC 2960, 3.2 Chunk Field Descriptions */ 218static __u8 sctp_events[] = { 219 [SCTP_CID_DATA] = IP_VS_SCTP_DATA, 220 [SCTP_CID_INIT] = IP_VS_SCTP_INIT, 221 [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, 222 [SCTP_CID_SACK] = IP_VS_SCTP_DATA, 223 [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, 224 [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, 225 [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, 226 [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, 227 [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, 228 [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, 229 [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, 230 [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, 231 [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, 232 [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, 233 [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, 234}; 235 236/* SCTP States: 237 * See RFC 2960, 4. SCTP Association State Diagram 238 * 239 * New states (not in diagram): 240 * - INIT1 state: use shorter timeout for dropped INIT packets 241 * - REJECTED state: use shorter timeout if INIT is rejected with ABORT 242 * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging 243 * 244 * The states are as seen in real server. In the diagram, INIT1, INIT, 245 * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. 246 * 247 * States as per packets from client (C) and server (S): 248 * 249 * Setup of client connection: 250 * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK 251 * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK 252 * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO 253 * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK 254 * 255 * Setup of server connection: 256 * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK 257 * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO 258 * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK 259 */ 260 261#define sNO IP_VS_SCTP_S_NONE 262#define sI1 IP_VS_SCTP_S_INIT1 263#define sIN IP_VS_SCTP_S_INIT 264#define sCS IP_VS_SCTP_S_COOKIE_SENT 265#define sCR IP_VS_SCTP_S_COOKIE_REPLIED 266#define sCW IP_VS_SCTP_S_COOKIE_WAIT 267#define sCO IP_VS_SCTP_S_COOKIE 268#define sCE IP_VS_SCTP_S_COOKIE_ECHOED 269#define sES IP_VS_SCTP_S_ESTABLISHED 270#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT 271#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED 272#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT 273#define sRJ IP_VS_SCTP_S_REJECTED 274#define sCL IP_VS_SCTP_S_CLOSED 275 276static const __u8 sctp_states 277 [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { 278 { /* INPUT */ 279/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ 280/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 281/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, 282/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 283/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 284/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, 285/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, 286/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, 287/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, 288/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, 289/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, 290 }, 291 { /* OUTPUT */ 292/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ 293/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 294/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, 295/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 296/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 297/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, 298/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, 299/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, 300/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 301/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 302/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, 303 }, 304 { /* INPUT-ONLY */ 305/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ 306/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 307/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, 308/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 309/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 310/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, 311/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, 312/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, 313/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, 314/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, 315/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, 316 }, 317}; 318 319#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) 320 321/* Timeout table[state] */ 322static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { 323 [IP_VS_SCTP_S_NONE] = 2 * HZ, 324 [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, 325 [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, 326 [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, 327 [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, 328 [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, 329 [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, 330 [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, 331 [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, 332 [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, 333 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, 334 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, 335 [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, 336 [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, 337 [IP_VS_SCTP_S_LAST] = 2 * HZ, 338}; 339 340static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { 341 [IP_VS_SCTP_S_NONE] = "NONE", 342 [IP_VS_SCTP_S_INIT1] = "INIT1", 343 [IP_VS_SCTP_S_INIT] = "INIT", 344 [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", 345 [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", 346 [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", 347 [IP_VS_SCTP_S_COOKIE] = "COOKIE", 348 [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", 349 [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", 350 [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", 351 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", 352 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", 353 [IP_VS_SCTP_S_REJECTED] = "REJECTED", 354 [IP_VS_SCTP_S_CLOSED] = "CLOSED", 355 [IP_VS_SCTP_S_LAST] = "BUG!", 356}; 357 358 359static const char *sctp_state_name(int state) 360{ 361 if (state >= IP_VS_SCTP_S_LAST) 362 return "ERR!"; 363 if (sctp_state_name_table[state]) 364 return sctp_state_name_table[state]; 365 return "?"; 366} 367 368static inline void 369set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, 370 int direction, const struct sk_buff *skb) 371{ 372 sctp_chunkhdr_t _sctpch, *sch; 373 unsigned char chunk_type; 374 int event, next_state; 375 int ihl, cofs; 376 377#ifdef CONFIG_IP_VS_IPV6 378 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); 379#else 380 ihl = ip_hdrlen(skb); 381#endif 382 383 cofs = ihl + sizeof(sctp_sctphdr_t); 384 sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); 385 if (sch == NULL) 386 return; 387 388 chunk_type = sch->type; 389 /* 390 * Section 3: Multiple chunks can be bundled into one SCTP packet 391 * up to the MTU size, except for the INIT, INIT ACK, and 392 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with 393 * any other chunk in a packet. 394 * 395 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control 396 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be 397 * bundled with an ABORT, but they MUST be placed before the ABORT 398 * in the SCTP packet or they will be ignored by the receiver. 399 */ 400 if ((sch->type == SCTP_CID_COOKIE_ECHO) || 401 (sch->type == SCTP_CID_COOKIE_ACK)) { 402 int clen = ntohs(sch->length); 403 404 if (clen >= sizeof(sctp_chunkhdr_t)) { 405 sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), 406 sizeof(_sctpch), &_sctpch); 407 if (sch && sch->type == SCTP_CID_ABORT) 408 chunk_type = sch->type; 409 } 410 } 411 412 event = (chunk_type < sizeof(sctp_events)) ? 413 sctp_events[chunk_type] : IP_VS_SCTP_DATA; 414 415 /* Update direction to INPUT_ONLY if necessary 416 * or delete NO_OUTPUT flag if output packet detected 417 */ 418 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { 419 if (direction == IP_VS_DIR_OUTPUT) 420 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; 421 else 422 direction = IP_VS_DIR_INPUT_ONLY; 423 } 424 425 next_state = sctp_states[direction][event][cp->state]; 426 427 if (next_state != cp->state) { 428 struct ip_vs_dest *dest = cp->dest; 429 430 IP_VS_DBG_BUF(8, "%s %s %s:%d->" 431 "%s:%d state: %s->%s conn->refcnt:%d\n", 432 pd->pp->name, 433 ((direction == IP_VS_DIR_OUTPUT) ? 434 "output " : "input "), 435 IP_VS_DBG_ADDR(cp->daf, &cp->daddr), 436 ntohs(cp->dport), 437 IP_VS_DBG_ADDR(cp->af, &cp->caddr), 438 ntohs(cp->cport), 439 sctp_state_name(cp->state), 440 sctp_state_name(next_state), 441 atomic_read(&cp->refcnt)); 442 if (dest) { 443 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 444 (next_state != IP_VS_SCTP_S_ESTABLISHED)) { 445 atomic_dec(&dest->activeconns); 446 atomic_inc(&dest->inactconns); 447 cp->flags |= IP_VS_CONN_F_INACTIVE; 448 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && 449 (next_state == IP_VS_SCTP_S_ESTABLISHED)) { 450 atomic_inc(&dest->activeconns); 451 atomic_dec(&dest->inactconns); 452 cp->flags &= ~IP_VS_CONN_F_INACTIVE; 453 } 454 } 455 } 456 if (likely(pd)) 457 cp->timeout = pd->timeout_table[cp->state = next_state]; 458 else /* What to do ? */ 459 cp->timeout = sctp_timeouts[cp->state = next_state]; 460} 461 462static void 463sctp_state_transition(struct ip_vs_conn *cp, int direction, 464 const struct sk_buff *skb, struct ip_vs_proto_data *pd) 465{ 466 spin_lock_bh(&cp->lock); 467 set_sctp_state(pd, cp, direction, skb); 468 spin_unlock_bh(&cp->lock); 469} 470 471static inline __u16 sctp_app_hashkey(__be16 port) 472{ 473 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) 474 & SCTP_APP_TAB_MASK; 475} 476 477static int sctp_register_app(struct net *net, struct ip_vs_app *inc) 478{ 479 struct ip_vs_app *i; 480 __u16 hash; 481 __be16 port = inc->port; 482 int ret = 0; 483 struct netns_ipvs *ipvs = net_ipvs(net); 484 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); 485 486 hash = sctp_app_hashkey(port); 487 488 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { 489 if (i->port == port) { 490 ret = -EEXIST; 491 goto out; 492 } 493 } 494 list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); 495 atomic_inc(&pd->appcnt); 496out: 497 498 return ret; 499} 500 501static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) 502{ 503 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); 504 505 atomic_dec(&pd->appcnt); 506 list_del_rcu(&inc->p_list); 507} 508 509static int sctp_app_conn_bind(struct ip_vs_conn *cp) 510{ 511 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 512 int hash; 513 struct ip_vs_app *inc; 514 int result = 0; 515 516 /* Default binding: bind app only for NAT */ 517 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 518 return 0; 519 /* Lookup application incarnations and bind the right one */ 520 hash = sctp_app_hashkey(cp->vport); 521 522 rcu_read_lock(); 523 list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { 524 if (inc->port == cp->vport) { 525 if (unlikely(!ip_vs_app_inc_get(inc))) 526 break; 527 rcu_read_unlock(); 528 529 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" 530 "%s:%u to app %s on port %u\n", 531 __func__, 532 IP_VS_DBG_ADDR(cp->af, &cp->caddr), 533 ntohs(cp->cport), 534 IP_VS_DBG_ADDR(cp->af, &cp->vaddr), 535 ntohs(cp->vport), 536 inc->name, ntohs(inc->port)); 537 cp->app = inc; 538 if (inc->init_conn) 539 result = inc->init_conn(inc, cp); 540 goto out; 541 } 542 } 543 rcu_read_unlock(); 544out: 545 return result; 546} 547 548/* --------------------------------------------- 549 * timeouts is netns related now. 550 * --------------------------------------------- 551 */ 552static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) 553{ 554 struct netns_ipvs *ipvs = net_ipvs(net); 555 556 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); 557 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, 558 sizeof(sctp_timeouts)); 559 if (!pd->timeout_table) 560 return -ENOMEM; 561 return 0; 562} 563 564static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) 565{ 566 kfree(pd->timeout_table); 567} 568 569struct ip_vs_protocol ip_vs_protocol_sctp = { 570 .name = "SCTP", 571 .protocol = IPPROTO_SCTP, 572 .num_states = IP_VS_SCTP_S_LAST, 573 .dont_defrag = 0, 574 .init = NULL, 575 .exit = NULL, 576 .init_netns = __ip_vs_sctp_init, 577 .exit_netns = __ip_vs_sctp_exit, 578 .register_app = sctp_register_app, 579 .unregister_app = sctp_unregister_app, 580 .conn_schedule = sctp_conn_schedule, 581 .conn_in_get = ip_vs_conn_in_get_proto, 582 .conn_out_get = ip_vs_conn_out_get_proto, 583 .snat_handler = sctp_snat_handler, 584 .dnat_handler = sctp_dnat_handler, 585 .csum_check = sctp_csum_check, 586 .state_name = sctp_state_name, 587 .state_transition = sctp_state_transition, 588 .app_conn_bind = sctp_app_conn_bind, 589 .debug_packet = ip_vs_tcpudp_debug_packet, 590 .timeout_change = NULL, 591}; 592