socket.c revision b29f14284989b3d0b3a5ce268b5b1fc4df9c5795
1/*
2 * net/tipc/socket.c: TIPC socket API
3 *
4 * Copyright (c) 2001-2007, Ericsson AB
5 * Copyright (c) 2004-2008, Wind River Systems
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 *    contributors may be used to endorse or promote products derived from
18 *    this software without specific prior written permission.
19 *
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/net.h>
40#include <linux/socket.h>
41#include <linux/errno.h>
42#include <linux/mm.h>
43#include <linux/poll.h>
44#include <linux/fcntl.h>
45#include <linux/gfp.h>
46#include <asm/string.h>
47#include <asm/atomic.h>
48#include <net/sock.h>
49
50#include <linux/tipc.h>
51#include <linux/tipc_config.h>
52
53#include "core.h"
54#include "port.h"
55
/*
 * TIPC-private socket states.  Both are negative (presumably so they cannot
 * clash with the generic socket_state values -- TODO confirm).
 */
#define SS_LISTENING	(-1)	/* socket is listening */
#define SS_READY	(-2)	/* socket is connectionless */

#define OVERLOAD_LIMIT_BASE	5000
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout, in ms (8s) */
61
62struct tipc_sock {
63	struct sock sk;
64	struct tipc_port *p;
65	struct tipc_portid peer_name;
66	long conn_timeout;
67};
68
/* View a generic socket pointer as the TIPC socket that embeds it */
#define tipc_sk(sk)		((struct tipc_sock *)(sk))

/* Fetch the TIPC port attached to a generic socket */
#define tipc_sk_port(sk)	(tipc_sk(sk)->p)
71
72static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
73static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
74static void wakeupdispatch(struct tipc_port *tport);
75
76static const struct proto_ops packet_ops;
77static const struct proto_ops stream_ops;
78static const struct proto_ops msg_ops;
79
80static struct proto tipc_proto;
81
82static int sockets_enabled = 0;
83
84static atomic_t tipc_queue_size = ATOMIC_INIT(0);
85
/*
 * Revised TIPC socket locking policy:
 *
 * Most socket operations take the standard socket lock when they start
 * and hold it until they finish (or until they need to sleep).  Acquiring
 * this lock grants the owner exclusive access to the fields of the socket
 * data structures, with the exception of the backlog queue.  A few socket
 * operations can be done without taking the socket lock because they only
 * read socket information that never changes during the life of the socket.
 *
 * Socket operations may acquire the lock for the associated TIPC port if they
 * need to perform an operation on the port.  If any routine needs to acquire
 * both the socket lock and the port lock it must take the socket lock first
 * to avoid the risk of deadlock.
 *
 * The dispatcher handling incoming messages cannot grab the socket lock in
 * the standard fashion, since it runs at the BH level when invoked and
 * therefore cannot block.  Instead, it checks to see if the socket lock is
 * currently owned by someone, and either handles the message itself or adds
 * it to the socket's backlog queue; in the latter case the queued message is
 * processed once the process owning the socket lock releases it.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
 * the problem of a blocked socket operation preventing any other operations
 * from occurring.  However, applications must be careful if they have
 * multiple threads trying to send (or receive) on the same socket, as these
 * operations might interfere with each other.  For example, doing a connect
 * and a receive at the same time might allow the receive to consume the
 * ACK message meant for the connect.  While additional work could be done
 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 * that another operation that must be performed in a non-blocking manner is
 * not delayed for very long because the lock has already been taken.
 *
 * NOTE: This code assumes that certain fields of a port/socket pair are
 * constant over its lifetime; such fields can be examined without taking
 * the socket lock and/or port lock, and do not need to be re-read even
 * after resuming processing after waiting.  These fields include:
 *   - socket type
 *   - pointer to socket sk structure (aka tipc_sock structure)
 *   - pointer to port structure
 *   - port reference
 */
130
131/**
132 * advance_rx_queue - discard first buffer in socket receive queue
133 *
134 * Caller must hold socket lock
135 */
136
137static void advance_rx_queue(struct sock *sk)
138{
139	buf_discard(__skb_dequeue(&sk->sk_receive_queue));
140	atomic_dec(&tipc_queue_size);
141}
142
143/**
144 * discard_rx_queue - discard all buffers in socket receive queue
145 *
146 * Caller must hold socket lock
147 */
148
149static void discard_rx_queue(struct sock *sk)
150{
151	struct sk_buff *buf;
152
153	while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
154		atomic_dec(&tipc_queue_size);
155		buf_discard(buf);
156	}
157}
158
159/**
160 * reject_rx_queue - reject all buffers in socket receive queue
161 *
162 * Caller must hold socket lock
163 */
164
165static void reject_rx_queue(struct sock *sk)
166{
167	struct sk_buff *buf;
168
169	while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
170		tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
171		atomic_dec(&tipc_queue_size);
172	}
173}
174
175/**
176 * tipc_create - create a TIPC socket
177 * @net: network namespace (must be default network)
178 * @sock: pre-allocated socket structure
179 * @protocol: protocol indicator (must be 0)
180 * @kern: caused by kernel or by userspace?
181 *
182 * This routine creates additional data structures used by the TIPC socket,
183 * initializes them, and links them together.
184 *
185 * Returns 0 on success, errno otherwise
186 */
187
188static int tipc_create(struct net *net, struct socket *sock, int protocol,
189		       int kern)
190{
191	const struct proto_ops *ops;
192	socket_state state;
193	struct sock *sk;
194	struct tipc_port *tp_ptr;
195
196	/* Validate arguments */
197
198	if (!net_eq(net, &init_net))
199		return -EAFNOSUPPORT;
200
201	if (unlikely(protocol != 0))
202		return -EPROTONOSUPPORT;
203
204	switch (sock->type) {
205	case SOCK_STREAM:
206		ops = &stream_ops;
207		state = SS_UNCONNECTED;
208		break;
209	case SOCK_SEQPACKET:
210		ops = &packet_ops;
211		state = SS_UNCONNECTED;
212		break;
213	case SOCK_DGRAM:
214	case SOCK_RDM:
215		ops = &msg_ops;
216		state = SS_READY;
217		break;
218	default:
219		return -EPROTOTYPE;
220	}
221
222	/* Allocate socket's protocol area */
223
224	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
225	if (sk == NULL)
226		return -ENOMEM;
227
228	/* Allocate TIPC port for socket to use */
229
230	tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
231				     TIPC_LOW_IMPORTANCE);
232	if (unlikely(!tp_ptr)) {
233		sk_free(sk);
234		return -ENOMEM;
235	}
236
237	/* Finish initializing socket data structures */
238
239	sock->ops = ops;
240	sock->state = state;
241
242	sock_init_data(sock, sk);
243	sk->sk_backlog_rcv = backlog_rcv;
244	tipc_sk(sk)->p = tp_ptr;
245	tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
246
247	spin_unlock_bh(tp_ptr->lock);
248
249	if (sock->state == SS_READY) {
250		tipc_set_portunreturnable(tp_ptr->ref, 1);
251		if (sock->type == SOCK_DGRAM)
252			tipc_set_portunreliable(tp_ptr->ref, 1);
253	}
254
255	atomic_inc(&tipc_user_count);
256	return 0;
257}
258
259/**
260 * release - destroy a TIPC socket
261 * @sock: socket to destroy
262 *
263 * This routine cleans up any messages that are still queued on the socket.
264 * For DGRAM and RDM socket types, all queued messages are rejected.
265 * For SEQPACKET and STREAM socket types, the first message is rejected
266 * and any others are discarded.  (If the first message on a STREAM socket
267 * is partially-read, it is discarded and the next one is rejected instead.)
268 *
269 * NOTE: Rejected messages are not necessarily returned to the sender!  They
270 * are returned or discarded according to the "destination droppable" setting
271 * specified for the message by the sender.
272 *
273 * Returns 0 on success, errno otherwise
274 */
275
276static int release(struct socket *sock)
277{
278	struct sock *sk = sock->sk;
279	struct tipc_port *tport;
280	struct sk_buff *buf;
281	int res;
282
283	/*
284	 * Exit if socket isn't fully initialized (occurs when a failed accept()
285	 * releases a pre-allocated child socket that was never used)
286	 */
287
288	if (sk == NULL)
289		return 0;
290
291	tport = tipc_sk_port(sk);
292	lock_sock(sk);
293
294	/*
295	 * Reject all unreceived messages, except on an active connection
296	 * (which disconnects locally & sends a 'FIN+' to peer)
297	 */
298
299	while (sock->state != SS_DISCONNECTING) {
300		buf = __skb_dequeue(&sk->sk_receive_queue);
301		if (buf == NULL)
302			break;
303		atomic_dec(&tipc_queue_size);
304		if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf)))
305			buf_discard(buf);
306		else {
307			if ((sock->state == SS_CONNECTING) ||
308			    (sock->state == SS_CONNECTED)) {
309				sock->state = SS_DISCONNECTING;
310				tipc_disconnect(tport->ref);
311			}
312			tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
313		}
314	}
315
316	/*
317	 * Delete TIPC port; this ensures no more messages are queued
318	 * (also disconnects an active connection & sends a 'FIN-' to peer)
319	 */
320
321	res = tipc_deleteport(tport->ref);
322
323	/* Discard any remaining (connection-based) messages in receive queue */
324
325	discard_rx_queue(sk);
326
327	/* Reject any messages that accumulated in backlog queue */
328
329	sock->state = SS_DISCONNECTING;
330	release_sock(sk);
331
332	sock_put(sk);
333	sock->sk = NULL;
334
335	atomic_dec(&tipc_user_count);
336	return res;
337}
338
339/**
340 * bind - associate or disassocate TIPC name(s) with a socket
341 * @sock: socket structure
342 * @uaddr: socket address describing name(s) and desired operation
343 * @uaddr_len: size of socket address data structure
344 *
345 * Name and name sequence binding is indicated using a positive scope value;
346 * a negative scope value unbinds the specified name.  Specifying no name
347 * (i.e. a socket address length of 0) unbinds all names from the socket.
348 *
349 * Returns 0 on success, errno otherwise
350 *
351 * NOTE: This routine doesn't need to take the socket lock since it doesn't
352 *       access any non-constant socket information.
353 */
354
355static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
356{
357	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
358	u32 portref = tipc_sk_port(sock->sk)->ref;
359
360	if (unlikely(!uaddr_len))
361		return tipc_withdraw(portref, 0, NULL);
362
363	if (uaddr_len < sizeof(struct sockaddr_tipc))
364		return -EINVAL;
365	if (addr->family != AF_TIPC)
366		return -EAFNOSUPPORT;
367
368	if (addr->addrtype == TIPC_ADDR_NAME)
369		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
370	else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
371		return -EAFNOSUPPORT;
372
373	return (addr->scope > 0) ?
374		tipc_publish(portref, addr->scope, &addr->addr.nameseq) :
375		tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq);
376}
377
378/**
379 * get_name - get port ID of socket or peer socket
380 * @sock: socket structure
381 * @uaddr: area for returned socket address
382 * @uaddr_len: area for returned length of socket address
383 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
384 *
385 * Returns 0 on success, errno otherwise
386 *
387 * NOTE: This routine doesn't need to take the socket lock since it only
388 *       accesses socket information that is unchanging (or which changes in
389 * 	 a completely predictable manner).
390 */
391
392static int get_name(struct socket *sock, struct sockaddr *uaddr,
393		    int *uaddr_len, int peer)
394{
395	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
396	struct tipc_sock *tsock = tipc_sk(sock->sk);
397
398	memset(addr, 0, sizeof(*addr));
399	if (peer) {
400		if ((sock->state != SS_CONNECTED) &&
401			((peer != 2) || (sock->state != SS_DISCONNECTING)))
402			return -ENOTCONN;
403		addr->addr.id.ref = tsock->peer_name.ref;
404		addr->addr.id.node = tsock->peer_name.node;
405	} else {
406		addr->addr.id.ref = tsock->p->ref;
407		addr->addr.id.node = tipc_own_addr;
408	}
409
410	*uaddr_len = sizeof(*addr);
411	addr->addrtype = TIPC_ADDR_ID;
412	addr->family = AF_TIPC;
413	addr->scope = 0;
414	addr->addr.name.domain = 0;
415
416	return 0;
417}
418
419/**
420 * poll - read and possibly block on pollmask
421 * @file: file structure associated with the socket
422 * @sock: socket for which to calculate the poll bits
423 * @wait: ???
424 *
425 * Returns pollmask value
426 *
427 * COMMENTARY:
428 * It appears that the usual socket locking mechanisms are not useful here
429 * since the pollmask info is potentially out-of-date the moment this routine
430 * exits.  TCP and other protocols seem to rely on higher level poll routines
431 * to handle any preventable race conditions, so TIPC will do the same ...
432 *
433 * TIPC sets the returned events as follows:
434 *
435 * socket state		flags set
436 * ------------		---------
437 * unconnected		no read flags
438 *			no write flags
439 *
440 * connecting		POLLIN/POLLRDNORM if ACK/NACK in rx queue
441 *			no write flags
442 *
443 * connected		POLLIN/POLLRDNORM if data in rx queue
444 *			POLLOUT if port is not congested
445 *
446 * disconnecting	POLLIN/POLLRDNORM/POLLHUP
447 *			no write flags
448 *
449 * listening		POLLIN if SYN in rx queue
450 *			no write flags
451 *
452 * ready		POLLIN/POLLRDNORM if data in rx queue
453 * [connectionless]	POLLOUT (since port cannot be congested)
454 *
455 * IMPORTANT: The fact that a read or write operation is indicated does NOT
456 * imply that the operation will succeed, merely that it should be performed
457 * and will not block.
458 */
459
460static unsigned int poll(struct file *file, struct socket *sock,
461			 poll_table *wait)
462{
463	struct sock *sk = sock->sk;
464	u32 mask = 0;
465
466	poll_wait(file, sk_sleep(sk), wait);
467
468	switch ((int)sock->state) {
469	case SS_READY:
470	case SS_CONNECTED:
471		if (!tipc_sk_port(sk)->congested)
472			mask |= POLLOUT;
473		/* fall thru' */
474	case SS_CONNECTING:
475	case SS_LISTENING:
476		if (!skb_queue_empty(&sk->sk_receive_queue))
477			mask |= (POLLIN | POLLRDNORM);
478		break;
479	case SS_DISCONNECTING:
480		mask = (POLLIN | POLLRDNORM | POLLHUP);
481		break;
482	}
483
484	return mask;
485}
486
487/**
488 * dest_name_check - verify user is permitted to send to specified port name
489 * @dest: destination address
490 * @m: descriptor for message to be sent
491 *
492 * Prevents restricted configuration commands from being issued by
493 * unauthorized users.
494 *
495 * Returns 0 if permission is granted, otherwise errno
496 */
497
498static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
499{
500	struct tipc_cfg_msg_hdr hdr;
501
502	if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
503		return 0;
504	if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
505		return 0;
506	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
507		return -EACCES;
508
509	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
510		return -EFAULT;
511	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
512		return -EACCES;
513
514	return 0;
515}
516
517/**
518 * send_msg - send message in connectionless manner
519 * @iocb: if NULL, indicates that socket lock is already held
520 * @sock: socket structure
521 * @m: message to send
522 * @total_len: length of message
523 *
524 * Message must have an destination specified explicitly.
525 * Used for SOCK_RDM and SOCK_DGRAM messages,
526 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
527 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
528 *
529 * Returns the number of bytes sent on success, or errno otherwise
530 */
531
532static int send_msg(struct kiocb *iocb, struct socket *sock,
533		    struct msghdr *m, size_t total_len)
534{
535	struct sock *sk = sock->sk;
536	struct tipc_port *tport = tipc_sk_port(sk);
537	struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
538	int needs_conn;
539	int res = -EINVAL;
540
541	if (unlikely(!dest))
542		return -EDESTADDRREQ;
543	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
544		     (dest->family != AF_TIPC)))
545		return -EINVAL;
546
547	if (iocb)
548		lock_sock(sk);
549
550	needs_conn = (sock->state != SS_READY);
551	if (unlikely(needs_conn)) {
552		if (sock->state == SS_LISTENING) {
553			res = -EPIPE;
554			goto exit;
555		}
556		if (sock->state != SS_UNCONNECTED) {
557			res = -EISCONN;
558			goto exit;
559		}
560		if ((tport->published) ||
561		    ((sock->type == SOCK_STREAM) && (total_len != 0))) {
562			res = -EOPNOTSUPP;
563			goto exit;
564		}
565		if (dest->addrtype == TIPC_ADDR_NAME) {
566			tport->conn_type = dest->addr.name.name.type;
567			tport->conn_instance = dest->addr.name.name.instance;
568		}
569
570		/* Abort any pending connection attempts (very unlikely) */
571
572		reject_rx_queue(sk);
573	}
574
575	do {
576		if (dest->addrtype == TIPC_ADDR_NAME) {
577			if ((res = dest_name_check(dest, m)))
578				break;
579			res = tipc_send2name(tport->ref,
580					     &dest->addr.name.name,
581					     dest->addr.name.domain,
582					     m->msg_iovlen,
583					     m->msg_iov);
584		}
585		else if (dest->addrtype == TIPC_ADDR_ID) {
586			res = tipc_send2port(tport->ref,
587					     &dest->addr.id,
588					     m->msg_iovlen,
589					     m->msg_iov);
590		}
591		else if (dest->addrtype == TIPC_ADDR_MCAST) {
592			if (needs_conn) {
593				res = -EOPNOTSUPP;
594				break;
595			}
596			if ((res = dest_name_check(dest, m)))
597				break;
598			res = tipc_multicast(tport->ref,
599					     &dest->addr.nameseq,
600					     m->msg_iovlen,
601					     m->msg_iov);
602		}
603		if (likely(res != -ELINKCONG)) {
604			if (needs_conn && (res >= 0)) {
605				sock->state = SS_CONNECTING;
606			}
607			break;
608		}
609		if (m->msg_flags & MSG_DONTWAIT) {
610			res = -EWOULDBLOCK;
611			break;
612		}
613		release_sock(sk);
614		res = wait_event_interruptible(*sk_sleep(sk),
615					       !tport->congested);
616		lock_sock(sk);
617		if (res)
618			break;
619	} while (1);
620
621exit:
622	if (iocb)
623		release_sock(sk);
624	return res;
625}
626
627/**
628 * send_packet - send a connection-oriented message
629 * @iocb: if NULL, indicates that socket lock is already held
630 * @sock: socket structure
631 * @m: message to send
632 * @total_len: length of message
633 *
634 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
635 *
636 * Returns the number of bytes sent on success, or errno otherwise
637 */
638
639static int send_packet(struct kiocb *iocb, struct socket *sock,
640		       struct msghdr *m, size_t total_len)
641{
642	struct sock *sk = sock->sk;
643	struct tipc_port *tport = tipc_sk_port(sk);
644	struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
645	int res;
646
647	/* Handle implied connection establishment */
648
649	if (unlikely(dest))
650		return send_msg(iocb, sock, m, total_len);
651
652	if (iocb)
653		lock_sock(sk);
654
655	do {
656		if (unlikely(sock->state != SS_CONNECTED)) {
657			if (sock->state == SS_DISCONNECTING)
658				res = -EPIPE;
659			else
660				res = -ENOTCONN;
661			break;
662		}
663
664		res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov);
665		if (likely(res != -ELINKCONG)) {
666			break;
667		}
668		if (m->msg_flags & MSG_DONTWAIT) {
669			res = -EWOULDBLOCK;
670			break;
671		}
672		release_sock(sk);
673		res = wait_event_interruptible(*sk_sleep(sk),
674			(!tport->congested || !tport->connected));
675		lock_sock(sk);
676		if (res)
677			break;
678	} while (1);
679
680	if (iocb)
681		release_sock(sk);
682	return res;
683}
684
685/**
686 * send_stream - send stream-oriented data
687 * @iocb: (unused)
688 * @sock: socket structure
689 * @m: data to send
690 * @total_len: total length of data to be sent
691 *
692 * Used for SOCK_STREAM data.
693 *
694 * Returns the number of bytes sent on success (or partial success),
695 * or errno if no data sent
696 */
697
698static int send_stream(struct kiocb *iocb, struct socket *sock,
699		       struct msghdr *m, size_t total_len)
700{
701	struct sock *sk = sock->sk;
702	struct tipc_port *tport = tipc_sk_port(sk);
703	struct msghdr my_msg;
704	struct iovec my_iov;
705	struct iovec *curr_iov;
706	int curr_iovlen;
707	char __user *curr_start;
708	u32 hdr_size;
709	int curr_left;
710	int bytes_to_send;
711	int bytes_sent;
712	int res;
713
714	lock_sock(sk);
715
716	/* Handle special cases where there is no connection */
717
718	if (unlikely(sock->state != SS_CONNECTED)) {
719		if (sock->state == SS_UNCONNECTED) {
720			res = send_packet(NULL, sock, m, total_len);
721			goto exit;
722		} else if (sock->state == SS_DISCONNECTING) {
723			res = -EPIPE;
724			goto exit;
725		} else {
726			res = -ENOTCONN;
727			goto exit;
728		}
729	}
730
731	if (unlikely(m->msg_name)) {
732		res = -EISCONN;
733		goto exit;
734	}
735
736	/*
737	 * Send each iovec entry using one or more messages
738	 *
739	 * Note: This algorithm is good for the most likely case
740	 * (i.e. one large iovec entry), but could be improved to pass sets
741	 * of small iovec entries into send_packet().
742	 */
743
744	curr_iov = m->msg_iov;
745	curr_iovlen = m->msg_iovlen;
746	my_msg.msg_iov = &my_iov;
747	my_msg.msg_iovlen = 1;
748	my_msg.msg_flags = m->msg_flags;
749	my_msg.msg_name = NULL;
750	bytes_sent = 0;
751
752	hdr_size = msg_hdr_sz(&tport->phdr);
753
754	while (curr_iovlen--) {
755		curr_start = curr_iov->iov_base;
756		curr_left = curr_iov->iov_len;
757
758		while (curr_left) {
759			bytes_to_send = tport->max_pkt - hdr_size;
760			if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
761				bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
762			if (curr_left < bytes_to_send)
763				bytes_to_send = curr_left;
764			my_iov.iov_base = curr_start;
765			my_iov.iov_len = bytes_to_send;
766			if ((res = send_packet(NULL, sock, &my_msg, 0)) < 0) {
767				if (bytes_sent)
768					res = bytes_sent;
769				goto exit;
770			}
771			curr_left -= bytes_to_send;
772			curr_start += bytes_to_send;
773			bytes_sent += bytes_to_send;
774		}
775
776		curr_iov++;
777	}
778	res = bytes_sent;
779exit:
780	release_sock(sk);
781	return res;
782}
783
/**
 * auto_connect - complete connection setup to a remote port
 * @sock: socket structure
 * @msg: peer's response message
 *
 * If the response carries an error code the socket is moved to the
 * disconnecting state.  Otherwise the response's originating port and node
 * are recorded as the peer, the port is connected to that peer, the port
 * takes on the importance of the response, and the socket becomes connected.
 *
 * Returns 0 on success, -ECONNREFUSED if the response carries an error code
 */

static int auto_connect(struct socket *sock, struct tipc_msg *msg)
{
	struct tipc_sock *tsock = tipc_sk(sock->sk);
	struct tipc_port *tport = tsock->p;

	if (msg_errcode(msg)) {
		sock->state = SS_DISCONNECTING;
		return -ECONNREFUSED;
	}

	tsock->peer_name.ref = msg_origport(msg);
	tsock->peer_name.node = msg_orignode(msg);

	tipc_connect2port(tport->ref, &tsock->peer_name);
	tipc_set_portimportance(tport->ref, msg_importance(msg));

	sock->state = SS_CONNECTED;
	return 0;
}
808
809/**
810 * set_orig_addr - capture sender's address for received message
811 * @m: descriptor for message info
812 * @msg: received message header
813 *
814 * Note: Address is not captured if not requested by receiver.
815 */
816
817static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
818{
819	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name;
820
821	if (addr) {
822		addr->family = AF_TIPC;
823		addr->addrtype = TIPC_ADDR_ID;
824		addr->addr.id.ref = msg_origport(msg);
825		addr->addr.id.node = msg_orignode(msg);
826		addr->addr.name.domain = 0;   	/* could leave uninitialized */
827		addr->scope = 0;   		/* could leave uninitialized */
828		m->msg_namelen = sizeof(struct sockaddr_tipc);
829	}
830}
831
832/**
833 * anc_data_recv - optionally capture ancillary data for received message
834 * @m: descriptor for message info
835 * @msg: received message header
836 * @tport: TIPC port associated with message
837 *
838 * Note: Ancillary data is not captured if not requested by receiver.
839 *
840 * Returns 0 if successful, otherwise errno
841 */
842
843static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
844				struct tipc_port *tport)
845{
846	u32 anc_data[3];
847	u32 err;
848	u32 dest_type;
849	int has_name;
850	int res;
851
852	if (likely(m->msg_controllen == 0))
853		return 0;
854
855	/* Optionally capture errored message object(s) */
856
857	err = msg ? msg_errcode(msg) : 0;
858	if (unlikely(err)) {
859		anc_data[0] = err;
860		anc_data[1] = msg_data_sz(msg);
861		if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data)))
862			return res;
863		if (anc_data[1] &&
864		    (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
865				    msg_data(msg))))
866			return res;
867	}
868
869	/* Optionally capture message destination object */
870
871	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
872	switch (dest_type) {
873	case TIPC_NAMED_MSG:
874		has_name = 1;
875		anc_data[0] = msg_nametype(msg);
876		anc_data[1] = msg_namelower(msg);
877		anc_data[2] = msg_namelower(msg);
878		break;
879	case TIPC_MCAST_MSG:
880		has_name = 1;
881		anc_data[0] = msg_nametype(msg);
882		anc_data[1] = msg_namelower(msg);
883		anc_data[2] = msg_nameupper(msg);
884		break;
885	case TIPC_CONN_MSG:
886		has_name = (tport->conn_type != 0);
887		anc_data[0] = tport->conn_type;
888		anc_data[1] = tport->conn_instance;
889		anc_data[2] = tport->conn_instance;
890		break;
891	default:
892		has_name = 0;
893	}
894	if (has_name &&
895	    (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data)))
896		return res;
897
898	return 0;
899}
900
901/**
902 * recv_msg - receive packet-oriented message
903 * @iocb: (unused)
904 * @m: descriptor for message info
905 * @buf_len: total size of user buffer area
906 * @flags: receive flags
907 *
908 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
909 * If the complete message doesn't fit in user area, truncate it.
910 *
911 * Returns size of returned message data, errno otherwise
912 */
913
914static int recv_msg(struct kiocb *iocb, struct socket *sock,
915		    struct msghdr *m, size_t buf_len, int flags)
916{
917	struct sock *sk = sock->sk;
918	struct tipc_port *tport = tipc_sk_port(sk);
919	struct sk_buff *buf;
920	struct tipc_msg *msg;
921	unsigned int sz;
922	u32 err;
923	int res;
924
925	/* Catch invalid receive requests */
926
927	if (m->msg_iovlen != 1)
928		return -EOPNOTSUPP;   /* Don't do multiple iovec entries yet */
929
930	if (unlikely(!buf_len))
931		return -EINVAL;
932
933	lock_sock(sk);
934
935	if (unlikely(sock->state == SS_UNCONNECTED)) {
936		res = -ENOTCONN;
937		goto exit;
938	}
939
940restart:
941
942	/* Look for a message in receive queue; wait if necessary */
943
944	while (skb_queue_empty(&sk->sk_receive_queue)) {
945		if (sock->state == SS_DISCONNECTING) {
946			res = -ENOTCONN;
947			goto exit;
948		}
949		if (flags & MSG_DONTWAIT) {
950			res = -EWOULDBLOCK;
951			goto exit;
952		}
953		release_sock(sk);
954		res = wait_event_interruptible(*sk_sleep(sk),
955			(!skb_queue_empty(&sk->sk_receive_queue) ||
956			 (sock->state == SS_DISCONNECTING)));
957		lock_sock(sk);
958		if (res)
959			goto exit;
960	}
961
962	/* Look at first message in receive queue */
963
964	buf = skb_peek(&sk->sk_receive_queue);
965	msg = buf_msg(buf);
966	sz = msg_data_sz(msg);
967	err = msg_errcode(msg);
968
969	/* Complete connection setup for an implied connect */
970
971	if (unlikely(sock->state == SS_CONNECTING)) {
972		res = auto_connect(sock, msg);
973		if (res)
974			goto exit;
975	}
976
977	/* Discard an empty non-errored message & try again */
978
979	if ((!sz) && (!err)) {
980		advance_rx_queue(sk);
981		goto restart;
982	}
983
984	/* Capture sender's address (optional) */
985
986	set_orig_addr(m, msg);
987
988	/* Capture ancillary data (optional) */
989
990	res = anc_data_recv(m, msg, tport);
991	if (res)
992		goto exit;
993
994	/* Capture message data (if valid) & compute return value (always) */
995
996	if (!err) {
997		if (unlikely(buf_len < sz)) {
998			sz = buf_len;
999			m->msg_flags |= MSG_TRUNC;
1000		}
1001		if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg),
1002					  sz))) {
1003			res = -EFAULT;
1004			goto exit;
1005		}
1006		res = sz;
1007	} else {
1008		if ((sock->state == SS_READY) ||
1009		    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
1010			res = 0;
1011		else
1012			res = -ECONNRESET;
1013	}
1014
1015	/* Consume received message (optional) */
1016
1017	if (likely(!(flags & MSG_PEEK))) {
1018		if ((sock->state != SS_READY) &&
1019		    (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1020			tipc_acknowledge(tport->ref, tport->conn_unacked);
1021		advance_rx_queue(sk);
1022	}
1023exit:
1024	release_sock(sk);
1025	return res;
1026}
1027
1028/**
1029 * recv_stream - receive stream-oriented data
1030 * @iocb: (unused)
1031 * @m: descriptor for message info
1032 * @buf_len: total size of user buffer area
1033 * @flags: receive flags
1034 *
1035 * Used for SOCK_STREAM messages only.  If not enough data is available
1036 * will optionally wait for more; never truncates data.
1037 *
1038 * Returns size of returned message data, errno otherwise
1039 */
1040
1041static int recv_stream(struct kiocb *iocb, struct socket *sock,
1042		       struct msghdr *m, size_t buf_len, int flags)
1043{
1044	struct sock *sk = sock->sk;
1045	struct tipc_port *tport = tipc_sk_port(sk);
1046	struct sk_buff *buf;
1047	struct tipc_msg *msg;
1048	unsigned int sz;
1049	int sz_to_copy, target, needed;
1050	int sz_copied = 0;
1051	char __user *crs = m->msg_iov->iov_base;
1052	unsigned char *buf_crs;
1053	u32 err;
1054	int res = 0;
1055
1056	/* Catch invalid receive attempts */
1057
1058	if (m->msg_iovlen != 1)
1059		return -EOPNOTSUPP;   /* Don't do multiple iovec entries yet */
1060
1061	if (unlikely(!buf_len))
1062		return -EINVAL;
1063
1064	lock_sock(sk);
1065
1066	if (unlikely((sock->state == SS_UNCONNECTED) ||
1067		     (sock->state == SS_CONNECTING))) {
1068		res = -ENOTCONN;
1069		goto exit;
1070	}
1071
1072	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1073
1074restart:
1075
1076	/* Look for a message in receive queue; wait if necessary */
1077
1078	while (skb_queue_empty(&sk->sk_receive_queue)) {
1079		if (sock->state == SS_DISCONNECTING) {
1080			res = -ENOTCONN;
1081			goto exit;
1082		}
1083		if (flags & MSG_DONTWAIT) {
1084			res = -EWOULDBLOCK;
1085			goto exit;
1086		}
1087		release_sock(sk);
1088		res = wait_event_interruptible(*sk_sleep(sk),
1089			(!skb_queue_empty(&sk->sk_receive_queue) ||
1090			 (sock->state == SS_DISCONNECTING)));
1091		lock_sock(sk);
1092		if (res)
1093			goto exit;
1094	}
1095
1096	/* Look at first message in receive queue */
1097
1098	buf = skb_peek(&sk->sk_receive_queue);
1099	msg = buf_msg(buf);
1100	sz = msg_data_sz(msg);
1101	err = msg_errcode(msg);
1102
1103	/* Discard an empty non-errored message & try again */
1104
1105	if ((!sz) && (!err)) {
1106		advance_rx_queue(sk);
1107		goto restart;
1108	}
1109
1110	/* Optionally capture sender's address & ancillary data of first msg */
1111
1112	if (sz_copied == 0) {
1113		set_orig_addr(m, msg);
1114		res = anc_data_recv(m, msg, tport);
1115		if (res)
1116			goto exit;
1117	}
1118
1119	/* Capture message data (if valid) & compute return value (always) */
1120
1121	if (!err) {
1122		buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
1123		sz = (unsigned char *)msg + msg_size(msg) - buf_crs;
1124
1125		needed = (buf_len - sz_copied);
1126		sz_to_copy = (sz <= needed) ? sz : needed;
1127		if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) {
1128			res = -EFAULT;
1129			goto exit;
1130		}
1131		sz_copied += sz_to_copy;
1132
1133		if (sz_to_copy < sz) {
1134			if (!(flags & MSG_PEEK))
1135				TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy;
1136			goto exit;
1137		}
1138
1139		crs += sz_to_copy;
1140	} else {
1141		if (sz_copied != 0)
1142			goto exit; /* can't add error msg to valid data */
1143
1144		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
1145			res = 0;
1146		else
1147			res = -ECONNRESET;
1148	}
1149
1150	/* Consume received message (optional) */
1151
1152	if (likely(!(flags & MSG_PEEK))) {
1153		if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1154			tipc_acknowledge(tport->ref, tport->conn_unacked);
1155		advance_rx_queue(sk);
1156	}
1157
1158	/* Loop around if more data is required */
1159
1160	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
1161	    (!skb_queue_empty(&sk->sk_receive_queue) ||
1162	    (sz_copied < target)) &&	/* and more is ready or required */
1163	    (!(flags & MSG_PEEK)) &&	/* and aren't just peeking at data */
1164	    (!err))			/* and haven't reached a FIN */
1165		goto restart;
1166
1167exit:
1168	release_sock(sk);
1169	return sz_copied ? sz_copied : res;
1170}
1171
1172/**
1173 * rx_queue_full - determine if receive queue can accept another message
1174 * @msg: message to be added to queue
1175 * @queue_size: current size of queue
1176 * @base: nominal maximum size of queue
1177 *
1178 * Returns 1 if queue is unable to accept message, 0 otherwise
1179 */
1180
1181static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1182{
1183	u32 threshold;
1184	u32 imp = msg_importance(msg);
1185
1186	if (imp == TIPC_LOW_IMPORTANCE)
1187		threshold = base;
1188	else if (imp == TIPC_MEDIUM_IMPORTANCE)
1189		threshold = base * 2;
1190	else if (imp == TIPC_HIGH_IMPORTANCE)
1191		threshold = base * 100;
1192	else
1193		return 0;
1194
1195	if (msg_connected(msg))
1196		threshold *= 4;
1197
1198	return queue_size >= threshold;
1199}
1200
1201/**
1202 * filter_rcv - validate incoming message
1203 * @sk: socket
1204 * @buf: message
1205 *
1206 * Enqueues message on receive queue if acceptable; optionally handles
1207 * disconnect indication for a connected socket.
1208 *
1209 * Called with socket lock already taken; port lock may also be taken.
1210 *
1211 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1212 */
1213
1214static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1215{
1216	struct socket *sock = sk->sk_socket;
1217	struct tipc_msg *msg = buf_msg(buf);
1218	u32 recv_q_len;
1219
1220	/* Reject message if it is wrong sort of message for socket */
1221
1222	/*
1223	 * WOULD IT BE BETTER TO JUST DISCARD THESE MESSAGES INSTEAD?
1224	 * "NO PORT" ISN'T REALLY THE RIGHT ERROR CODE, AND THERE MAY
1225	 * BE SECURITY IMPLICATIONS INHERENT IN REJECTING INVALID TRAFFIC
1226	 */
1227
1228	if (sock->state == SS_READY) {
1229		if (msg_connected(msg))
1230			return TIPC_ERR_NO_PORT;
1231	} else {
1232		if (msg_mcast(msg))
1233			return TIPC_ERR_NO_PORT;
1234		if (sock->state == SS_CONNECTED) {
1235			if (!msg_connected(msg))
1236				return TIPC_ERR_NO_PORT;
1237		} else if (sock->state == SS_CONNECTING) {
1238			if (!msg_connected(msg) && (msg_errcode(msg) == 0))
1239				return TIPC_ERR_NO_PORT;
1240		} else if (sock->state == SS_LISTENING) {
1241			if (msg_connected(msg) || msg_errcode(msg))
1242				return TIPC_ERR_NO_PORT;
1243		} else if (sock->state == SS_DISCONNECTING) {
1244			return TIPC_ERR_NO_PORT;
1245		} else /* (sock->state == SS_UNCONNECTED) */ {
1246			if (msg_connected(msg) || msg_errcode(msg))
1247				return TIPC_ERR_NO_PORT;
1248		}
1249	}
1250
1251	/* Reject message if there isn't room to queue it */
1252
1253	recv_q_len = (u32)atomic_read(&tipc_queue_size);
1254	if (unlikely(recv_q_len >= OVERLOAD_LIMIT_BASE)) {
1255		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE))
1256			return TIPC_ERR_OVERLOAD;
1257	}
1258	recv_q_len = skb_queue_len(&sk->sk_receive_queue);
1259	if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
1260		if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
1261			return TIPC_ERR_OVERLOAD;
1262	}
1263
1264	/* Enqueue message (finally!) */
1265
1266	TIPC_SKB_CB(buf)->handle = msg_data(msg);
1267	atomic_inc(&tipc_queue_size);
1268	__skb_queue_tail(&sk->sk_receive_queue, buf);
1269
1270	/* Initiate connection termination for an incoming 'FIN' */
1271
1272	if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) {
1273		sock->state = SS_DISCONNECTING;
1274		tipc_disconnect_port(tipc_sk_port(sk));
1275	}
1276
1277	if (waitqueue_active(sk_sleep(sk)))
1278		wake_up_interruptible(sk_sleep(sk));
1279	return TIPC_OK;
1280}
1281
1282/**
1283 * backlog_rcv - handle incoming message from backlog queue
1284 * @sk: socket
1285 * @buf: message
1286 *
1287 * Caller must hold socket lock, but not port lock.
1288 *
1289 * Returns 0
1290 */
1291
1292static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1293{
1294	u32 res;
1295
1296	res = filter_rcv(sk, buf);
1297	if (res)
1298		tipc_reject_msg(buf, res);
1299	return 0;
1300}
1301
1302/**
1303 * dispatch - handle incoming message
1304 * @tport: TIPC port that received message
1305 * @buf: message
1306 *
1307 * Called with port lock already taken.
1308 *
1309 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1310 */
1311
1312static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1313{
1314	struct sock *sk = (struct sock *)tport->usr_handle;
1315	u32 res;
1316
1317	/*
1318	 * Process message if socket is unlocked; otherwise add to backlog queue
1319	 *
1320	 * This code is based on sk_receive_skb(), but must be distinct from it
1321	 * since a TIPC-specific filter/reject mechanism is utilized
1322	 */
1323
1324	bh_lock_sock(sk);
1325	if (!sock_owned_by_user(sk)) {
1326		res = filter_rcv(sk, buf);
1327	} else {
1328		if (sk_add_backlog(sk, buf))
1329			res = TIPC_ERR_OVERLOAD;
1330		else
1331			res = TIPC_OK;
1332	}
1333	bh_unlock_sock(sk);
1334
1335	return res;
1336}
1337
1338/**
1339 * wakeupdispatch - wake up port after congestion
1340 * @tport: port to wakeup
1341 *
1342 * Called with port lock already taken.
1343 */
1344
1345static void wakeupdispatch(struct tipc_port *tport)
1346{
1347	struct sock *sk = (struct sock *)tport->usr_handle;
1348
1349	if (waitqueue_active(sk_sleep(sk)))
1350		wake_up_interruptible(sk_sleep(sk));
1351}
1352
1353/**
1354 * connect - establish a connection to another TIPC port
1355 * @sock: socket structure
1356 * @dest: socket address for destination port
1357 * @destlen: size of socket address data structure
1358 * @flags: file-related flags associated with socket
1359 *
1360 * Returns 0 on success, errno otherwise
1361 */
1362
1363static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1364		   int flags)
1365{
1366	struct sock *sk = sock->sk;
1367	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
1368	struct msghdr m = {NULL,};
1369	struct sk_buff *buf;
1370	struct tipc_msg *msg;
1371	long timeout;
1372	int res;
1373
1374	lock_sock(sk);
1375
1376	/* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
1377
1378	if (sock->state == SS_READY) {
1379		res = -EOPNOTSUPP;
1380		goto exit;
1381	}
1382
1383	/* For now, TIPC does not support the non-blocking form of connect() */
1384
1385	if (flags & O_NONBLOCK) {
1386		res = -EOPNOTSUPP;
1387		goto exit;
1388	}
1389
1390	/* Issue Posix-compliant error code if socket is in the wrong state */
1391
1392	if (sock->state == SS_LISTENING) {
1393		res = -EOPNOTSUPP;
1394		goto exit;
1395	}
1396	if (sock->state == SS_CONNECTING) {
1397		res = -EALREADY;
1398		goto exit;
1399	}
1400	if (sock->state != SS_UNCONNECTED) {
1401		res = -EISCONN;
1402		goto exit;
1403	}
1404
1405	/*
1406	 * Reject connection attempt using multicast address
1407	 *
1408	 * Note: send_msg() validates the rest of the address fields,
1409	 *       so there's no need to do it here
1410	 */
1411
1412	if (dst->addrtype == TIPC_ADDR_MCAST) {
1413		res = -EINVAL;
1414		goto exit;
1415	}
1416
1417	/* Reject any messages already in receive queue (very unlikely) */
1418
1419	reject_rx_queue(sk);
1420
1421	/* Send a 'SYN-' to destination */
1422
1423	m.msg_name = dest;
1424	m.msg_namelen = destlen;
1425	res = send_msg(NULL, sock, &m, 0);
1426	if (res < 0) {
1427		goto exit;
1428	}
1429
1430	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1431
1432	timeout = tipc_sk(sk)->conn_timeout;
1433	release_sock(sk);
1434	res = wait_event_interruptible_timeout(*sk_sleep(sk),
1435			(!skb_queue_empty(&sk->sk_receive_queue) ||
1436			(sock->state != SS_CONNECTING)),
1437			timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
1438	lock_sock(sk);
1439
1440	if (res > 0) {
1441		buf = skb_peek(&sk->sk_receive_queue);
1442		if (buf != NULL) {
1443			msg = buf_msg(buf);
1444			res = auto_connect(sock, msg);
1445			if (!res) {
1446				if (!msg_data_sz(msg))
1447					advance_rx_queue(sk);
1448			}
1449		} else {
1450			if (sock->state == SS_CONNECTED) {
1451				res = -EISCONN;
1452			} else {
1453				res = -ECONNREFUSED;
1454			}
1455		}
1456	} else {
1457		if (res == 0)
1458			res = -ETIMEDOUT;
1459		else
1460			; /* leave "res" unchanged */
1461		sock->state = SS_DISCONNECTING;
1462	}
1463
1464exit:
1465	release_sock(sk);
1466	return res;
1467}
1468
1469/**
1470 * listen - allow socket to listen for incoming connections
1471 * @sock: socket structure
1472 * @len: (unused)
1473 *
1474 * Returns 0 on success, errno otherwise
1475 */
1476
1477static int listen(struct socket *sock, int len)
1478{
1479	struct sock *sk = sock->sk;
1480	int res;
1481
1482	lock_sock(sk);
1483
1484	if (sock->state == SS_READY)
1485		res = -EOPNOTSUPP;
1486	else if (sock->state != SS_UNCONNECTED)
1487		res = -EINVAL;
1488	else {
1489		sock->state = SS_LISTENING;
1490		res = 0;
1491	}
1492
1493	release_sock(sk);
1494	return res;
1495}
1496
1497/**
1498 * accept - wait for connection request
1499 * @sock: listening socket
1500 * @newsock: new socket that is to be connected
1501 * @flags: file-related flags associated with socket
1502 *
1503 * Returns 0 on success, errno otherwise
1504 */
1505
1506static int accept(struct socket *sock, struct socket *new_sock, int flags)
1507{
1508	struct sock *sk = sock->sk;
1509	struct sk_buff *buf;
1510	int res;
1511
1512	lock_sock(sk);
1513
1514	if (sock->state == SS_READY) {
1515		res = -EOPNOTSUPP;
1516		goto exit;
1517	}
1518	if (sock->state != SS_LISTENING) {
1519		res = -EINVAL;
1520		goto exit;
1521	}
1522
1523	while (skb_queue_empty(&sk->sk_receive_queue)) {
1524		if (flags & O_NONBLOCK) {
1525			res = -EWOULDBLOCK;
1526			goto exit;
1527		}
1528		release_sock(sk);
1529		res = wait_event_interruptible(*sk_sleep(sk),
1530				(!skb_queue_empty(&sk->sk_receive_queue)));
1531		lock_sock(sk);
1532		if (res)
1533			goto exit;
1534	}
1535
1536	buf = skb_peek(&sk->sk_receive_queue);
1537
1538	res = tipc_create(sock_net(sock->sk), new_sock, 0, 0);
1539	if (!res) {
1540		struct sock *new_sk = new_sock->sk;
1541		struct tipc_sock *new_tsock = tipc_sk(new_sk);
1542		struct tipc_port *new_tport = new_tsock->p;
1543		u32 new_ref = new_tport->ref;
1544		struct tipc_msg *msg = buf_msg(buf);
1545
1546		lock_sock(new_sk);
1547
1548		/*
1549		 * Reject any stray messages received by new socket
1550		 * before the socket lock was taken (very, very unlikely)
1551		 */
1552
1553		reject_rx_queue(new_sk);
1554
1555		/* Connect new socket to it's peer */
1556
1557		new_tsock->peer_name.ref = msg_origport(msg);
1558		new_tsock->peer_name.node = msg_orignode(msg);
1559		tipc_connect2port(new_ref, &new_tsock->peer_name);
1560		new_sock->state = SS_CONNECTED;
1561
1562		tipc_set_portimportance(new_ref, msg_importance(msg));
1563		if (msg_named(msg)) {
1564			new_tport->conn_type = msg_nametype(msg);
1565			new_tport->conn_instance = msg_nameinst(msg);
1566		}
1567
1568		/*
1569		 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
1570		 * Respond to 'SYN+' by queuing it on new socket.
1571		 */
1572
1573		if (!msg_data_sz(msg)) {
1574			struct msghdr m = {NULL,};
1575
1576			advance_rx_queue(sk);
1577			send_packet(NULL, new_sock, &m, 0);
1578		} else {
1579			__skb_dequeue(&sk->sk_receive_queue);
1580			__skb_queue_head(&new_sk->sk_receive_queue, buf);
1581		}
1582		release_sock(new_sk);
1583	}
1584exit:
1585	release_sock(sk);
1586	return res;
1587}
1588
1589/**
1590 * shutdown - shutdown socket connection
1591 * @sock: socket structure
1592 * @how: direction to close (must be SHUT_RDWR)
1593 *
1594 * Terminates connection (if necessary), then purges socket's receive queue.
1595 *
1596 * Returns 0 on success, errno otherwise
1597 */
1598
1599static int shutdown(struct socket *sock, int how)
1600{
1601	struct sock *sk = sock->sk;
1602	struct tipc_port *tport = tipc_sk_port(sk);
1603	struct sk_buff *buf;
1604	int res;
1605
1606	if (how != SHUT_RDWR)
1607		return -EINVAL;
1608
1609	lock_sock(sk);
1610
1611	switch (sock->state) {
1612	case SS_CONNECTING:
1613	case SS_CONNECTED:
1614
1615		/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1616restart:
1617		buf = __skb_dequeue(&sk->sk_receive_queue);
1618		if (buf) {
1619			atomic_dec(&tipc_queue_size);
1620			if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) {
1621				buf_discard(buf);
1622				goto restart;
1623			}
1624			tipc_disconnect(tport->ref);
1625			tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
1626		} else {
1627			tipc_shutdown(tport->ref);
1628		}
1629
1630		sock->state = SS_DISCONNECTING;
1631
1632		/* fall through */
1633
1634	case SS_DISCONNECTING:
1635
1636		/* Discard any unreceived messages; wake up sleeping tasks */
1637
1638		discard_rx_queue(sk);
1639		if (waitqueue_active(sk_sleep(sk)))
1640			wake_up_interruptible(sk_sleep(sk));
1641		res = 0;
1642		break;
1643
1644	default:
1645		res = -ENOTCONN;
1646	}
1647
1648	release_sock(sk);
1649	return res;
1650}
1651
1652/**
1653 * setsockopt - set socket option
1654 * @sock: socket structure
1655 * @lvl: option level
1656 * @opt: option identifier
1657 * @ov: pointer to new option value
1658 * @ol: length of option value
1659 *
1660 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
1661 * (to ease compatibility).
1662 *
1663 * Returns 0 on success, errno otherwise
1664 */
1665
1666static int setsockopt(struct socket *sock,
1667		      int lvl, int opt, char __user *ov, unsigned int ol)
1668{
1669	struct sock *sk = sock->sk;
1670	struct tipc_port *tport = tipc_sk_port(sk);
1671	u32 value;
1672	int res;
1673
1674	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1675		return 0;
1676	if (lvl != SOL_TIPC)
1677		return -ENOPROTOOPT;
1678	if (ol < sizeof(value))
1679		return -EINVAL;
1680	if ((res = get_user(value, (u32 __user *)ov)))
1681		return res;
1682
1683	lock_sock(sk);
1684
1685	switch (opt) {
1686	case TIPC_IMPORTANCE:
1687		res = tipc_set_portimportance(tport->ref, value);
1688		break;
1689	case TIPC_SRC_DROPPABLE:
1690		if (sock->type != SOCK_STREAM)
1691			res = tipc_set_portunreliable(tport->ref, value);
1692		else
1693			res = -ENOPROTOOPT;
1694		break;
1695	case TIPC_DEST_DROPPABLE:
1696		res = tipc_set_portunreturnable(tport->ref, value);
1697		break;
1698	case TIPC_CONN_TIMEOUT:
1699		tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
1700		/* no need to set "res", since already 0 at this point */
1701		break;
1702	default:
1703		res = -EINVAL;
1704	}
1705
1706	release_sock(sk);
1707
1708	return res;
1709}
1710
1711/**
1712 * getsockopt - get socket option
1713 * @sock: socket structure
1714 * @lvl: option level
1715 * @opt: option identifier
1716 * @ov: receptacle for option value
1717 * @ol: receptacle for length of option value
1718 *
1719 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
1720 * (to ease compatibility).
1721 *
1722 * Returns 0 on success, errno otherwise
1723 */
1724
1725static int getsockopt(struct socket *sock,
1726		      int lvl, int opt, char __user *ov, int __user *ol)
1727{
1728	struct sock *sk = sock->sk;
1729	struct tipc_port *tport = tipc_sk_port(sk);
1730	int len;
1731	u32 value;
1732	int res;
1733
1734	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1735		return put_user(0, ol);
1736	if (lvl != SOL_TIPC)
1737		return -ENOPROTOOPT;
1738	if ((res = get_user(len, ol)))
1739		return res;
1740
1741	lock_sock(sk);
1742
1743	switch (opt) {
1744	case TIPC_IMPORTANCE:
1745		res = tipc_portimportance(tport->ref, &value);
1746		break;
1747	case TIPC_SRC_DROPPABLE:
1748		res = tipc_portunreliable(tport->ref, &value);
1749		break;
1750	case TIPC_DEST_DROPPABLE:
1751		res = tipc_portunreturnable(tport->ref, &value);
1752		break;
1753	case TIPC_CONN_TIMEOUT:
1754		value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
1755		/* no need to set "res", since already 0 at this point */
1756		break;
1757	 case TIPC_NODE_RECVQ_DEPTH:
1758		value = (u32)atomic_read(&tipc_queue_size);
1759		break;
1760	 case TIPC_SOCK_RECVQ_DEPTH:
1761		value = skb_queue_len(&sk->sk_receive_queue);
1762		break;
1763	default:
1764		res = -EINVAL;
1765	}
1766
1767	release_sock(sk);
1768
1769	if (res) {
1770		/* "get" failed */
1771	}
1772	else if (len < sizeof(value)) {
1773		res = -EINVAL;
1774	}
1775	else if (copy_to_user(ov, &value, sizeof(value))) {
1776		res = -EFAULT;
1777	}
1778	else {
1779		res = put_user(sizeof(value), ol);
1780	}
1781
1782	return res;
1783}
1784
1785/**
1786 * Protocol switches for the various types of TIPC sockets
1787 */
1788
1789static const struct proto_ops msg_ops = {
1790	.owner 		= THIS_MODULE,
1791	.family		= AF_TIPC,
1792	.release	= release,
1793	.bind		= bind,
1794	.connect	= connect,
1795	.socketpair	= sock_no_socketpair,
1796	.accept		= accept,
1797	.getname	= get_name,
1798	.poll		= poll,
1799	.ioctl		= sock_no_ioctl,
1800	.listen		= listen,
1801	.shutdown	= shutdown,
1802	.setsockopt	= setsockopt,
1803	.getsockopt	= getsockopt,
1804	.sendmsg	= send_msg,
1805	.recvmsg	= recv_msg,
1806	.mmap		= sock_no_mmap,
1807	.sendpage	= sock_no_sendpage
1808};
1809
1810static const struct proto_ops packet_ops = {
1811	.owner 		= THIS_MODULE,
1812	.family		= AF_TIPC,
1813	.release	= release,
1814	.bind		= bind,
1815	.connect	= connect,
1816	.socketpair	= sock_no_socketpair,
1817	.accept		= accept,
1818	.getname	= get_name,
1819	.poll		= poll,
1820	.ioctl		= sock_no_ioctl,
1821	.listen		= listen,
1822	.shutdown	= shutdown,
1823	.setsockopt	= setsockopt,
1824	.getsockopt	= getsockopt,
1825	.sendmsg	= send_packet,
1826	.recvmsg	= recv_msg,
1827	.mmap		= sock_no_mmap,
1828	.sendpage	= sock_no_sendpage
1829};
1830
1831static const struct proto_ops stream_ops = {
1832	.owner 		= THIS_MODULE,
1833	.family		= AF_TIPC,
1834	.release	= release,
1835	.bind		= bind,
1836	.connect	= connect,
1837	.socketpair	= sock_no_socketpair,
1838	.accept		= accept,
1839	.getname	= get_name,
1840	.poll		= poll,
1841	.ioctl		= sock_no_ioctl,
1842	.listen		= listen,
1843	.shutdown	= shutdown,
1844	.setsockopt	= setsockopt,
1845	.getsockopt	= getsockopt,
1846	.sendmsg	= send_stream,
1847	.recvmsg	= recv_stream,
1848	.mmap		= sock_no_mmap,
1849	.sendpage	= sock_no_sendpage
1850};
1851
1852static const struct net_proto_family tipc_family_ops = {
1853	.owner 		= THIS_MODULE,
1854	.family		= AF_TIPC,
1855	.create		= tipc_create
1856};
1857
1858static struct proto tipc_proto = {
1859	.name		= "TIPC",
1860	.owner		= THIS_MODULE,
1861	.obj_size	= sizeof(struct tipc_sock)
1862};
1863
1864/**
1865 * tipc_socket_init - initialize TIPC socket interface
1866 *
1867 * Returns 0 on success, errno otherwise
1868 */
1869int tipc_socket_init(void)
1870{
1871	int res;
1872
1873	res = proto_register(&tipc_proto, 1);
1874	if (res) {
1875		err("Failed to register TIPC protocol type\n");
1876		goto out;
1877	}
1878
1879	res = sock_register(&tipc_family_ops);
1880	if (res) {
1881		err("Failed to register TIPC socket type\n");
1882		proto_unregister(&tipc_proto);
1883		goto out;
1884	}
1885
1886	sockets_enabled = 1;
1887 out:
1888	return res;
1889}
1890
1891/**
1892 * tipc_socket_stop - stop TIPC socket interface
1893 */
1894
1895void tipc_socket_stop(void)
1896{
1897	if (!sockets_enabled)
1898		return;
1899
1900	sockets_enabled = 0;
1901	sock_unregister(tipc_family_ops.family);
1902	proto_unregister(&tipc_proto);
1903}
1904
1905