patch-2.3.41 linux/net/ipv4/af_inet.c

Next file: linux/net/ipv4/arp.c
Previous file: linux/net/ethernet/eth.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.40/linux/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
  *
  *		PF_INET protocol family socket handler.
  *
- * Version:	$Id: af_inet.c,v 1.101 2000/01/09 02:19:38 davem Exp $
+ * Version:	$Id: af_inet.c,v 1.104 2000/01/18 08:24:14 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -117,7 +117,9 @@
 
 struct linux_mib net_statistics[NR_CPUS*2];
 
+#ifdef INET_REFCNT_DEBUG
 atomic_t inet_sock_nr;
+#endif
 
 extern int raw_get_info(char *, char **, off_t, int);
 extern int snmp_get_info(char *, char **, off_t, int);
@@ -159,8 +161,8 @@
 	if (sk->protinfo.af_inet.opt)
 		kfree(sk->protinfo.af_inet.opt);
 	dst_release(sk->dst_cache);
-	atomic_dec(&inet_sock_nr);
 #ifdef INET_REFCNT_DEBUG
+	atomic_dec(&inet_sock_nr);
 	printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", sk, atomic_read(&inet_sock_nr));
 #endif
 }
@@ -171,32 +173,28 @@
 		sk->prot->destroy(sk);
 
 	/* Observation: when inet_sock_release is called, processes have
-	   no access to socket. But net still has.
-	   Step one, detach it from networking:
-
-	   A. Remove from hash tables.
+	 * no access to socket. But net still has.
+	 * Step one, detach it from networking:
+	 *
+	 * A. Remove from hash tables.
 	 */
 
 	sk->prot->unhash(sk);
 
 	/* In this point socket cannot receive new packets,
-	   but it is possible that some packets are in flight
-	   because some CPU runs receiver and did hash table lookup
-	   before we unhashed socket. They will achieve receive queue
-	   and will be purged by socket destructor.
-
-	   Also we still have packets pending on receive
-	   queue and probably, our own packets waiting in device queues.
-	   sock_destroy will drain receive queue, but transmitted
-	   packets will delay socket destruction until the last reference
-	   will be released.
+	 * but it is possible that some packets are in flight
+	 * because some CPU runs receiver and did hash table lookup
+	 * before we unhashed socket. They will achieve receive queue
+	 * and will be purged by socket destructor.
+	 *
+	 * Also we still have packets pending on receive
+	 * queue and probably, our own packets waiting in device queues.
+	 * sock_destroy will drain receive queue, but transmitted
+	 * packets will delay socket destruction until the last reference
+	 * will be released.
 	 */
 
-	write_lock_irq(&sk->callback_lock);
-	sk->dead=1;
-	sk->socket = NULL;
-	sk->sleep = NULL;
-	write_unlock_irq(&sk->callback_lock);
+	sock_orphan(sk);
 
 #ifdef INET_REFCNT_DEBUG
 	if (atomic_read(&sk->refcnt) != 1) {
@@ -222,8 +220,7 @@
 		    char *optval, int optlen)
 {
 	struct sock *sk=sock->sk;
-	if (sk->prot->setsockopt==NULL)
-		return -EOPNOTSUPP;
+
 	return sk->prot->setsockopt(sk,level,optname,optval,optlen);
 }
 
@@ -239,8 +236,7 @@
 		    char *optval, int *optlen)
 {
 	struct sock *sk=sock->sk;
-	if (sk->prot->getsockopt==NULL)
-		return -EOPNOTSUPP;
+
 	return sk->prot->getsockopt(sk,level,optname,optval,optlen);
 }
 
@@ -264,14 +260,6 @@
 	return 0;
 }
 
-/* Listening INET sockets never sleep to wait for memory, so
- * it is completely silly to wake them up on queue space
- * available events.  So we hook them up to this dummy callback.
- */
-static void inet_listen_write_space(struct sock *sk)
-{
-}
-
 /*
  *	Move a socket into listening state.
  */
@@ -282,12 +270,13 @@
 	unsigned char old_state;
 	int err;
 
+	lock_sock(sk);
+
+	err = -EINVAL;
 	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
-		return -EINVAL;
+		goto out;
 
-	lock_sock(sk);
 	old_state = sk->state;
-	err = -EINVAL;
 	if (!((1<<old_state)&(TCPF_CLOSE|TCPF_LISTEN)))
 		goto out;
 
@@ -295,25 +284,9 @@
 	 * we can only allow the backlog to be adjusted.
 	 */
 	if (old_state != TCP_LISTEN) {
-		sk->state = TCP_LISTEN;
-		sk->ack_backlog = 0;
-		if (sk->num == 0) {
-			if (sk->prot->get_port(sk, 0) != 0) {
-				sk->state = old_state;
-				err = -EAGAIN;
-				goto out;
-			}
-			sk->sport = htons(sk->num);
-		} else {
-			/* Not nice, but the simplest solution however */
-			if (sk->prev)
-				((struct tcp_bind_bucket*)sk->prev)->fastreuse = 0;
-		}
-
-		sk_dst_reset(sk);
-		sk->prot->hash(sk);
-		sk->socket->flags |= SO_ACCEPTCON;
-		sk->write_space = inet_listen_write_space;
+		err = tcp_listen_start(sk);
+		if (err)
+			goto out;
 	}
 	sk->max_ack_backlog = backlog;
 	err = 0;
@@ -345,10 +318,6 @@
 		if (protocol && protocol != IPPROTO_TCP)
 			goto free_and_noproto;
 		protocol = IPPROTO_TCP;
-		if (ipv4_config.no_pmtu_disc)
-			sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
-		else
-			sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
 		prot = &tcp_prot;
 		sock->ops = &inet_stream_ops;
 		break;
@@ -359,7 +328,6 @@
 			goto free_and_noproto;
 		protocol = IPPROTO_UDP;
 		sk->no_check = UDP_CSUM_DEFAULT;
-		sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
 		prot=&udp_prot;
 		sock->ops = &inet_dgram_ops;
 		break;
@@ -370,7 +338,6 @@
 			goto free_and_noproto;
 		prot = &raw_prot;
 		sk->reuse = 1;
-		sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
 		sk->num = protocol;
 		sock->ops = &inet_dgram_ops;
 		if (protocol == IPPROTO_RAW)
@@ -380,23 +347,22 @@
 		goto free_and_badtype;
 	}
 
+	if (ipv4_config.no_pmtu_disc)
+		sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
+	else
+		sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
+
 	sock_init_data(sock,sk);
 
 	sk->destruct = inet_sock_destruct;
 
-	sk->zapped=0;
-#ifdef CONFIG_TCP_NAGLE_OFF
-	sk->nonagle = 1;
-#endif  
+	sk->zapped = 0;
 	sk->family = PF_INET;
 	sk->protocol = protocol;
 
 	sk->prot = prot;
 	sk->backlog_rcv = prot->backlog_rcv;
 
-	sk->timer.data = (unsigned long)sk;
-	sk->timer.function = &tcp_keepalive_timer;
-
 	sk->protinfo.af_inet.ttl=sysctl_ip_default_ttl;
 
 	sk->protinfo.af_inet.mc_loop=1;
@@ -404,7 +370,9 @@
 	sk->protinfo.af_inet.mc_index=0;
 	sk->protinfo.af_inet.mc_list=NULL;
 
+#ifdef INET_REFCNT_DEBUG
 	atomic_inc(&inet_sock_nr);
+#endif
 
 	if (sk->num) {
 		/* It assumes that any protocol which allows
@@ -469,11 +437,8 @@
 		 * linger..
 		 */
 		timeout = 0;
-		if (sk->linger && !(current->flags & PF_EXITING)) {
-			timeout = HZ * sk->lingertime;
-			if (!timeout)
-				timeout = MAX_SCHEDULE_TIMEOUT;
-		}
+		if (sk->linger && !(current->flags & PF_EXITING))
+			timeout = sk->lingertime;
 		sock->sk = NULL;
 		sk->prot->close(sk, timeout);
 	}
@@ -496,10 +461,6 @@
 		return -EINVAL;
 		
 	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
-	if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL &&
-	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) {
-		return -EADDRNOTAVAIL;	/* Source address MUST be ours! */
-	}
 
 	snum = ntohs(addr->sin_port);
 	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
@@ -555,25 +516,29 @@
 	return sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
 }
 
-static void inet_wait_for_connect(struct sock *sk)
+static long inet_wait_for_connect(struct sock *sk, long timeo)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
 	__set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(sk->sleep, &wait);
 
+	/* Basic assumption: if someone sets sk->err, he _must_
+	 * change state of the socket from TCP_SYN_*.
+	 * Connect() does not allow to get error notifications
+	 * without closing the socket.
+	 */
 	while ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
-		if (signal_pending(current))
-			break;
-		if (sk->err)
-			break;
 		release_sock(sk);
-		schedule();
+		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
+		if (signal_pending(current) || !timeo)
+			break;
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk->sleep, &wait);
+	return timeo;
 }
 
 /*
@@ -586,16 +551,16 @@
 {
 	struct sock *sk=sock->sk;
 	int err;
+	long timeo;
+
+	lock_sock(sk);
 
 	if (uaddr->sa_family == AF_UNSPEC) {
-		lock_sock(sk);
 		err = sk->prot->disconnect(sk, flags);
 		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
-		release_sock(sk);
-		return err;
+		goto out;
 	}
 
-	lock_sock(sk);
 	switch (sock->state) {
 	default:
 		err = -EINVAL;
@@ -604,40 +569,58 @@
 		err = -EISCONN;
 		goto out;
 	case SS_CONNECTING:
-		if (tcp_established(sk->state)) {
-			sock->state = SS_CONNECTED;
-			err = 0;
-			goto out;
-		}
-		if (sk->err)
-			goto sock_error;
 		err = -EALREADY;
-		if (flags & O_NONBLOCK)
-			goto out;
+		/* Fall out of switch with err, set for this state */
 		break;
 	case SS_UNCONNECTED:
+		err = -EISCONN;
+		if (sk->state != TCP_CLOSE) 
+			goto out;
+
+		err = -EAGAIN;
+		if (sk->num == 0) {
+			if (sk->prot->get_port(sk, 0) != 0)
+				goto out;
+			sk->sport = htons(sk->num);
+		}
+
 		err = sk->prot->connect(sk, uaddr, addr_len);
 		if (err < 0)
 			goto out;
+
   		sock->state = SS_CONNECTING;
-	}
 
-	if (sk->state > TCP_FIN_WAIT2)
-		goto sock_error;
+		/* Just entered SS_CONNECTING state; the only
+		 * difference is that return value in non-blocking
+		 * case is EINPROGRESS, rather than EALREADY.
+		 */
+		err = -EINPROGRESS;
+		break;
+	}
 
-	err = -EINPROGRESS;
-	if (!tcp_established(sk->state) && (flags & O_NONBLOCK))
-		goto out;
+	timeo = sock_sndtimeo(sk, flags&O_NONBLOCK);
 
 	if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
-		inet_wait_for_connect(sk);
+		/* Error code is set above */
+		if (!timeo || !inet_wait_for_connect(sk, timeo))
+			goto out;
+
 		err = -ERESTARTSYS;
 		if (signal_pending(current))
 			goto out;
 	}
 
-	if (sk->err && !tcp_established(sk->state))
-		goto sock_error; 
+	/* Connection was closed by RST, timeout, ICMP error
+	 * or another process disconnected us.
+	 */
+	if (sk->state == TCP_CLOSE)
+		goto sock_error;
+
+	/* sk->err may be not zero now, if RECVERR was ordered by user
+	 * and error was received after socket entered established state.
+	 * Hence, it is handled normally after connect() return successfully.
+	 */
+
 	sock->state = SS_CONNECTED;
 	err = 0;
 out:
@@ -647,11 +630,9 @@
 sock_error:
 	err = sock_error(sk) ? : -ECONNABORTED;
 	sock->state = SS_UNCONNECTED;
-	if (sk->prot->disconnect(sk, O_NONBLOCK))
+	if (sk->prot->disconnect(sk, flags))
 		sock->state = SS_DISCONNECTING;
-	release_sock(sk);
-
-	return err;
+	goto out;
 }
 
 /*
@@ -671,11 +652,7 @@
 
 	BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE));
 
-	write_lock_irq(&sk2->callback_lock);
-	sk2->sleep = &newsock->wait;
-	newsock->sk = sk2;
-	sk2->socket = newsock;
-	write_unlock_irq(&sk2->callback_lock);
+	sock_graft(sk2, newsock);
 
 	newsock->state = SS_CONNECTED;
 	release_sock(sk2);
@@ -749,7 +726,7 @@
 int inet_shutdown(struct socket *sock, int how)
 {
 	struct sock *sk = sock->sk;
-	int err;
+	int err = 0;
 
 	/* This should really check to make sure
 	 * the socket is a TCP socket. (WHY AC...)
@@ -759,35 +736,45 @@
 		       2->3 */
 	if ((how & ~SHUTDOWN_MASK) || how==0)	/* MAXINT->0 */
 		return -EINVAL;
-	if (!sk)
-		return -ENOTCONN;
 
 	lock_sock(sk);
-	if (sock->state == SS_CONNECTING && tcp_established(sk->state))
-		sock->state = SS_CONNECTED;
-	err = -ENOTCONN;
-	if (!tcp_connected(sk->state))
-		goto out;
-	sk->shutdown |= how;
-	if (sk->prot->shutdown)
-		sk->prot->shutdown(sk, how);
+	if (sock->state == SS_CONNECTING) {
+		if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE))
+			sock->state = SS_DISCONNECTING;
+		else
+			sock->state = SS_CONNECTED;
+	}
+
+	switch (sk->state) {
+	default:	
+		sk->shutdown |= how;
+		if (sk->prot->shutdown)
+			sk->prot->shutdown(sk, how);
+		break;
+	case TCP_CLOSE:
+		err = -ENOTCONN;
+		break;
+
+	/* Remaining two branches are temporary solution for missing
+	 * close() in multithreaded environment. It is _not_ a good idea,
+	 * but we have no choice until close() is repaired at VFS level.
+	 */
+	case TCP_LISTEN:
+		if (!(how & RCV_SHUTDOWN))
+			break;
+		/* Fall through */
+	case TCP_SYN_SENT:
+		err = sk->prot->disconnect(sk, O_NONBLOCK);
+		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+		break;
+	}
+
 	/* Wake up anyone sleeping in poll. */
 	sk->state_change(sk);
-	err = 0;
-out:
 	release_sock(sk);
 	return err;
 }
 
-unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->prot->poll == NULL)
-		return(0);
-	return sk->prot->poll(file, sock, wait);
-}
-
 /*
  *	ioctl() calls you can issue on an INET socket. Most of these are
  *	device configuration and stuff and very rarely used. Some ioctls
@@ -909,7 +896,7 @@
 	sock_no_socketpair,
 	inet_accept,
 	inet_getname, 
-	inet_poll,
+	tcp_poll,
 	inet_ioctl,
 	inet_listen,
 	inet_shutdown,

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)