patch-2.3.43 linux/net/sunrpc/svcsock.c


diff -u --recursive --new-file v2.3.42/linux/net/sunrpc/svcsock.c linux/net/sunrpc/svcsock.c
@@ -33,15 +33,20 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/ip.h>
-#if LINUX_VERSION_CODE >= 0x020100
 #include <asm/uaccess.h>
-#endif
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/stats.h>
 
+/* SMP locking strategy:
+ *
+ * 	svc_sock->sk_lock and svc_serv->sv_lock protect their
+ *	respective structures.
+ *
+ *	Antideadlock ordering is sk_lock --> sv_lock.
+ */
 
 #define RPCDBG_FACILITY	RPCDBG_SVCSOCK
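
The locking-strategy comment introduced above is the heart of this patch: the new per-socket svsk->sk_lock and the per-service serv->sv_lock nest in one direction only, sk_lock before sv_lock, so no two code paths can take them in opposite orders and deadlock. The sketch below illustrates that discipline using the field names from this patch; the helper name and the elided bookkeeping are hypothetical, not code from svcsock.c.

/*
 * Sketch only: the sk_lock --> sv_lock nesting this patch enforces.
 * svc_example_mark_ready() is a made-up name; the real callers are
 * the sk->data_ready/state_change callbacks converted further down.
 */
static void svc_example_mark_ready(struct svc_sock *svsk)
{
        struct svc_serv *serv = svsk->sk_server;

        spin_lock_bh(&svsk->sk_lock);           /* outer lock, local BHs off */
        svsk->sk_data++;                        /* per-socket state change */

        spin_lock(&serv->sv_lock);              /* inner lock; plain spin_lock()
                                                 * because BHs are already off */
        /* ... queue svsk on serv->sv_sockets or wake an idle thread ... */
        spin_unlock(&serv->sv_lock);

        spin_unlock_bh(&svsk->sk_lock);
}
/*
 * Taking sv_lock first and then trying for sk_lock elsewhere would
 * invert the order and permit an AB-BA deadlock; no path in this
 * patch does that.
 */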
 
@@ -54,20 +59,22 @@
 
 
 /*
- * Queue up an idle server thread.
+ * Queue up an idle server thread.  Must have serv->sv_lock held.
  */
 static inline void
 svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
 {
+	BUG_TRAP(spin_is_locked(&serv->sv_lock));
 	rpc_append_list(&serv->sv_threads, rqstp);
 }
 
 /*
- * Dequeue an nfsd thread.
+ * Dequeue an nfsd thread.  Must have serv->sv_lock held.
  */
 static inline void
 svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
 {
+	BUG_TRAP(spin_is_locked(&serv->sv_lock));
 	rpc_remove_list(&serv->sv_threads, rqstp);
 }
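
The BUG_TRAP(spin_is_locked(...)) lines turn the "must have serv->sv_lock held" comment into a runtime check: spin_is_locked() reports whether the lock is currently taken, and BUG_TRAP() complains when its condition is false, flagging callers that forgot the lock on SMP builds. A hedged sketch of the idiom, with an illustrative caller that is not part of this patch:

/*
 * Sketch: asserting a locking precondition.  svc_example_caller() is
 * illustrative only; the real callers take serv->sv_lock with
 * spin_lock_bh(), as svc_recv() does later in this patch.
 */
static inline void
svc_example_append(struct svc_serv *serv, struct svc_rqst *rqstp)
{
        /* Caller must already hold serv->sv_lock. */
        BUG_TRAP(spin_is_locked(&serv->sv_lock));
        rpc_append_list(&serv->sv_threads, rqstp);
}

static void svc_example_caller(struct svc_serv *serv, struct svc_rqst *rqstp)
{
        spin_lock_bh(&serv->sv_lock);           /* satisfies the assertion */
        svc_example_append(serv, rqstp);
        spin_unlock_bh(&serv->sv_lock);
}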
 
@@ -90,8 +97,8 @@
 /*
  * Queue up a socket with data pending. If there are idle nfsd
  * processes, wake 'em up.
- * When calling this function, you should make sure it can't be interrupted
- * by the network bottom half.
+ *
+ * This must be called with svsk->sk_lock held.
  */
 static void
 svc_sock_enqueue(struct svc_sock *svsk)
@@ -99,6 +106,11 @@
 	struct svc_serv	*serv = svsk->sk_server;
 	struct svc_rqst	*rqstp;
 
+	BUG_TRAP(spin_is_locked(&svsk->sk_lock));
+
+	/* NOTE: Local BH is already disabled by our caller. */
+	spin_lock(&serv->sv_lock);
+
 	if (serv->sv_threads && serv->sv_sockets)
 		printk(KERN_ERR
 			"svc_sock_enqueue: threads and sockets both waiting??\n");
@@ -106,7 +118,7 @@
 	if (svsk->sk_busy) {
 		/* Don't enqueue socket while daemon is receiving */
 		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
-		return;
+		goto out_unlock;
 	}
 
 	/* Mark socket as busy. It will remain in this state until the
@@ -131,20 +143,23 @@
 		rpc_append_list(&serv->sv_sockets, svsk);
 		svsk->sk_qued = 1;
 	}
+
+out_unlock:
+	spin_unlock(&serv->sv_lock);
 }
 
 /*
- * Dequeue the first socket.
+ * Dequeue the first socket.  Must be called with the serv->sv_lock held.
  */
 static inline struct svc_sock *
 svc_sock_dequeue(struct svc_serv *serv)
 {
 	struct svc_sock	*svsk;
 
-	start_bh_atomic();
+	BUG_TRAP(spin_is_locked(&serv->sv_lock));
+
 	if ((svsk = serv->sv_sockets) != NULL)
 		rpc_remove_list(&serv->sv_sockets, svsk);
-	end_bh_atomic();
 
 	if (svsk) {
 		dprintk("svc: socket %p dequeued, inuse=%d\n",
@@ -162,7 +177,7 @@
 static inline void
 svc_sock_received(struct svc_sock *svsk, int count)
 {
-	start_bh_atomic();
+	spin_lock_bh(&svsk->sk_lock);
 	if ((svsk->sk_data -= count) < 0) {
 		printk(KERN_NOTICE "svc: sk_data negative!\n");
 		svsk->sk_data = 0;
@@ -174,7 +189,7 @@
 						svsk->sk_sk);
 		svc_sock_enqueue(svsk);
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -183,7 +198,7 @@
 static inline void
 svc_sock_accepted(struct svc_sock *svsk)
 {
-	start_bh_atomic();
+	spin_lock_bh(&svsk->sk_lock);
         svsk->sk_busy = 0;
         svsk->sk_conn--;
         if (svsk->sk_conn || svsk->sk_data || svsk->sk_close) {
@@ -191,7 +206,7 @@
 						svsk->sk_sk);
                 svc_sock_enqueue(svsk);
         }
-	end_bh_atomic();
+	spin_unlock_bh(&svsk->sk_lock);
 }
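
svc_sock_received() and svc_sock_accepted() above show the mechanical half of the conversion: start_bh_atomic()/end_bh_atomic() excluded all bottom halves globally, whereas spin_lock_bh()/spin_unlock_bh() disables only local bottom halves and takes the socket's own lock, so work on different sockets no longer serializes across CPUs. Condensed to its shape (the dprintk and the re-enqueue test are elided here), the change is:

/* Before (v2.3.42): one global exclusion for every socket. */
        start_bh_atomic();
        svsk->sk_busy = 0;
        svsk->sk_conn--;
        /* ... possibly svc_sock_enqueue(svsk) ... */
        end_bh_atomic();

/* After (v2.3.43): local BHs off plus the per-socket spinlock,
 * set up once by spin_lock_init(&svsk->sk_lock) in svc_setup_socket(). */
        spin_lock_bh(&svsk->sk_lock);
        svsk->sk_busy = 0;
        svsk->sk_conn--;
        /* ... possibly svc_sock_enqueue(svsk) ... */
        spin_unlock_bh(&svsk->sk_lock);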
 
 /*
@@ -221,6 +236,7 @@
 {
 	struct svc_rqst	*rqstp;
 
+	spin_lock_bh(&serv->sv_lock);
 	if ((rqstp = serv->sv_threads) != NULL) {
 		dprintk("svc: daemon %p woken up.\n", rqstp);
 		/*
@@ -229,6 +245,7 @@
 		 */
 		wake_up(&rqstp->rq_wait);
 	}
+	spin_unlock_bh(&serv->sv_lock);
 }
 
 /*
@@ -252,24 +269,14 @@
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 
-#if LINUX_VERSION_CODE >= 0x020100
 	msg.msg_flags	= MSG_DONTWAIT;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	len = sock_sendmsg(sock, &msg, buflen);
 	set_fs(oldfs);
-#else
-	msg.msg_flags	= 0;
-
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	len = sock->ops->sendmsg(sock, &msg, buflen, 1, 0);
-	set_fs(oldfs);
-#endif
 
-	dprintk("svc: socket %p sendto([%p %lu... ], %d, %d) = %d\n",
-		rqstp->rq_sock, iov[0].iov_base,
-		(unsigned long) iov[0].iov_len, nr,
-		buflen, len);
+	dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
+			rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);
 
 	return len;
 }
@@ -312,22 +319,14 @@
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 
-#if LINUX_VERSION_CODE >= 0x020100
 	msg.msg_flags	= MSG_DONTWAIT;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT);
 	set_fs(oldfs);
-#else
-	msg.msg_flags	= 0;
 
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	len = sock->ops->recvmsg(sock, &msg, buflen, 0, 1, &rqstp->rq_addrlen);
-	set_fs(oldfs);
-#endif
-
-	dprintk("svc: socket %p recvfrom(%p, %lu) = %d\n", rqstp->rq_sock,
-		iov[0].iov_base, (unsigned long) iov[0].iov_len, len);
+	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
+		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
 
 	return len;
 }
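
Both hunks above drop the pre-2.1 compatibility branches and keep only the modern form: the request's iovec is wrapped in a struct msghdr, the address limit is switched to KERNEL_DS so sock_sendmsg()/sock_recvmsg() accept kernel-space buffers, and MSG_DONTWAIT keeps the call from blocking a server thread. The send side, condensed (assuming sock, msg and buflen are prepared exactly as in svc_sendto() above):

        mm_segment_t    oldfs;
        int             len;

        msg.msg_flags = MSG_DONTWAIT;   /* never block in the nfsd thread */

        oldfs = get_fs();               /* remember the current address limit */
        set_fs(KERNEL_DS);              /* the iovecs point into kernel memory */
        len = sock_sendmsg(sock, &msg, buflen);
        set_fs(oldfs);                  /* always restore before returning */

The dprintk lines in both hunks also switch to the %Zu length modifier, so the size_t iov_len prints without a cast.
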
@@ -344,8 +343,10 @@
 		return;
 	dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
 		svsk, sk, count, svsk->sk_busy);
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_data = 1;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
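
This callback is the pattern the rest of the patch repeats for every socket event (the TCP listen, state-change and data-ready callbacks below, and the accept precharge in svc_tcp_accept): take the socket's sk_lock, update the pending-work counter, call svc_sock_enqueue(), unlock. The lock matters because the bottom-half callback and an nfsd thread may now run on different CPUs; the sketch below pairs the two sides that contend on sk_lock (fragments condensed from this patch, with the dprintk and re-enqueue details elided):

        /* Bottom-half side, as in svc_udp_data_ready() above: */
        spin_lock_bh(&svsk->sk_lock);
        svsk->sk_data = 1;
        svc_sock_enqueue(svsk);                 /* takes serv->sv_lock inside */
        spin_unlock_bh(&svsk->sk_lock);

        /* nfsd-thread side, as in svc_sock_received() earlier: */
        spin_lock_bh(&svsk->sk_lock);
        if ((svsk->sk_data -= count) < 0)
                svsk->sk_data = 0;
        svsk->sk_busy = 0;
        spin_unlock_bh(&svsk->sk_lock);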
 
 /*
@@ -385,11 +386,7 @@
 	/* Get sender address */
 	rqstp->rq_addr.sin_family = AF_INET;
 	rqstp->rq_addr.sin_port = skb->h.uh->source;
-#if LINUX_VERSION_CODE >= 0x020100
 	rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
-#else
-	rqstp->rq_addr.sin_addr.s_addr = skb->saddr;
-#endif
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
@@ -456,8 +453,10 @@
 		printk("svc: socket %p: no user data\n", sk);
 		return;
 	}
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_conn++;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -475,8 +474,10 @@
 		printk("svc: socket %p: no user data\n", sk);
 		return;
 	}
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_close = 1;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 static void
@@ -494,8 +495,10 @@
 			sk, sk->user_data);
 	if (!(svsk = (struct svc_sock *)(sk->user_data)))
 		return;
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_data++;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -562,9 +565,11 @@
 	/* Precharge. Data may have arrived on the socket before we
 	 * installed the data_ready callback. 
 	 */
+	spin_lock_bh(&newsvsk->sk_lock);
 	newsvsk->sk_data = 1;
 	newsvsk->sk_temp = 1;
 	svc_sock_enqueue(newsvsk);
+	spin_unlock_bh(&newsvsk->sk_lock);
 
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
@@ -758,7 +763,7 @@
 	if (signalled())
 		return -EINTR;
 
-	start_bh_atomic();
+	spin_lock_bh(&serv->sv_lock);
 	if ((svsk = svc_sock_dequeue(serv)) != NULL) {
 		rqstp->rq_sock = svsk;
 		svsk->sk_inuse++;
@@ -772,20 +777,21 @@
 		 */
 		current->state = TASK_INTERRUPTIBLE;
 		add_wait_queue(&rqstp->rq_wait, &wait);
-		end_bh_atomic();
+		spin_unlock_bh(&serv->sv_lock);
+
 		schedule_timeout(timeout);
 
+		spin_lock_bh(&serv->sv_lock);
 		remove_wait_queue(&rqstp->rq_wait, &wait);
 
-		start_bh_atomic();
 		if (!(svsk = rqstp->rq_sock)) {
 			svc_serv_dequeue(serv, rqstp);
-			end_bh_atomic();
+			spin_unlock_bh(&serv->sv_lock);
 			dprintk("svc: server %p, no data yet\n", rqstp);
 			return signalled()? -EINTR : -EAGAIN;
 		}
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&serv->sv_lock);
 
 	dprintk("svc: server %p, socket %p, inuse=%d\n",
 		 rqstp, svsk, svsk->sk_inuse);
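
The svc_recv() hunk above is the most delicate part of the conversion, because the thread has to go to sleep without holding the spinlock but also without losing a wakeup: it marks itself TASK_INTERRUPTIBLE and joins its wait queue while serv->sv_lock is still held, drops the lock, sleeps in schedule_timeout(), then re-takes the lock before looking at rq_sock. A stripped-down sketch of that sequence (the wrapper function is made up and the queueing details are reduced to comments):

/*
 * Sketch only: sleeping for work with serv->sv_lock protecting the
 * condition.  svc_example_wait() is a hypothetical wrapper.
 */
static int svc_example_wait(struct svc_serv *serv, struct svc_rqst *rqstp,
                            long timeout)
{
        DECLARE_WAITQUEUE(wait, current);

        spin_lock_bh(&serv->sv_lock);
        /* ... nothing queued: put rqstp on serv->sv_threads ... */

        current->state = TASK_INTERRUPTIBLE;    /* set before dropping the lock,
                                                 * so a wakeup is not lost */
        add_wait_queue(&rqstp->rq_wait, &wait);
        spin_unlock_bh(&serv->sv_lock);         /* never sleep holding a spinlock */

        schedule_timeout(timeout);              /* woken by svc_wake_up() or timer */

        spin_lock_bh(&serv->sv_lock);
        remove_wait_queue(&rqstp->rq_wait, &wait);
        if (!rqstp->rq_sock) {
                /* ... take rqstp back off serv->sv_threads ... */
                spin_unlock_bh(&serv->sv_lock);
                return signalled() ? -EINTR : -EAGAIN;
        }
        spin_unlock_bh(&serv->sv_lock);
        return 0;
}
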
@@ -867,17 +873,14 @@
 	}
 	memset(svsk, 0, sizeof(*svsk));
 
-#if LINUX_VERSION_CODE >= 0x020100
 	inet = sock->sk;
-#else
-	inet = (struct sock *) sock->data;
-#endif
 	inet->user_data = svsk;
 	svsk->sk_sock = sock;
 	svsk->sk_sk = inet;
 	svsk->sk_ostate = inet->state_change;
 	svsk->sk_odata = inet->data_ready;
 	svsk->sk_server = serv;
+	spin_lock_init(&svsk->sk_lock);
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
@@ -897,8 +900,10 @@
 		return NULL;
 	}
 
+	spin_lock_bh(&serv->sv_lock);
 	svsk->sk_list = serv->sv_allsocks;
 	serv->sv_allsocks = svsk;
+	spin_unlock_bh(&serv->sv_lock);
 
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
@@ -971,16 +976,22 @@
 	sk->state_change = svsk->sk_ostate;
 	sk->data_ready = svsk->sk_odata;
 
+	spin_lock_bh(&serv->sv_lock);
+
 	for (rsk = &serv->sv_allsocks; *rsk; rsk = &(*rsk)->sk_list) {
 		if (*rsk == svsk)
 			break;
 	}
-	if (!*rsk)
+	if (!*rsk) {
+		spin_unlock_bh(&serv->sv_lock);
 		return;
+	}
 	*rsk = svsk->sk_list;
-
 	if (svsk->sk_qued)
 		rpc_remove_list(&serv->sv_sockets, svsk);
+
+	spin_unlock_bh(&serv->sv_lock);
+
 	svsk->sk_dead = 1;
 
 	if (!svsk->sk_inuse) {

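The final hunk stretches serv->sv_lock over svc_delete_socket()'s walk of the singly linked sv_allsocks list. The walk uses the pointer-to-pointer idiom: rsk always addresses the link that points at the current element, so unlinking is a single assignment and the list head needs no special case. A self-contained sketch of the same idiom with generic names (nothing here is RPC code):

/*
 * Sketch: removing an element from a singly linked list with a
 * pointer-to-pointer cursor, as svc_delete_socket() does while
 * holding serv->sv_lock.  Types and names are illustrative.
 */
struct node {
        struct node     *next;
        int             key;
};

static void remove_node(struct node **head, struct node *victim)
{
        struct node **link;

        for (link = head; *link; link = &(*link)->next) {
                if (*link == victim) {
                        *link = victim->next;   /* one assignment unlinks it */
                        return;
                }
        }
        /* not found: nothing to do (the patch returns after unlocking) */
}

Holding sv_lock for the walk pairs with svc_setup_socket(), which now links new entries onto sv_allsocks under the same lock.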