patch-2.1.8 linux/net/ipv4/tcp_ipv4.c
- Lines: 1351
- Date: Sun Nov 3 11:04:45 1996
- Orig file: v2.1.7/linux/net/ipv4/tcp_ipv4.c
- Orig date: Thu Jan 1 02:00:00 1970
diff -u --recursive --new-file v2.1.7/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c
@@ -0,0 +1,1350 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Implementation of the Transmission Control Protocol (TCP).
+ *
+ *
+ * IPv4 specific functions
+ *
+ *
+ * code split from:
+ * linux/ipv4/tcp.c
+ * linux/ipv4/tcp_input.c
+ * linux/ipv4/tcp_output.c
+ *
+ * See tcp.c for author information
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/random.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+
+#include <asm/segment.h>
+
+static void tcp_v4_send_reset(unsigned long saddr, unsigned long daddr,
+ struct tcphdr *th, struct proto *prot,
+ struct options *opt,
+ struct device *dev, int tos, int ttl);
+
+void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
+ struct sk_buff *skb);
+
+/*
+ * Cached last hit socket
+ */
+
+static volatile unsigned long th_cache_saddr, th_cache_daddr;
+static volatile unsigned short th_cache_dport, th_cache_sport;
+static volatile struct sock *th_cache_sk;
+
+void tcp_cache_zap(void)
+{
+ th_cache_sk=NULL;
+}
+
+/*
+ * Find the socket, using the last hit cache if applicable.
+ * The cache is not quite right...
+ */
+
+static inline struct sock * get_tcp_sock(u32 saddr, u16 sport,
+ u32 daddr, u16 dport,
+ u32 paddr, u16 pport)
+{
+ struct sock * sk;
+
+ sk = (struct sock *) th_cache_sk;
+ if (!sk || saddr != th_cache_saddr || daddr != th_cache_daddr ||
+ sport != th_cache_sport || dport != th_cache_dport) {
+ sk = get_sock(&tcp_prot, dport, saddr, sport, daddr,
+ paddr, pport);
+ if (sk) {
+ th_cache_saddr=saddr;
+ th_cache_daddr=daddr;
+ th_cache_dport=dport;
+ th_cache_sport=sport;
+ th_cache_sk=sk;
+ }
+ }
+ return sk;
+}
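+
+/*
+ * Any path that re-hashes or rebinds a socket must invalidate this
+ * cache with tcp_cache_zap(); see the connect and accept paths below.
+ */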
+
+static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+{
+ return secure_tcp_sequence_number(sk->saddr, sk->daddr,
+ skb->h.th->dest,
+ skb->h.th->source);
+}
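+
+/*
+ * A note on the generator (a sketch, not part of this file):
+ * secure_tcp_sequence_number(), declared in <linux/random.h>, is
+ * assumed to follow the RFC 1948 scheme, i.e. roughly
+ *
+ *	isn = hash(saddr, daddr, sport, dport, secret) + clock;
+ *
+ * so initial sequence numbers keep advancing with time but cannot
+ * be predicted from one connection to the next.
+ */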
+
+/*
+ * From tcp.c
+ */
+
+/*
+ * Check that a TCP address is unique, don't allow multiple
+ * connects to/from the same address
+ */
+
+static int tcp_unique_address(u32 saddr, u16 snum, u32 daddr, u16 dnum)
+{
+ int retval = 1;
+ struct sock * sk;
+
+ /* Make sure we are allowed to connect here. */
+ cli();
+ for (sk = tcp_prot.sock_array[snum & (SOCK_ARRAY_SIZE -1)];
+ sk != NULL; sk = sk->next)
+ {
+ /* hash collision? */
+ if (sk->num != snum)
+ continue;
+ if (sk->saddr != saddr)
+ continue;
+ if (sk->daddr != daddr)
+ continue;
+ if (sk->dummy_th.dest != dnum)
+ continue;
+ retval = 0;
+ break;
+ }
+ sti();
+ return retval;
+}
+
+/*
+ * This will initiate an outgoing connection.
+ */
+
+int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct sk_buff *buff;
+ struct sk_buff *skb1;
+ struct device *dev=NULL;
+ unsigned char *ptr;
+ int tmp;
+ int atype;
+ struct tcphdr *t1;
+ struct rtable *rt;
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
+
+ if (sk->state != TCP_CLOSE)
+ return(-EISCONN);
+
+ /*
+ * Don't allow a double connect.
+ */
+
+ if(sk->daddr)
+ return -EINVAL;
+
+ if (addr_len < sizeof(struct sockaddr_in))
+ return(-EINVAL);
+
+ if (usin->sin_family && usin->sin_family != AF_INET)
+ return(-EAFNOSUPPORT);
+
+ /*
+ * connect() to INADDR_ANY means loopback (BSD'ism).
+ */
+
+ if (usin->sin_addr.s_addr==INADDR_ANY)
+ usin->sin_addr.s_addr=ip_my_addr();
+
+ /*
+ * Don't want a TCP connection going to a broadcast address
+ */
+
+ if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST
+ || atype==IS_MULTICAST)
+ {
+ return -ENETUNREACH;
+ }
+
+ if (!tcp_unique_address(sk->saddr, sk->num, usin->sin_addr.s_addr,
+ usin->sin_port))
+ {
+ return -EADDRNOTAVAIL;
+ }
+
+ lock_sock(sk);
+ sk->daddr = usin->sin_addr.s_addr;
+ sk->dummy_th.dest = usin->sin_port;
+ sk->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
+ sk->dummy_th.source,
+ usin->sin_port);
+
+ tp->snd_wnd = 0;
+ tp->snd_wl1 = 0;
+ tp->snd_wl2 = sk->write_seq;
+ tp->snd_una = sk->write_seq;
+
+ tp->rcv_nxt = 0;
+
+ sk->err = 0;
+
+ buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
+ if (buff == NULL)
+ {
+ release_sock(sk);
+ return(-ENOMEM);
+ }
+
+ buff->sk = sk;
+ buff->free = 0;
+ buff->localroute = sk->localroute;
+
+ /*
+ * Put in the IP header and routing stuff.
+ */
+
+ tmp = ip_build_header(buff, sk->saddr, sk->daddr, &dev,
+ IPPROTO_TCP, NULL, MAX_SYN_SIZE, sk->ip_tos,
+ sk->ip_ttl,&sk->ip_route_cache);
+
+ if (tmp < 0)
+ {
+ sock_wfree(sk, buff);
+ release_sock(sk);
+ return(-ENETUNREACH);
+ }
+ if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
+ sk->saddr = rt->rt_src;
+ sk->rcv_saddr = sk->saddr;
+
+ t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
+ buff->h.th = t1;
+
+ memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
+ buff->seq = sk->write_seq++;
+ t1->seq = htonl(buff->seq);
+ tp->snd_nxt = sk->write_seq;
+ buff->end_seq = sk->write_seq;
+ t1->ack = 0;
+ t1->window = htons(512);
+ t1->syn = 1;
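+ /* 5 words of base header plus 1 word for the MSS option = 24 bytes */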
+ t1->doff = 6;
+
+ /* Advertise 512 for now; clamp to the route's window if it gives one */
+
+ if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
+ sk->window_clamp=rt->rt_window;
+ else
+ sk->window_clamp=0;
+
+
+ if (rt)
+ sk->mtu = rt->rt_mtu;
+ else
+ sk->mtu = dev->mtu;
+
+#ifdef CONFIG_SKIP
+
+ /*
+ * SKIP devices set their MTU to 65535. This is so they can take packets
+ * unfragmented to the security processing, then fragment. They could lie
+ * to the TCP layer about a suitable MTU, but it's easier to let SKIP sort
+ * it out, simply because the final packet we want unfragmented is going
+ * to be
+ *
+ * [IPHDR][IPSP][Security data][Modified TCP data][Security data]
+ */
+
+ if(skip_pick_mtu!=NULL) /* If SKIP is loaded.. */
+ sk->mtu=skip_pick_mtu(sk->mtu,dev);
+#endif
+
+ if(sk->mtu < 64)
+ sk->mtu = 64; /* Sanity limit */
+
+ if (sk->user_mss)
+ sk->mss = sk->user_mss;
+ else
+ sk->mss = (sk->mtu - sizeof(struct iphdr) -
+ sizeof(struct tcphdr));
+
+ /*
+ * Put in the TCP options to say MSS.
+ */
+
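+ /* Wire format (RFC 793): kind 2, length 4, then the 16-bit MSS in network order */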
+ ptr = skb_put(buff,4);
+ ptr[0] = TCPOPT_MSS;
+ ptr[1] = TCPOLEN_MSS;
+ ptr[2] = (sk->mss) >> 8;
+ ptr[3] = (sk->mss) & 0xff;
+ buff->csum = csum_partial(ptr, 4, 0);
+ tcp_v4_send_check(sk, t1, sizeof(struct tcphdr) + 4, buff);
+
+ /*
+ * This must go first, otherwise a really quick response
+ * will get reset.
+ */
+
+ tcp_cache_zap();
+ tcp_set_state(sk,TCP_SYN_SENT);
+
+ if(rt && (rt->rt_flags&RTF_IRTT))
+ tp->rto = rt->rt_irtt;
+ else
+ tp->rto = TCP_TIMEOUT_INIT;
+
+ tcp_init_xmit_timers(sk);
+
+ /* Now works the right way instead of a hacked initial setting */
+ sk->retransmits = 0;
+
+ skb_queue_tail(&sk->write_queue, buff);
+
+ sk->packets_out++;
+ buff->when = jiffies;
+
+ skb1 = skb_clone(buff, GFP_KERNEL);
+ sk->wmem_alloc += skb1->truesize;
+ ip_queue_xmit(sk, dev, skb1, 1);
+
+ /* Timer for repeating the SYN until an answer */
+ tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
+ tcp_statistics.TcpActiveOpens++;
+ tcp_statistics.TcpOutSegs++;
+
+ release_sock(sk);
+ return(0);
+}
+
+static int tcp_v4_sendmsg(struct sock *sk, struct msghdr *msg,
+ int len, int nonblock, int flags)
+{
+ int retval = -EINVAL;
+
+ /*
+ * Do sanity checking for sendmsg/sendto/send
+ */
+
+ if (flags & ~(MSG_OOB|MSG_DONTROUTE))
+ goto out;
+ if (msg->msg_name) {
+ struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;
+
+ if (msg->msg_namelen < sizeof(*addr))
+ goto out;
+ if (addr->sin_family && addr->sin_family != AF_INET)
+ goto out;
+ retval = -ENOTCONN;
+ if(sk->state == TCP_CLOSE)
+ goto out;
+ retval = -EISCONN;
+ if (addr->sin_port != sk->dummy_th.dest)
+ goto out;
+ if (addr->sin_addr.s_addr != sk->daddr)
+ goto out;
+ }
+
+ lock_sock(sk);
+ retval = tcp_do_sendmsg(sk, msg->msg_iovlen, msg->msg_iov,
+ len, nonblock, flags);
+
+ release_sock(sk);
+
+out:
+ return retval;
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition. If err < 0 then the socket should
+ * be closed and the error returned to the user. If err > 0
+ * it's just the icmp type << 8 | icmp code. After adjustment
+ * header points to the first 8 bytes of the tcp header. We need
+ * to find the appropriate port.
+ */
+
+void tcp_v4_err(int type, int code, unsigned char *header, __u32 info,
+ __u32 daddr, __u32 saddr, struct inet_protocol *protocol)
+{
+ struct tcphdr *th = (struct tcphdr *)header;
+ struct tcp_opt *tp;
+ struct sock *sk;
+
+ sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr, 0, 0);
+
+ if (sk == NULL)
+ return;
+
+ if (type == ICMP_SOURCE_QUENCH)
+ {
+ /*
+ * FIXME:
+ * Follow BSD for now and just reduce cong_window to 1 again.
+ * It is possible that we just want to reduce the
+ * window by 1/2, or that we want to reduce ssthresh by 1/2
+ * here as well.
+ */
+
+ tp = &sk->tp_pinfo.af_tcp;
+
+ sk->cong_window = 1;
+ tp->high_seq = tp->snd_nxt;
+
+ return;
+ }
+
+ if (type == ICMP_PARAMETERPROB)
+ {
+ sk->err=EPROTO;
+ sk->error_report(sk);
+ }
+
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+ {
+ struct rtable * rt;
+ /*
+ * Ugly trick to pass MTU to protocol layer.
+ * Really we should add argument "info" to error handler.
+ */
+ unsigned short new_mtu = info;
+
+ if ((rt = sk->ip_route_cache) != NULL)
+ if (rt->rt_mtu > new_mtu)
+ rt->rt_mtu = new_mtu;
+
+ if ((sk->mtu > new_mtu) &&
+ (new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr)))
+ {
+ sk->mss = (new_mtu - sizeof(struct iphdr)
+ - sizeof(struct tcphdr));
+ }
+
+ return;
+ }
+#endif
+
+ /*
+ * If we've already connected we will keep trying
+ * until we time out, or the user gives up.
+ */
+
+ if (code <= NR_ICMP_UNREACH)
+ {
+ if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
+ {
+ sk->err = icmp_err_convert[code].errno;
+ if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
+ {
+ tcp_statistics.TcpAttemptFails++;
+ tcp_set_state(sk,TCP_CLOSE);
+ sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
+ }
+ }
+ else /* Only an error on timeout */
+ sk->err_soft = icmp_err_convert[code].errno;
+ }
+}
+
+/*
+ * This routine computes a TCP checksum.
+ *
+ * Modified January 1995 from a go-faster DOS routine by
+ * Jorge Cwik <jorge@laser.satlink.net>
+ */
+void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
+ struct sk_buff *skb)
+{
+ __u32 saddr = sk->saddr;
+ __u32 daddr = sk->daddr;
+#ifdef DEBUG_TCP_CHECK
+ u16 check;
+#endif
+ th->check = 0;
+ th->check = tcp_v4_check(th, len, saddr, daddr,
+ csum_partial((char *)th, sizeof(*th),
+ skb->csum));
+
+#ifdef DEBUG_TCP_CHECK
+ check = th->check;
+ th->check = 0;
+ th->check = tcp_v4_check(th, len, saddr, daddr,
+ csum_partial((char *)th,len,0));
+ if (check != th->check) {
+ static int count = 0;
+ if (++count < 10) {
+ printk("Checksum %x (%x) from %p\n", th->check, check,
+ __builtin_return_address(0));
+ printk("TCP=<off:%d a:%d s:%d f:%d> len=%d\n", th->doff*4, th->ack, th->syn, th->fin, len);
+ }
+ }
+#endif
+}
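+
+/*
+ * For reference (a sketch; tcp_v4_check() itself is a helper assumed
+ * to live in <net/tcp.h>): the final sum covers the RFC 793
+ * pseudo-header on top of the segment, schematically
+ *
+ *	sum = csum_partial(segment, len, 0);
+ *	sum = csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, sum);
+ *
+ * i.e. source address, destination address, a zero byte, the protocol
+ * number and the TCP length are folded in after the data itself.
+ */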
+
+/*
+ * This routine will send an RST to the other tcp.
+ */
+
+static void tcp_v4_send_reset(unsigned long saddr, unsigned long daddr,
+ struct tcphdr *th, struct proto *prot,
+ struct options *opt,
+ struct device *dev, int tos, int ttl)
+{
+ struct sk_buff *buff;
+ struct tcphdr *t1;
+ int tmp;
+ struct device *ndev=NULL;
+
+ /*
+ * Cannot reset a reset (Think about it).
+ */
+
+ if(th->rst)
+ return;
+
+ /*
+ * We need to grab some memory, and put together an RST,
+ * and then put it into the queue to be sent.
+ */
+
+ buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC);
+ if (buff == NULL)
+ return;
+
+ buff->sk = NULL;
+ buff->dev = dev;
+ buff->localroute = 0;
+
+
+ /*
+ * Put in the IP header and routing stuff.
+ */
+
+ tmp = ip_build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
+ sizeof(struct tcphdr),tos,ttl,NULL);
+ if (tmp < 0)
+ {
+ buff->free = 1;
+ sock_wfree(NULL, buff);
+ return;
+ }
+
+ t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+ memset(t1, 0, sizeof(*t1));
+
+ /*
+ * Swap the send and the receive.
+ */
+
+ t1->dest = th->source;
+ t1->source = th->dest;
+ t1->doff = sizeof(*t1)/4;
+ t1->rst = 1;
+
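+ /*
+ * Sequence selection per RFC 793: if the offending segment
+ * carried an ACK we reuse its acknowledgment number as our
+ * sequence number; otherwise we send seq 0 and ACK the
+ * segment's sequence number, plus one if it was a SYN, since
+ * a SYN occupies one unit of sequence space.
+ */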
+ if(th->ack)
+ {
+ t1->seq = th->ack_seq;
+ }
+ else
+ {
+ t1->ack = 1;
+ if(!th->syn)
+ t1->ack_seq = th->seq;
+ else
+ t1->ack_seq = htonl(ntohl(th->seq)+1);
+ }
+
+
+ buff->csum = csum_partial((u8 *) t1, sizeof(*t1), 0);
+ t1->check = tcp_v4_check(t1, sizeof(*t1), saddr, daddr, buff->csum);
+
+ ip_queue_xmit(NULL, ndev, buff, 1);
+ tcp_statistics.TcpOutSegs++;
+}
+
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+/*
+ * Check whether a received TCP packet might be for one of our
+ * connections.
+ */
+
+int tcp_chkaddr(struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->h.iph;
+ struct tcphdr *th = (struct tcphdr *)(skb->h.raw + iph->ihl*4);
+ struct sock *sk;
+
+ sk = get_sock(&tcp_prot, th->dest, iph->saddr, th->source, iph->daddr,
+ 0, 0);
+
+ if (!sk)
+ return 0;
+
+ /* 0 means accept all LOCAL addresses here, not all the world... */
+
+ if (sk->rcv_saddr == 0)
+ return 0;
+
+ return 1;
+}
+#endif
+
+static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
+{
+ struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req;
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ struct sk_buff * skb;
+ struct device *dev = NULL;
+ struct rtable *rt = NULL;
+ struct tcphdr *th;
+ unsigned char *ptr;
+ int mss;
+ int tmp;
+
+ skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
+
+ if (skb == NULL)
+ {
+ return;
+ }
+
+ tmp = ip_build_header(skb, af_req->loc_addr, af_req->rmt_addr, &dev,
+ IPPROTO_TCP, af_req->opt, skb->truesize,
+ sk->ip_tos, sk->ip_ttl, &rt);
+
+ if (tmp < 0)
+ {
+ skb->free = 1;
+ kfree_skb(skb, FREE_WRITE);
+ return;
+ }
+
+ skb->dev = dev;
+
+ if (rt)
+ mss = rt->rt_mtu;
+ else
+ mss = dev->mtu;
+
+ mss -= sizeof(struct iphdr) + sizeof(struct tcphdr);
+
+ if (sk->user_mss)
+ mss = min(mss, sk->user_mss);
+
+ ip_rt_put(rt);
+
+ th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
+ skb->h.th = th;
+ memset(th, 0, sizeof(struct tcphdr));
+
+ th->syn = 1;
+ th->ack = 1;
+
+ th->source = sk->dummy_th.source;
+ th->dest = req->rmt_port;
+
+ skb->seq = req->snt_isn;
+ skb->end_seq = skb->seq + 1;
+
+ th->seq = htonl(skb->seq);
+ th->ack_seq = htonl(req->rcv_isn + 1);
+ th->doff = sizeof(*th)/4 + 1;
+
+ th->window = htons(tp->rcv_wnd);
+
+ ptr = skb_put(skb, TCPOLEN_MSS);
+ ptr[0] = TCPOPT_MSS;
+ ptr[1] = TCPOLEN_MSS;
+ ptr[2] = (mss >> 8) & 0xff;
+ ptr[3] = mss & 0xff;
+ skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0);
+
+ th->check = tcp_v4_check(th, sizeof(*th) + TCPOLEN_MSS, af_req->loc_addr,
+ af_req->rmt_addr,
+ csum_partial((char *)th, sizeof(*th), skb->csum));
+
+ ip_queue_xmit(sk, dev, skb, 1);
+ tcp_statistics.TcpOutSegs++;
+
+}
+
+static void tcp_v4_or_free(struct open_request *req)
+{
+ struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req;
+
+ if (af_req->req.sk)
+ return;
+
+ if (af_req->opt)
+ {
+ kfree_s(af_req->opt, sizeof(struct options) + af_req->opt->optlen);
+ }
+}
+
+static struct or_calltable or_ipv4 = {
+ tcp_v4_send_synack,
+ tcp_v4_or_free
+};
+
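+/*
+ * Hook for a SYN acceptance policy (e.g. anti-flooding heuristics).
+ * Currently a no-op that accepts every connection attempt.
+ */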
+static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
+{
+ return 0;
+}
+
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
+{
+ struct options *opt = (struct options *) ptr;
+ struct tcp_v4_open_req *af_req;
+ struct open_request *req;
+ struct tcphdr *th = skb->h.th;
+ __u32 saddr = skb->saddr;
+ __u32 daddr = skb->daddr;
+
+ /* If the socket is dead, don't accept the connection. */
+ if (sk->dead)
+ {
+ if(sk->debug)
+ {
+ printk("Reset on %p: Connect on dead socket.\n",sk);
+ }
+ tcp_statistics.TcpAttemptFails++;
+ return -ENOTCONN;
+ }
+
+ if (sk->ack_backlog >= sk->max_ack_backlog ||
+ tcp_v4_syn_filter(sk, skb, saddr))
+ {
+ printk(KERN_DEBUG "droping syn ack:%d max:%d\n",
+ sk->ack_backlog, sk->max_ack_backlog);
+#ifdef CONFIG_IP_TCPSF
+ tcp_v4_random_drop(sk);
+#endif
+ tcp_statistics.TcpAttemptFails++;
+ goto exit;
+ }
+
+
+ af_req = kmalloc(sizeof(struct tcp_v4_open_req), GFP_ATOMIC);
+
+ if (af_req == NULL)
+ {
+ tcp_statistics.TcpAttemptFails++;
+ goto exit;
+ }
+
+ sk->ack_backlog++;
+ req = (struct open_request *) af_req;
+
+ memset(af_req, 0, sizeof(struct tcp_v4_open_req));
+
+ req->rcv_isn = skb->seq;
+ req->snt_isn = isn;
+
+ /* mss */
+ req->mss = tcp_parse_options(th);
+
+ if (!req->mss)
+ {
+ req->mss = 536;
+ }
+
+ req->rmt_port = th->source;
+
+ af_req->loc_addr = daddr;
+ af_req->rmt_addr = saddr;
+
+ /*
+ * options
+ */
+
+ if (opt && opt->optlen)
+ {
+ af_req->opt = (struct options*) kmalloc(sizeof(struct options) +
+ opt->optlen, GFP_ATOMIC);
+ if (af_req->opt)
+ {
+ if (ip_options_echo(af_req->opt, opt, skb->daddr,
+ skb->saddr, skb))
+ {
+ kfree_s(af_req->opt, sizeof(struct options) +
+ opt->optlen);
+ af_req->opt = NULL;
+ }
+ }
+ }
+
+ req->class = &or_ipv4;
+
+ tcp_v4_send_synack(sk, req);
+
+ req->expires = jiffies + TCP_TIMEOUT_INIT;
+ tcp_inc_slow_timer(TCP_SLT_SYNACK);
+ tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+
+ sk->data_ready(sk, 0);
+
+ exit:
+ kfree_skb(skb, FREE_READ);
+ return 0;
+}
+
+struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct open_request *req)
+{
+ struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req;
+ struct tcp_opt *newtp;
+ struct sock *newsk;
+ struct rtable *rt;
+ int snd_mss;
+
+ newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
+ if (newsk == NULL)
+ {
+ return NULL;
+ }
+
+ memcpy(newsk, sk, sizeof(*newsk));
+ newsk->opt = NULL;
+ newsk->ip_route_cache = NULL;
+ skb_queue_head_init(&newsk->write_queue);
+ skb_queue_head_init(&newsk->receive_queue);
+ skb_queue_head_init(&newsk->out_of_order_queue);
+
+ /*
+ * Unused
+ */
+
+ newsk->send_head = NULL;
+ newsk->send_tail = NULL;
+
+ newtp = &(newsk->tp_pinfo.af_tcp);
+ newtp->send_head = NULL;
+ newtp->retrans_head = NULL;
+
+ newtp->pending = 0;
+
+ skb_queue_head_init(&newsk->back_log);
+
+ newsk->prot->init(newsk);
+
+ newsk->cong_count = 0;
+ newsk->ssthresh = 0;
+ newtp->backoff = 0;
+ newsk->blog = 0;
+ newsk->intr = 0;
+ newsk->proc = 0;
+ newsk->done = 0;
+ newsk->partial = NULL;
+ newsk->pair = NULL;
+ newsk->wmem_alloc = 0;
+ newsk->rmem_alloc = 0;
+ newsk->localroute = sk->localroute;
+
+ newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
+
+ newsk->err = 0;
+ newsk->shutdown = 0;
+ newsk->ack_backlog = 0;
+
+ newsk->fin_seq = req->rcv_isn;
+ newsk->syn_seq = req->rcv_isn;
+ newsk->state = TCP_SYN_RECV;
+ newsk->timeout = 0;
+ newsk->ip_xmit_timeout = 0;
+
+ newsk->write_seq = req->snt_isn;
+
+ newtp->snd_wnd = ntohs(skb->h.th->window);
+ newsk->max_window = newtp->snd_wnd;
+ newtp->snd_wl1 = req->rcv_isn;
+ newtp->snd_wl2 = newsk->write_seq;
+ newtp->snd_una = newsk->write_seq++;
+ newtp->snd_nxt = newsk->write_seq;
+
+ newsk->urg_data = 0;
+ newsk->packets_out = 0;
+ newsk->retransmits = 0;
+ newsk->linger=0;
+ newsk->destroy = 0;
+ init_timer(&newsk->timer);
+ newsk->timer.data = (unsigned long) newsk;
+ newsk->timer.function = &net_timer;
+
+ tcp_init_xmit_timers(newsk);
+
+ newsk->dummy_th.source = sk->dummy_th.source;
+ newsk->dummy_th.dest = req->rmt_port;
+
+ newtp->rcv_nxt = req->rcv_isn + 1;
+ newtp->rcv_wup = req->rcv_isn + 1;
+ newsk->copied_seq = req->rcv_isn + 1;
+
+ newsk->socket = NULL;
+
+ newsk->daddr = af_req->rmt_addr;
+ newsk->saddr = af_req->loc_addr;
+ newsk->rcv_saddr = af_req->loc_addr;
+
+ /*
+ * options / mss / route_cache
+ */
+ newsk->opt = af_req->opt;
+ rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr :
+ newsk->daddr, 0);
+
+ newsk->ip_route_cache = rt;
+
+ if(rt != NULL && (rt->rt_flags&RTF_WINDOW))
+ newsk->window_clamp = rt->rt_window;
+ else
+ newsk->window_clamp = 0;
+
+ if (rt)
+ snd_mss = rt->rt_mtu;
+ else
+ snd_mss = skb->dev->mtu;
+
+ newsk->mtu = snd_mss;
+ /* sanity check */
+ if (newsk->mtu < 64)
+ {
+ newsk->mtu = 64;
+ }
+
+ snd_mss -= sizeof(struct iphdr) + sizeof(struct tcphdr);
+
+ if (sk->user_mss)
+ {
+ snd_mss = min(snd_mss, sk->user_mss);
+ }
+
+ newsk->mss = min(req->mss, snd_mss);
+
+ inet_put_sock(newsk->num, newsk);
+
+ tcp_cache_zap();
+
+ return newsk;
+}
+
+/*
+ * From tcp_input.c
+ */
+
+int tcp_v4_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
+ __u32 daddr, unsigned short len,
+ __u32 saddr, int redo, struct inet_protocol * protocol)
+{
+ struct tcphdr *th;
+ struct sock *sk;
+
+ /*
+ * "redo" is 1 if we have already seen this skb but couldn't
+ * use it at that time (the socket was locked). In that case
+ * we have already done a lot of the work (looked up the socket
+ * etc).
+ */
+
+ th = skb->h.th;
+
+ sk = skb->sk;
+
+ if (!redo)
+ {
+
+ if (skb->pkt_type!=PACKET_HOST)
+ goto discard_it;
+
+ /*
+ * Pull up the IP header.
+ */
+
+ skb_pull(skb, skb->h.raw-skb->data);
+
+ /*
+ * Try to use the device checksum if provided.
+ */
+
+ switch (skb->ip_summed)
+ {
+ case CHECKSUM_NONE:
+ skb->csum = csum_partial((char *)th, len, 0);
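+ /* fall through and verify the sum we just computed */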
+ case CHECKSUM_HW:
+ if (tcp_v4_check(th,len,saddr,daddr,skb->csum))
+ goto discard_it;
+ default:
+ /* CHECKSUM_UNNECESSARY */
+ }
+
+ sk = get_tcp_sock(saddr, th->source, daddr, th->dest,
+ dev->pa_addr, skb->redirport);
+
+ if (!sk)
+ goto no_tcp_socket;
+
+ skb->sk = sk;
+ skb->seq = ntohl(th->seq);
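+ /* a SYN and a FIN each occupy one unit of sequence space */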
+ skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
+ skb->ack_seq = ntohl(th->ack_seq);
+
+ skb->acked = 0;
+ skb->used = 0;
+ skb->free = 1;
+ skb->saddr = saddr;
+ skb->daddr = daddr;
+ }
+
+ /*
+ * We may need to add it to the backlog here.
+ */
+
+ if (sk->users)
+ {
+ __skb_queue_tail(&sk->back_log, skb);
+ return(0);
+ }
+
+ if (!sk->prot)
+ {
+ printk(KERN_DEBUG "tcp_rcv: sk->prot == NULL\n");
+ return(0);
+ }
+
+ atomic_add(skb->truesize, &sk->rmem_alloc);
+
+ if (sk->state == TCP_ESTABLISHED)
+ {
+ tcp_rcv_established(sk, skb, th, len);
+ return 0;
+ }
+
+ if (sk->state == TCP_LISTEN)
+ {
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct open_request *req;
+ struct tcp_v4_open_req *af_req;
+
+ /*
+ * Assumption: the socket is not in use, since we checked
+ * the user count above and we're running from a soft
+ * interrupt.
+ */
+
+ req = tp->syn_wait_queue;
+ af_req = (struct tcp_v4_open_req *) req;
+
+ if (req)
+ {
+ do {
+ if (af_req->rmt_addr == saddr &&
+ af_req->loc_addr == daddr &&
+ req->rmt_port == th->source)
+ {
+ if (req->sk)
+ {
+ printk(KERN_DEBUG "bug: syn_recv socket "
+ "exists\n");
+ break;
+ }
+
+ /* match */
+
+ atomic_sub(skb->truesize, &sk->rmem_alloc);
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req);
+
+ tcp_dec_slow_timer(TCP_SLT_SYNACK);
+
+ if (sk == NULL)
+ {
+ goto no_tcp_socket;
+ }
+
+ atomic_add(skb->truesize, &sk->rmem_alloc);
+ req->sk = sk;
+ skb->sk = sk;
+ break;
+ }
+
+ req = req->dl_next;
+ } while (req != tp->syn_wait_queue);
+ }
+ }
+
+ if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0)
+ return 0;
+
+no_tcp_socket:
+
+ /*
+ * No such TCB. If th->rst is 0 send a reset
+ * (checked in tcp_send_reset)
+ */
+
+ tcp_v4_send_reset(daddr, saddr, th, &tcp_prot, opt, dev,
+ skb->ip_hdr->tos, 255);
+
+discard_it:
+
+ /*
+ * Discard frame
+ */
+
+ kfree_skb(skb, FREE_READ);
+ return 0;
+}
+
+int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
+{
+ struct options * opt = (struct options*)skb->proto_priv;
+ struct device * dev;
+ struct rtable *rt;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ int size;
+
+ /*
+ * Discard the surplus MAC header
+ */
+
+ skb_pull(skb, ((unsigned char *)skb->ip_hdr)-skb->data);
+
+ iph = skb->ip_hdr;
+ th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
+ size = skb->tail - (unsigned char *) th;
+
+ dev = skb->dev;
+
+ rt = ip_check_route(&sk->ip_route_cache,
+ opt->srr?opt->faddr:iph->daddr,
+ skb->localroute);
+
+
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
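+ /* If this packet no longer fits the route MTU, clear DF and let IP fragment it */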
+ if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
+ iph->frag_off &= ~htons(IP_DF);
+#endif
+
+ if (rt==NULL) /* Deep poo */
+ {
+ if(skb->sk)
+ {
+ skb->sk->err_soft=ENETUNREACH;
+ skb->sk->error_report(skb->sk);
+ }
+ return -1;
+ }
+
+
+ dev=rt->rt_dev;
+ skb->raddr=rt->rt_gateway;
+ skb->dev=dev;
+ skb->arp=1;
+
+ if (rt->rt_hh)
+ {
+ memcpy(skb_push(skb, dev->hard_header_len),
+ rt->rt_hh->hh_data, dev->hard_header_len);
+
+ if (!rt->rt_hh->hh_uptodate)
+ {
+ skb->arp = 0;
+#if RT_CACHE_DEBUG >= 2
+ printk("tcp_do_rebuild_header: "
+ "hh miss %08x via %08x\n",
+ iph->daddr, rt->rt_gateway);
+#endif
+ }
+ }
+ else if (dev->hard_header)
+ {
+ if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL,
+ skb->len)<0)
+ skb->arp=0;
+ }
+
+ return 0;
+}
+
+int tcp_v4_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ return tcp_v4_rcv(skb, skb->dev, (struct options *) skb->proto_priv,
+ skb->daddr, skb->len, skb->saddr, 1,
+ (struct inet_protocol *) sk->pair);
+}
+
+static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th)
+{
+ struct sock *sk;
+
+ sk = get_tcp_sock(skb->saddr, th->source, skb->daddr, th->dest, 0, 0);
+
+ return sk;
+}
+
+int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb)
+{
+ struct device *dev = NULL;
+ int tmp;
+
+ tmp = ip_build_header(skb, sk->saddr, sk->daddr, &dev,
+ IPPROTO_TCP, sk->opt, skb->truesize,
+ sk->ip_tos, sk->ip_ttl,
+ &sk->ip_route_cache);
+ skb->dev = dev;
+
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
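+ /* Set DF so a shrinking path MTU surfaces as ICMP "fragmentation needed", handled in tcp_v4_err() above */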
+ if (tmp > 0)
+ {
+ skb->ip_hdr->frag_off |= htons(IP_DF);
+ }
+#endif
+
+ return tmp;
+}
+
+
+static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
+
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = sk->daddr;
+ sin->sin_port = sk->dummy_th.dest;
+
+}
+
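+/*
+ * AF_INET glue for the protocol-independent TCP core. The
+ * initializers are positional and are assumed to follow the member
+ * order of struct tcp_func in <net/tcp.h>.
+ */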
+struct tcp_func ipv4_specific = {
+ tcp_v4_build_header,
+ ip_queue_xmit,
+ tcp_v4_send_check,
+ tcp_v4_rebuild_header,
+ tcp_v4_conn_request,
+ tcp_v4_syn_recv_sock,
+ tcp_v4_init_sequence,
+ tcp_v4_get_sock,
+ ip_setsockopt,
+ ip_getsockopt,
+ v4_addr2sockaddr,
+ sizeof(struct sockaddr_in)
+};
+
+static int tcp_v4_init_sock(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ skb_queue_head_init(&sk->out_of_order_queue);
+ tcp_init_xmit_timers(sk);
+
+ tp->srtt = 0;
+ tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/
+ tp->mdev = TCP_TIMEOUT_INIT;
+
+ tp->ato = 0;
+ tp->iat = (HZ/5) << 3;
+
+ tp->rcv_wnd = 8192;
+
+ /*
+ * See draft-stevens-tcpca-spec-01 for discussion of the
+ * initialization of these values.
+ */
+ sk->cong_window = 1;
+ sk->ssthresh = 0x7fffffff;
+
+ sk->priority = 1;
+ sk->state = TCP_CLOSE;
+
+ /* this is how many unacked bytes we will accept for this socket. */
+ sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
+ sk->max_ack_backlog = SOMAXCONN;
+
+ sk->mtu = 576;
+ sk->mss = 536;
+
+ sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
+
+
+ /*
+ * Speed up by setting some standard state for the dummy_th
+ * if TCP uses it (maybe move to tcp_init later)
+ */
+
+ sk->dummy_th.ack=1;
+ sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
+
+ sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
+
+ return 0;
+}
+
+static int tcp_v4_destroy_sock(struct sock *sk)
+{
+ struct sk_buff *skb;
+
+ tcp_clear_xmit_timers(sk);
+
+ if (sk->keepopen)
+ {
+ tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
+ }
+
+ /*
+ * Clean up the write buffer.
+ */
+
+ while((skb = skb_dequeue(&sk->write_queue)) != NULL) {
+ IS_SKB(skb);
+ skb->free = 1;
+ kfree_skb(skb, FREE_WRITE);
+ }
+
+ /*
+ * Clean up our, hopefully empty, out_of_order_queue.
+ */
+
+ while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL) {
+ IS_SKB(skb);
+ kfree_skb(skb, FREE_READ);
+ }
+
+ return 0;
+}
+
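+/*
+ * Positional initializers, assumed to follow the member order of
+ * struct proto in <net/sock.h>; NULL leaves an optional hook (such
+ * as a special bind) unset.
+ */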
+struct proto tcp_prot = {
+ tcp_close,
+ tcp_v4_connect,
+ tcp_accept,
+ NULL,
+ tcp_write_wakeup,
+ tcp_read_wakeup,
+ tcp_select,
+ tcp_ioctl,
+ tcp_v4_init_sock,
+ tcp_v4_destroy_sock,
+ tcp_shutdown,
+ tcp_setsockopt,
+ tcp_getsockopt,
+ tcp_v4_sendmsg,
+ tcp_recvmsg,
+ NULL, /* No special bind() */
+ tcp_v4_backlog_rcv,
+ 128,
+ 0,
+ "TCP",
+ 0, 0,
+ NULL
+};
+
+/*
+ * Local variables:
+ * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp_ipv4.o tcp_ipv4.c"
+ * c-file-style: "Linux"
+ * End:
+ */