/*
       * INET		An implementation of the TCP/IP protocol suite for the LINUX
       *		operating system.  INET is implemented using the  BSD Socket
       *		interface as the means of communication with the user level.
       *
       *		Generic socket support routines. Memory allocators, socket lock/release
       *		handler for protocols to use and generic option handler.
       *
       *
       * Version:	$Id: sock.c,v 1.102 2000/12/11 23:00:24 davem Exp $
       *
       * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
       *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
       *		Florian La Roche, <flla@stud.uni-sb.de>
       *		Alan Cox, <A.Cox@swansea.ac.uk>
       *
       * Fixes:
       *		Alan Cox	: 	Numerous verify_area() problems
       *		Alan Cox	:	Connecting on a connecting socket
       *					now returns an error for tcp.
       *		Alan Cox	:	sock->protocol is set correctly.
       *					and is not sometimes left as 0.
       *		Alan Cox	:	connect handles icmp errors on a
       *					connect properly. Unfortunately there
       *					is a restart syscall nasty there. I
       *					can't match BSD without hacking the C
       *					library. Ideas urgently sought!
       *		Alan Cox	:	Disallow bind() to addresses that are
       *					not ours - especially broadcast ones!!
       *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
       *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
       *					instead they leave that for the DESTROY timer.
       *		Alan Cox	:	Clean up error flag in accept
       *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
       *					was buggy. Put a remove_sock() in the handler
       *					for memory when we hit 0. Also altered the timer
       *					code. The ACK stuff can wait and needs major 
       *					TCP layer surgery.
       *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
       *					and fixed timer/inet_bh race.
       *		Alan Cox	:	Added zapped flag for TCP
       *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
       *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
       *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
       *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
       *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
       *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
       *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
       *	Pauline Middelink	:	identd support
       *		Alan Cox	:	Fixed connect() taking signals I think.
       *		Alan Cox	:	SO_LINGER supported
       *		Alan Cox	:	Error reporting fixes
       *		Anonymous	:	inet_create tidied up (sk->reuse setting)
       *		Alan Cox	:	inet sockets don't set sk->type!
       *		Alan Cox	:	Split socket option code
       *		Alan Cox	:	Callbacks
       *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
       *		Alex		:	Removed restriction on inet fioctl
       *		Alan Cox	:	Splitting INET from NET core
       *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
       *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
       *		Alan Cox	:	Split IP from generic code
       *		Alan Cox	:	New kfree_skbmem()
       *		Alan Cox	:	Make SO_DEBUG superuser only.
       *		Alan Cox	:	Allow anyone to clear SO_DEBUG
       *					(compatibility fix)
       *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
       *		Alan Cox	:	Allocator for a socket is settable.
       *		Alan Cox	:	SO_ERROR includes soft errors.
       *		Alan Cox	:	Allow NULL arguments on some SO_ opts
       *		Alan Cox	: 	Generic socket allocation to make hooks
       *					easier (suggested by Craig Metz).
       *		Michael Pall	:	SO_ERROR returns positive errno again
       *              Steve Whitehouse:       Added default destructor to free
       *                                      protocol private data.
       *              Steve Whitehouse:       Added various other default routines
       *                                      common to several socket families.
       *              Chris Evans     :       Call suser() check last on F_SETOWN
       *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
       *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
       *		Andi Kleen	:	Fix write_space callback
       *
       * To Fix:
       *
       *
       *		This program is free software; you can redistribute it and/or
       *		modify it under the terms of the GNU General Public License
       *		as published by the Free Software Foundation; either version
       *		2 of the License, or (at your option) any later version.
       */
      
      #include <linux/config.h>
      #include <linux/errno.h>
      #include <linux/types.h>
      #include <linux/socket.h>
      #include <linux/in.h>
      #include <linux/kernel.h>
      #include <linux/major.h>
      #include <linux/sched.h>
      #include <linux/timer.h>
      #include <linux/string.h>
      #include <linux/sockios.h>
      #include <linux/net.h>
      #include <linux/fcntl.h>
      #include <linux/mm.h>
      #include <linux/slab.h>
      #include <linux/interrupt.h>
      #include <linux/poll.h>
      #include <linux/init.h>
      
      #include <asm/uaccess.h>
      #include <asm/system.h>
      
      #include <linux/inet.h>
      #include <linux/netdevice.h>
      #include <net/ip.h>
      #include <net/protocol.h>
      #include <net/arp.h>
      #include <net/route.h>
      #include <net/tcp.h>
      #include <net/udp.h>
      #include <linux/skbuff.h>
      #include <net/sock.h>
      #include <net/raw.h>
      #include <net/icmp.h>
      #include <linux/ipsec.h>
      
      #ifdef CONFIG_FILTER
      #include <linux/filter.h>
      #endif
      
      #define min(a,b)	((a)<(b)?(a):(b))
      
      /* Run time adjustable parameters. */
      __u32 sysctl_wmem_max = SK_WMEM_MAX;
      __u32 sysctl_rmem_max = SK_RMEM_MAX;
      __u32 sysctl_wmem_default = SK_WMEM_MAX;
      __u32 sysctl_rmem_default = SK_RMEM_MAX;
      
/* Maximal space eaten by iovec or ancillary data plus some space */
      int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
      
static int sock_set_timeout(long *timeo_p, char *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
      }
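
/*
 * For reference, the layout sock_set_timeout() expects is the ordinary
 * BSD struct timeval. A minimal user-space sketch (illustrative only,
 * not part of this file):
 *
 *	struct timeval tv = { 2, 500000 };	// 2.5 second timeout
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
 *		perror("setsockopt");
 *
 * A zero timeval selects MAX_SCHEDULE_TIMEOUT, i.e. blocking without
 * limit, matching the kernel code above.
 */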
      
      /*
       *	This is meant for all protocols to use and covers goings on
       *	at the socket level. Everything here is generic.
       */
      
int sock_setsockopt(struct socket *sock, int level, int optname,
		    char *optval, int optlen)
      {
      	struct sock *sk=sock->sk;
      #ifdef CONFIG_FILTER
      	struct sk_filter *filter;
      #endif
      	int val;
      	int valbool;
      	int err;
      	struct linger ling;
      	int ret = 0;
      	
      	/*
      	 *	Options without arguments
      	 */
      
      #ifdef SO_DONTLINGER		/* Compatibility item... */
      	switch(optname)
      	{
      		case SO_DONTLINGER:
      			sk->linger=0;
      			return 0;
      	}
      #endif	
      		
	if (optlen < sizeof(int))
		return -EINVAL;

	err = get_user(val, (int *)optval);
	if (err)
		return err;

	valbool = val ? 1 : 0;
      
	lock_sock(sk);
      
	switch (optname) {
		case SO_DEBUG:
			if (val && !capable(CAP_NET_ADMIN))
				ret = -EACCES;
			else
				sk->debug = valbool;
			break;
		case SO_REUSEADDR:
			sk->reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			sk->localroute = valbool;
			break;
		case SO_BROADCAST:
			sk->broadcast = valbool;
			break;
		case SO_SNDBUF:
			/* Don't return an error here; BSD doesn't, and if
			   you think about it this is right. Otherwise apps
			   have to play 'guess the biggest size' games.
			   RCVBUF/SNDBUF are treated in BSD as hints. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			sk->userlocks |= SOCK_SNDBUF_LOCK;
			sk->sndbuf = max(val*2, SOCK_MIN_SNDBUF);

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->write_space(sk);
			break;
      
		case SO_RCVBUF:
			/* Don't return an error here; BSD doesn't, and if
			   you think about it this is right. Otherwise apps
			   have to play 'guess the biggest size' games.
			   RCVBUF/SNDBUF are treated in BSD as hints. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			sk->rcvbuf = max(val*2, SOCK_MIN_RCVBUF);
			break;
      
		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sk->keepopen = valbool;
			break;

		case SO_OOBINLINE:
			sk->urginline = valbool;
			break;

		case SO_NO_CHECK:
			sk->no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if (optlen < sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling, optval, sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (ling.l_onoff == 0) {
				sk->linger = 0;
			} else {
#if (BITS_PER_LONG == 32)
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->lingertime = ling.l_linger*HZ;
				sk->linger = 1;
			}
			break;

		case SO_BSDCOMPAT:
			sk->bsdism = valbool;
			break;

		case SO_PASSCRED:
			sock->passcred = valbool;
			break;

		case SO_TIMESTAMP:
			sk->rcvtstamp = valbool;
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sndtimeo, optval, optlen);
			break;
      
      #ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
      		}
      #endif
      
      
      #ifdef CONFIG_FILTER
      		case SO_ATTACH_FILTER:
      			ret = -EINVAL;
      			if (optlen == sizeof(struct sock_fprog)) {
      				struct sock_fprog fprog;
      
      				ret = -EFAULT;
      				if (copy_from_user(&fprog, optval, sizeof(fprog)))
      					break;
      
      				ret = sk_attach_filter(&fprog, sk);
      			}
      			break;
      
		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->lock.slock);
			filter = sk->filter;
			if (filter) {
				sk->filter = NULL;
				spin_unlock_bh(&sk->lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->lock.slock);
			ret = -ENOENT;	/* no filter was attached */
			break;
      #endif
		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
      }
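
/*
 * Note for readers: SO_SNDBUF/SO_RCVBUF store twice the requested value
 * (capped by sysctl_wmem_max/sysctl_rmem_max) to leave room for struct
 * sk_buff overhead, so getsockopt() reports the doubled figure. A small
 * user-space sketch (illustrative only):
 *
 *	int req = 65536, got;
 *	socklen_t len = sizeof(got);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &req, sizeof(req));
 *	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &got, &len);
 *	// got is typically 2*req, or 2*sysctl_wmem_max if capped
 */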
      
      
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char *optval, int *optlen)
      {
      	struct sock *sk = sock->sk;
      	
	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int), len;

	if (get_user(len, optlen))
		return -EFAULT;

	switch (optname) {
		case SO_DEBUG:
			v.val = sk->debug;
			break;

		case SO_DONTROUTE:
			v.val = sk->localroute;
			break;

		case SO_BROADCAST:
			v.val = sk->broadcast;
			break;

		case SO_SNDBUF:
			v.val = sk->sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->reuse;
			break;

		case SO_KEEPALIVE:
			v.val = sk->keepopen;
			break;

		case SO_TYPE:
			v.val = sk->type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if (v.val == 0)
				v.val = xchg(&sk->err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = sk->urginline;
			break;

		case SO_NO_CHECK:
			v.val = sk->no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->priority;
			break;

		case SO_LINGER:
			lv = sizeof(v.ling);
			v.ling.l_onoff = sk->linger;
			v.ling.l_linger = sk->lingertime/HZ;
			break;

		case SO_BSDCOMPAT:
			v.val = sk->bsdism;
			break;

		case SO_TIMESTAMP:
			v.val = sk->rcvtstamp;
			break;
      
		case SO_RCVTIMEO:
			lv = sizeof(struct timeval);
			if (sk->rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->rcvtimeo/HZ;
				/* tv_usec is microseconds; scale the jiffy
				   remainder by 1000000/HZ, matching
				   sock_set_timeout() above. */
				v.tm.tv_usec = ((sk->rcvtimeo%HZ)*1000000)/HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sndtimeo/HZ;
				v.tm.tv_usec = ((sk->sndtimeo%HZ)*1000000)/HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val = 1;
			break;

		case SO_PASSCRED:
			v.val = sock->passcred;
			break;
      
		case SO_PEERCRED:
			lv = sizeof(sk->peercred);
			len = min(len, lv);
			if (copy_to_user((void*)optval, &sk->peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user((void*)optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		default:
			return -ENOPROTOOPT;
      	}
	len = min(len, lv);
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
      }
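
/*
 * A classic consumer of the SO_ERROR path above is a non-blocking
 * connect(). A user-space sketch (illustrative only):
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *
 *	// after poll()/select() reports the socket writable:
 *	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
 *		fprintf(stderr, "connect: %s\n", strerror(err));
 */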
      
      static kmem_cache_t *sk_cachep;
      
      /*
       *	All socket objects are allocated here. This is for future
       *	usage.
       */
       
struct sock *sk_alloc(int family, int priority, int zero_it)
{
	struct sock *sk = kmem_cache_alloc(sk_cachep, priority);

	if (sk && zero_it) {
		memset(sk, 0, sizeof(struct sock));
		sk->family = family;
		sock_lock_init(sk);
	}

	return sk;
}
      
void sk_free(struct sock *sk)
{
#ifdef CONFIG_FILTER
	struct sk_filter *filter;
#endif

	if (sk->destruct)
		sk->destruct(sk);

#ifdef CONFIG_FILTER
	filter = sk->filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->filter = NULL;
	}
#endif

	if (atomic_read(&sk->omem_alloc))
		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n",
		       atomic_read(&sk->omem_alloc));

	kmem_cache_free(sk_cachep, sk);
}
      
void __init sk_init(void)
{
	sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0,
				      SLAB_HWCACHE_ALIGN, 0, 0);
	if (!sk_cachep)
		printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!\n");

	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}
      
      /*
       *	Simple resource managers for sockets.
       */
      
      
      /* 
       * Write buffer destructor automatically called from kfree_skb. 
       */
void sock_wfree(struct sk_buff *skb)
      {
      	struct sock *sk = skb->sk;
      
      	/* In case it might be waiting for more memory. */
      	atomic_sub(skb->truesize, &sk->wmem_alloc);
      	sk->write_space(sk);
      	sock_put(sk);
      }
      
      /* 
       * Read buffer destructor automatically called from kfree_skb. 
       */
void sock_rfree(struct sk_buff *skb)
      {
      	struct sock *sk = skb->sk;
      
      	atomic_sub(skb->truesize, &sk->rmem_alloc);
      }
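
/*
 * These run as skb->destructor: skb_set_owner_w() charges a buffer to
 * sk->wmem_alloc and installs sock_wfree(); skb_set_owner_r() does the
 * same for rmem_alloc and sock_rfree(). Roughly (a hedged sketch of the
 * skbuff.h helpers, not a redefinition):
 *
 *	sock_hold(sk);
 *	skb->sk = sk;
 *	skb->destructor = sock_wfree;
 *	atomic_add(skb->truesize, &sk->wmem_alloc);
 *
 * The read-side variant takes no socket reference, which is why only
 * sock_wfree() ends with sock_put().
 */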
      
      /*
       * Allocate a skb from the socket's send buffer.
       */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
      
      /*
       * Allocate a skb from the socket's receive buffer.
       */ 
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}
      
      /* 
       * Allocate a memory block from the socket's option memory buffer.
       */ 
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->omem_alloc);
	}
	return NULL;
}
      
      /*
       * Free an option memory block.
       */
void sock_kfree_s(struct sock *sk, void *mem, int size)
      {
      	kfree(mem);
      	atomic_sub(size, &sk->omem_alloc);
      }
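
/*
 * Protocols use these when they keep variable-sized option state on a
 * socket, so the memory is charged to sk->omem_alloc and capped by
 * sysctl_optmem_max. A hedged sketch ("my_state" is hypothetical):
 *
 *	struct my_state *st = sock_kmalloc(sk, sizeof(*st), GFP_KERNEL);
 *	if (st == NULL)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, st, sizeof(*st));
 */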
      
/* This is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DECLARE_WAITQUEUE(wait, current);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
	add_wait_queue(sk->sleep, &wait);
	for (;;) {
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->socket->flags);
		set_current_state(TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
			break;
		if (sk->shutdown & SEND_SHUTDOWN)
			break;
		if (sk->err)
			break;
		timeo = schedule_timeout(timeo);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(sk->sleep, &wait);
	return timeo;
}
      
      
      /*
       *	Generic send/receive buffer handlers
       */
      
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
			unsigned long fallback, int noblock, int *errcode)
{
	int err;
	struct sk_buff *skb;
	long timeo;

	timeo = sock_sndtimeo(sk, noblock);

	while (1) {
		unsigned long try_size = size;

		err = sock_error(sk);
		if (err != 0)
			goto failure;

		/*
		 *	We should send SIGPIPE in these cases according to
		 *	1003.1g draft 6.4. If we (the user) did a shutdown()
		 *	call however we should not.
		 *
		 *	Note: This routine isn't just used for datagrams and
		 *	anyway some datagram protocols have a notion of
		 *	close down.
		 */

		err = -EPIPE;
		if (sk->shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
			if (fallback) {
				/* The buffer get won't block, or use the atomic queue.
				 * It does produce annoying no free page messages still.
				 */
				skb = alloc_skb(size, GFP_BUFFER);
				if (skb)
					break;
				try_size = fallback;
			}
			skb = alloc_skb(try_size, sk->allocation);
			if (skb)
				break;
			err = -ENOBUFS;
			goto failure;
		}

		/*
		 *	This means we have too many buffers for this socket already.
		 */

		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
		set_bit(SOCK_NOSPACE, &sk->socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
      }
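
/*
 * Typical use by a datagram protocol's sendmsg path (a hedged sketch;
 * the surrounding function and the "hdr_len" header-building step are
 * hypothetical):
 *
 *	int err;
 *	struct sk_buff *skb;
 *
 *	skb = sock_alloc_send_skb(sk, len + hdr_len, 0,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (skb == NULL)
 *		return err;	// -EAGAIN, -EPIPE, -ENOBUFS, ...
 *	// reserve/build headers, copy the payload, then hand off to the
 *	// lower layer; the skb is already charged to sk->wmem_alloc.
 */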
      
void __lock_sock(struct sock *sk)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&sk->lock.wq, &wait);
	for (;;) {
		current->state = TASK_UNINTERRUPTIBLE;
		spin_unlock_bh(&sk->lock.slock);
		schedule();
		spin_lock_bh(&sk->lock.slock);
		if (!sk->lock.users)
			break;
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&sk->lock.wq, &wait);
}
      
void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->backlog.head;

	do {
		sk->backlog.head = sk->backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->backlog_rcv(sk, skb);
			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->backlog.head) != NULL);
      }
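
/*
 * __lock_sock/__release_sock back the lock_sock()/release_sock() pair.
 * While a process owns the lock, bottom halves queue incoming packets
 * on sk->backlog; release_sock() then replays them via __release_sock().
 * A hedged sketch of the usual calling pattern in a protocol:
 *
 *	lock_sock(sk);
 *	// touch socket state safely; softirq input goes to the backlog
 *	release_sock(sk);	// drains the backlog via sk->backlog_rcv
 */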
      
/*
 *	Generic socket manager library. Most of the simpler socket families
 *	use this to manage their socket lists. At some point we should
 *	hash these. By making this generic we get the lot hashed for free.
 *
 *	It is broken by design. All the protocols using it must be fixed. --ANK
 */
      
      rwlock_t net_big_sklist_lock = RW_LOCK_UNLOCKED;
       
void sklist_remove_socket(struct sock **list, struct sock *sk)
{
	struct sock *s;

	write_lock_bh(&net_big_sklist_lock);

	while ((s = *list) != NULL) {
		if (s == sk) {
			*list = s->next;
			break;
		}
		list = &s->next;
	}

	write_unlock_bh(&net_big_sklist_lock);
	if (s)
		sock_put(s);
}
      
void sklist_insert_socket(struct sock **list, struct sock *sk)
{
	write_lock_bh(&net_big_sklist_lock);
	sk->next = *list;
	*list = sk;
	sock_hold(sk);
	write_unlock_bh(&net_big_sklist_lock);
}
      
      /*
       *	This is only called from user mode. Thus it protects itself against
       *	interrupt users but doesn't worry about being called during work.
       *	Once it is removed from the queue no interrupt or bottom half will
       *	touch it and we are (fairly 8-) ) safe.
       */
      
      void sklist_destroy_socket(struct sock **list, struct sock *sk);
      
      /*
       *	Handler for deferred kills.
       */
      
static void sklist_destroy_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	sklist_destroy_socket(NULL, sk);
}
      
      /*
       *	Destroy a socket. We pass NULL for a list if we know the
       *	socket is not on a list.
       */
       
void sklist_destroy_socket(struct sock **list, struct sock *sk)
{
	struct sk_buff *skb;

	if (list)
		sklist_remove_socket(list, sk);

	while ((skb = skb_dequeue(&sk->receive_queue)) != NULL)
		kfree_skb(skb);

	if (atomic_read(&sk->wmem_alloc) == 0 &&
	    atomic_read(&sk->rmem_alloc) == 0 &&
	    sk->dead) {
		sock_put(sk);
	} else {
		/*
		 *	Someone is using our buffers still.. defer
		 */
		init_timer(&sk->timer);
		sk->timer.expires = jiffies + SOCK_DESTROY_TIME;
		sk->timer.function = sklist_destroy_timer;
		sk->timer.data = (unsigned long)sk;
		add_timer(&sk->timer);
	}
      }
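
/*
 * A hedged sketch of how a simple family uses the sklist helpers; the
 * "my_sock_list" variable and the creation/close hooks are hypothetical:
 *
 *	static struct sock *my_sock_list;
 *
 *	// on socket creation:
 *	sklist_insert_socket(&my_sock_list, sk);
 *
 *	// on close, once sk->dead is set:
 *	sklist_destroy_socket(&my_sock_list, sk);
 */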
      
      /*
       * Set of default routines for initialising struct proto_ops when
       * the protocol does not support a particular function. In certain
       * cases where it makes no sense for a protocol to have a "do nothing"
       * function, some default processing is provided.
       */
      
int sock_no_release(struct socket *sock)
{
	return 0;
}

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char *optval, int *optlen)
{
	return -EOPNOTSUPP;
}
      
      /* 
       * Note: if you add something that sleeps here then change sock_fcntl()
       *       to do proper fd locking.
       */
int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
		case F_SETOWN:
			/*
			 * This is a little restrictive, but it's the only
			 * way to make sure that you can't send a sigurg to
			 * another process.
			 */
			if (current->pgrp != -arg &&
			    current->pid != arg &&
			    !capable(CAP_KILL))
				return -EPERM;
			sk->proc = arg;
			return 0;
		case F_GETOWN:
			return sk->proc;
		default:
			return -EINVAL;
	}
}
      
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
		    struct scm_cookie *scm)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int len, int flags,
		    struct scm_cookie *scm)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
      }
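
/*
 * Families plug these defaults into their struct proto_ops for anything
 * they do not implement. A hedged sketch ("my_proto_ops" and the
 * implemented handlers are hypothetical):
 *
 *	struct proto_ops my_proto_ops = {
 *		family:		PF_MYFAMILY,
 *		release:	my_release,
 *		bind:		my_bind,
 *		connect:	sock_no_connect,
 *		socketpair:	sock_no_socketpair,
 *		accept:		sock_no_accept,
 *		listen:		sock_no_listen,
 *		mmap:		sock_no_mmap,
 *		...
 *	};
 */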
      
      /*
       *	Default Socket Callbacks
       */
      
void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->callback_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible_all(sk->sleep);
	read_unlock(&sk->callback_lock);
}

void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->callback_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->callback_lock);
}

void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->callback_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->callback_lock);
}

void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
		if (sk->sleep && waitqueue_active(sk->sleep))
			wake_up_interruptible(sk->sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->callback_lock);
}

void sock_def_destruct(struct sock *sk)
{
	if (sk->protinfo.destruct_hook)
		kfree(sk->protinfo.destruct_hook);
}
      
void sock_init_data(struct socket *sock, struct sock *sk)
      {
      	skb_queue_head_init(&sk->receive_queue);
      	skb_queue_head_init(&sk->write_queue);
      	skb_queue_head_init(&sk->error_queue);
      
      	init_timer(&sk->timer);
      	
      	sk->allocation	=	GFP_KERNEL;
      	sk->rcvbuf	=	sysctl_rmem_default;
      	sk->sndbuf	=	sysctl_wmem_default;
      	sk->state 	= 	TCP_CLOSE;
      	sk->zapped	=	1;
      	sk->socket	=	sock;
      
	if (sock) {
		sk->type	=	sock->type;
		sk->sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sleep	=	NULL;
      
      	sk->dst_lock		=	RW_LOCK_UNLOCKED;
      	sk->callback_lock	=	RW_LOCK_UNLOCKED;
      
      	sk->state_change	=	sock_def_wakeup;
      	sk->data_ready		=	sock_def_readable;
      	sk->write_space		=	sock_def_write_space;
      	sk->error_report	=	sock_def_error_report;
      	sk->destruct            =       sock_def_destruct;
      
      	sk->peercred.pid 	=	0;
      	sk->peercred.uid	=	-1;
      	sk->peercred.gid	=	-1;
      	sk->rcvlowat		=	1;
      	sk->rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
      	sk->sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
      
      	atomic_set(&sk->refcnt, 1);
      }
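
/*
 * A protocol's create() typically pairs sk_alloc() with sock_init_data()
 * before filling in family-specific fields. Hedged sketch (the function
 * name and PF_MYFAMILY are hypothetical):
 *
 *	static int my_create(struct socket *sock, int protocol)
 *	{
 *		struct sock *sk = sk_alloc(PF_MYFAMILY, GFP_KERNEL, 1);
 *
 *		if (sk == NULL)
 *			return -ENOBUFS;
 *		sock_init_data(sock, sk);
 *		sk->protocol = protocol;
 *		return 0;
 *	}
 */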