/*
       * INET		An implementation of the TCP/IP protocol suite for the LINUX
       *		operating system.  INET is implemented using the  BSD Socket
       *		interface as the means of communication with the user level.
       *
       *		IPv4 Forwarding Information Base: FIB frontend.
       *
       * Version:	$Id: fib_frontend.c,v 1.21 1999/12/15 22:39:07 davem Exp $
       *
       * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
       *
       *		This program is free software; you can redistribute it and/or
       *		modify it under the terms of the GNU General Public License
       *		as published by the Free Software Foundation; either version
       *		2 of the License, or (at your option) any later version.
       */
      
      #include <linux/config.h>
      #include <asm/uaccess.h>
      #include <asm/system.h>
      #include <asm/bitops.h>
      #include <linux/types.h>
      #include <linux/kernel.h>
      #include <linux/sched.h>
      #include <linux/mm.h>
      #include <linux/string.h>
      #include <linux/socket.h>
      #include <linux/sockios.h>
      #include <linux/errno.h>
      #include <linux/in.h>
      #include <linux/inet.h>
      #include <linux/netdevice.h>
      #include <linux/if_arp.h>
      #include <linux/proc_fs.h>
      #include <linux/skbuff.h>
      #include <linux/netlink.h>
      #include <linux/init.h>
      
      #include <net/ip.h>
      #include <net/protocol.h>
      #include <net/route.h>
      #include <net/tcp.h>
      #include <net/sock.h>
      #include <net/icmp.h>
      #include <net/arp.h>
      #include <net/ip_fib.h>
      
      /* Debug helper: hands its arguments to printk() at KERN_DEBUG level. */
      #define FFprint(a...) printk(KERN_DEBUG a)

      #ifndef CONFIG_IP_MULTIPLE_TABLES

      /* Without multiple tables, table dumps start at the main table id. */
      #define RT_TABLE_MIN RT_TABLE_MAIN

      /* The only two tables in this configuration; set up in ip_fib_init(). */
      struct fib_table *local_table;
      struct fib_table *main_table;

      #else

      /* With multiple tables, table dumps start at table id 1. */
      #define RT_TABLE_MIN 1

      /* Table pointers indexed by table id (see __fib_new_table() below). */
      struct fib_table *fib_tables[RT_TABLE_MAX+1];
      
      /*
       *	Create the FIB table with identifier id through fib_hash_init()
       *	and record it in fib_tables[id].
       *
       *	Returns the new table, or NULL when fib_hash_init() fails, in which
       *	case fib_tables[] is left untouched.  No range check is made on id
       *	here.
       */
      struct fib_table *__fib_new_table(int id)
      {
      	struct fib_table *table = fib_hash_init(id);

      	if (table != NULL)
      		fib_tables[id] = table;
      	return table;
      }
      
      
      #endif /* CONFIG_IP_MULTIPLE_TABLES */
      
      
  78  void fib_flush(void)
      {
      	int flushed = 0;
      #ifdef CONFIG_IP_MULTIPLE_TABLES
      	struct fib_table *tb;
      	int id;
      
      	for (id = RT_TABLE_MAX; id>0; id--) {
      		if ((tb = fib_get_table(id))==NULL)
      			continue;
      		flushed += tb->tb_flush(tb);
      	}
      #else /* CONFIG_IP_MULTIPLE_TABLES */
      	flushed += main_table->tb_flush(main_table);
      	flushed += local_table->tb_flush(local_table);
      #endif /* CONFIG_IP_MULTIPLE_TABLES */
      
  95  	if (flushed)
      		rt_cache_flush(-1);
      }
      
      
      #ifdef CONFIG_PROC_FS
      
      /* 
       *	Called from the PROCfs module. This outputs /proc/net/route.
       *
       *	It always works in backward compatibility mode.
       *	The format of the file is not supposed to be changed.
       */
       
      static int
 110  fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
      {
      	int first = offset/128;
      	char *ptr = buffer;
      	int count = (length+127)/128;
      	int len;
      
      	*start = buffer + offset%128;
      	
 119  	if (--first < 0) {
      		sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
      		--count;
      		ptr += 128;
      		first = 0;
        	}
      
 126  	if (main_table && count > 0) {
      		int n = main_table->tb_get_info(main_table, ptr, first, count);
      		count -= n;
      		ptr += n*128;
      	}
      	len = ptr - *start;
 132  	if (len >= length)
 133  		return length;
 134  	if (len >= 0)
 135  		return len;
 136  	return 0;
      }
      
      #endif /* CONFIG_PROC_FS */
      
      /*
       *	Find the first device with a given source address.
       */
      
 145  struct net_device * ip_dev_find(u32 addr)
      {
      	struct rt_key key;
      	struct fib_result res;
      	struct net_device *dev = NULL;
      
      	memset(&key, 0, sizeof(key));
      	key.dst = addr;
      #ifdef CONFIG_IP_MULTIPLE_TABLES
      	res.r = NULL;
      #endif
      
 157  	if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
 158  		return NULL;
      	}
 160  	if (res.type != RTN_LOCAL)
 161  		goto out;
      	dev = FIB_RES_DEV(res);
 163  	if (dev)
      		atomic_inc(&dev->refcnt);
      
      out:
      	fib_res_put(&res);
 168  	return dev;
      }
      
 171  unsigned inet_addr_type(u32 addr)
      {
      	struct rt_key		key;
      	struct fib_result	res;
      	unsigned ret = RTN_BROADCAST;
      
 177  	if (ZERONET(addr) || BADCLASS(addr))
 178  		return RTN_BROADCAST;
 179  	if (MULTICAST(addr))
 180  		return RTN_MULTICAST;
      
      	memset(&key, 0, sizeof(key));
      	key.dst = addr;
      #ifdef CONFIG_IP_MULTIPLE_TABLES
      	res.r = NULL;
      #endif
      	
 188  	if (local_table) {
      		ret = RTN_UNICAST;
 190  		if (local_table->tb_lookup(local_table, &key, &res) == 0) {
      			ret = res.type;
      			fib_res_put(&res);
      		}
      	}
 195  	return ret;
      }
      
      /* Given (packet source, input interface) and optional (dst, oif, tos):
         - (main) check, that source is valid i.e. not broadcast or our local
           address.
         - figure out what "logical" interface this packet arrived
           and calculate "specific destination" address.
         - check, that packet arrived from expected physical interface.
       */
      
 206  int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
      			struct net_device *dev, u32 *spec_dst, u32 *itag)
      {
      	struct in_device *in_dev;
      	struct rt_key key;
      	struct fib_result res;
      	int no_addr, rpf;
      	int ret;
      
      	key.dst = src;
      	key.src = dst;
      	key.tos = tos;
      	key.oif = 0;
      	key.iif = oif;
      	key.scope = RT_SCOPE_UNIVERSE;
      
      	no_addr = rpf = 0;
      	read_lock(&inetdev_lock);
      	in_dev = __in_dev_get(dev);
 225  	if (in_dev) {
      		no_addr = in_dev->ifa_list == NULL;
      		rpf = IN_DEV_RPFILTER(in_dev);
      	}
 229  	read_unlock(&inetdev_lock);
      
 231  	if (in_dev == NULL)
 232  		goto e_inval;
      
 234  	if (fib_lookup(&key, &res))
 235  		goto last_resort;
 236  	if (res.type != RTN_UNICAST)
 237  		goto e_inval_res;
      	*spec_dst = FIB_RES_PREFSRC(res);
 239  	if (itag)
      		fib_combine_itag(itag, &res);
      #ifdef CONFIG_IP_ROUTE_MULTIPATH
      	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
      #else
 244  	if (FIB_RES_DEV(res) == dev)
      #endif
      	{
      		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
      		fib_res_put(&res);
 249  		return ret;
      	}
      	fib_res_put(&res);
 252  	if (no_addr)
 253  		goto last_resort;
 254  	if (rpf)
 255  		goto e_inval;
      	key.oif = dev->ifindex;
      
      	ret = 0;
 259  	if (fib_lookup(&key, &res) == 0) {
 260  		if (res.type == RTN_UNICAST) {
      			*spec_dst = FIB_RES_PREFSRC(res);
      			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
      		}
      		fib_res_put(&res);
      	}
 266  	return ret;
      
      last_resort:
 269  	if (rpf)
 270  		goto e_inval;
      	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
      	*itag = 0;
 273  	return 0;
      
      e_inval_res:
      	fib_res_put(&res);
      e_inval:
 278  	return -EINVAL;
      }
      
      #ifndef CONFIG_IP_NOSIOCRT
      
      /*
       *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
       */
       
 287  int ip_rt_ioctl(unsigned int cmd, void *arg)
      {
      	int err;
      	struct kern_rta rta;
      	struct rtentry  r;
      	struct {
      		struct nlmsghdr nlh;
      		struct rtmsg	rtm;
      	} req;
      
 297  	switch (cmd) {
 298  	case SIOCADDRT:		/* Add a route */
 299  	case SIOCDELRT:		/* Delete a route */
 300  		if (!capable(CAP_NET_ADMIN))
 301  			return -EPERM;
 302  		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
 303  			return -EFAULT;
      		rtnl_lock();
      		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
 306  		if (err == 0) {
 307  			if (cmd == SIOCDELRT) {
      				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
      				err = -ESRCH;
 310  				if (tb)
      					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
 312  			} else {
      				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
      				err = -ENOBUFS;
 315  				if (tb)
      					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
      			}
 318  			if (rta.rta_mx)
      				kfree(rta.rta_mx);
      		}
      		rtnl_unlock();
 322  		return err;
      	}
 324  	return -EINVAL;
      }
      
      #else
      
      /*
       *	Variant built when CONFIG_IP_NOSIOCRT is set: every request is
       *	rejected with -EINVAL.
       */
      int ip_rt_ioctl(unsigned int cmd, void *arg)
      {
      	(void)cmd;
      	(void)arg;

      	return -EINVAL;
      }
      
      #endif
      
      #ifdef CONFIG_RTNETLINK
      
      /*
       *	Check the attribute table of a routing netlink message.
       *
       *	rta holds RTA_MAX slots; slot i-1 belongs to attribute type i.
       *	Every attribute present must carry a payload of at least 4 bytes,
       *	otherwise -EINVAL is returned.  Except for RTA_MULTIPATH and
       *	RTA_METRICS, each slot is then overwritten with a pointer to the
       *	attribute's payload, so the table ends up holding data pointers
       *	rather than attribute headers (the callers in this file pass it on
       *	cast to struct kern_rta).
       *
       *	r is not used.  Returns 0 on success.
       */
      static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
      {
      	int idx;

      	for (idx = 0; idx < RTA_MAX; idx++) {
      		struct rtattr *attr = rta[idx];
      		int type = idx + 1;

      		if (attr == NULL)
      			continue;
      		if (RTA_PAYLOAD(attr) < 4)
      			return -EINVAL;
      		if (type == RTA_MULTIPATH || type == RTA_METRICS)
      			continue;
      		rta[idx] = (struct rtattr*)RTA_DATA(attr);
      	}
      	return 0;
      }
      
      /*
       *	Netlink handler that deletes a route.
       *
       *	arg is the attribute table of the request; it is validated and
       *	converted by inet_check_attr().  Returns -EINVAL when that check
       *	fails, -ESRCH when the table named in the message does not exist,
       *	and otherwise the result of the table's tb_delete() hook.
       */
      int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
      {
      	struct rtmsg *rtm = NLMSG_DATA(nlh);
      	struct rtattr **attrs = arg;
      	struct fib_table *table;

      	if (inet_check_attr(rtm, attrs) != 0)
      		return -EINVAL;

      	table = fib_get_table(rtm->rtm_table);
      	if (table == NULL)
      		return -ESRCH;

      	return table->tb_delete(table, rtm, (struct kern_rta*)attrs, nlh, &NETLINK_CB(skb));
      }
      
      /*
       *	Netlink handler that adds a route.
       *
       *	arg is the attribute table of the request; it is validated and
       *	converted by inet_check_attr().  Returns -EINVAL when that check
       *	fails, -ENOBUFS when fib_new_table() cannot supply the table named
       *	in the message, and otherwise the result of the table's tb_insert()
       *	hook.
       */
      int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
      {
      	struct rtmsg *rtm = NLMSG_DATA(nlh);
      	struct rtattr **attrs = arg;
      	struct fib_table *table;

      	if (inet_check_attr(rtm, attrs) != 0)
      		return -EINVAL;

      	table = fib_new_table(rtm->rtm_table);
      	if (table == NULL)
      		return -ENOBUFS;

      	return table->tb_insert(table, rtm, (struct kern_rta*)attrs, nlh, &NETLINK_CB(skb));
      }
      
      /*
       *	Netlink dump callback for the FIB tables.
       *
       *	A request whose rtmsg has RTM_F_CLONED set is passed to ip_rt_dump()
       *	instead.  Otherwise the tables are walked in increasing id order
       *	through their tb_dump() hooks.
       *
       *	cb->args[0] holds the id of the table to continue with (0 on the
       *	first call, meaning "start at RT_TABLE_MIN"); it is updated before
       *	returning so that a later call resumes at the table where tb_dump()
       *	returned a negative value.  The remaining cb->args[] slots are
       *	cleared whenever the walk moves past the table it resumed in --
       *	presumably they are the per-table cursor of tb_dump(); confirm
       *	against the table implementation.
       *
       *	Returns skb->len.
       */
      int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
      {
      	int t;
      	int s_t;
      	struct fib_table *tb;

      	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
      	    (((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags & RTM_F_CLONED))
      		return ip_rt_dump(skb, cb);

      	s_t = cb->args[0];
      	if (s_t == 0)
      		s_t = cb->args[0] = RT_TABLE_MIN;

      	/* t starts at s_t and only grows, so no "t < s_t" test is needed. */
      	for (t = s_t; t <= RT_TABLE_MAX; t++) {
      		if (t > s_t)
      			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
      		tb = fib_get_table(t);
      		if (tb == NULL)
      			continue;
      		if (tb->tb_dump(tb, skb, cb) < 0)
      			break;
      	}

      	cb->args[0] = t;

      	return skb->len;
      }
      
      #endif
      
      /* Prepare and feed intra-kernel routing request.
         Really, it should be netlink message, but :-( netlink
         can be not configured, so that we feed it directly
         to fib engine. It is legal, because all events occur
         only when netlink is already locked.
       */
      
 422  static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
      {
      	struct fib_table * tb;
      	struct {
      		struct nlmsghdr	nlh;
      		struct rtmsg	rtm;
      	} req;
      	struct kern_rta rta;
      
      	memset(&req.rtm, 0, sizeof(req.rtm));
      	memset(&rta, 0, sizeof(rta));
      
 434  	if (type == RTN_UNICAST)
      		tb = fib_new_table(RT_TABLE_MAIN);
 436  	else
      		tb = fib_new_table(RT_TABLE_LOCAL);
      
 439  	if (tb == NULL)
 440  		return;
      
      	req.nlh.nlmsg_len = sizeof(req);
      	req.nlh.nlmsg_type = cmd;
      	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
      	req.nlh.nlmsg_pid = 0;
      	req.nlh.nlmsg_seq = 0;
      
      	req.rtm.rtm_dst_len = dst_len;
      	req.rtm.rtm_table = tb->tb_id;
      	req.rtm.rtm_protocol = RTPROT_KERNEL;
      	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
      	req.rtm.rtm_type = type;
      
      	rta.rta_dst = &dst;
      	rta.rta_prefsrc = &ifa->ifa_local;
      	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
      
 458  	if (cmd == RTM_NEWROUTE)
      		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
 460  	else
      		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
      }
      
 464  static void fib_add_ifaddr(struct in_ifaddr *ifa)
      {
      	struct in_device *in_dev = ifa->ifa_dev;
      	struct net_device *dev = in_dev->dev;
      	struct in_ifaddr *prim = ifa;
      	u32 mask = ifa->ifa_mask;
      	u32 addr = ifa->ifa_local;
      	u32 prefix = ifa->ifa_address&mask;
      
 473  	if (ifa->ifa_flags&IFA_F_SECONDARY) {
      		prim = inet_ifa_byprefix(in_dev, prefix, mask);
 475  		if (prim == NULL) {
      			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
 477  			return;
      		}
      	}
      
      	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
      
 483  	if (!(dev->flags&IFF_UP))
 484  		return;
      
      	/* Add broadcast address, if it is explicitly assigned. */
 487  	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
      		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
      
      	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
 491  	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
      		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
      			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
      
      		/* Add network specific broadcasts, when it takes a sense */
 496  		if (ifa->ifa_prefixlen < 31) {
      			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
      			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
      		}
      	}
      }
      
 503  static void fib_del_ifaddr(struct in_ifaddr *ifa)
      {
      	struct in_device *in_dev = ifa->ifa_dev;
      	struct net_device *dev = in_dev->dev;
      	struct in_ifaddr *ifa1;
      	struct in_ifaddr *prim = ifa;
      	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
      	u32 any = ifa->ifa_address&ifa->ifa_mask;
      #define LOCAL_OK	1
      #define BRD_OK		2
      #define BRD0_OK		4
      #define BRD1_OK		8
      	unsigned ok = 0;
      
 517  	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
      		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
      			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
 520  	else {
      		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 522  		if (prim == NULL) {
      			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
 524  			return;
      		}
      	}
      
      	/* Deletion is more complicated than add.
      	   We should take care of not to delete too much :-)
      
      	   Scan address list to be sure that addresses are really gone.
      	 */
      
 534  	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
 535  		if (ifa->ifa_local == ifa1->ifa_local)
      			ok |= LOCAL_OK;
 537  		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
      			ok |= BRD_OK;
 539  		if (brd == ifa1->ifa_broadcast)
      			ok |= BRD1_OK;
 541  		if (any == ifa1->ifa_broadcast)
      			ok |= BRD0_OK;
      	}
      
 545  	if (!(ok&BRD_OK))
      		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 547  	if (!(ok&BRD1_OK))
      		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
 549  	if (!(ok&BRD0_OK))
      		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
 551  	if (!(ok&LOCAL_OK)) {
      		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
      
      		/* Check, that this local address finally disappeared. */
 555  		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
      			/* And the last, but not the least thing.
      			   We must flush stray FIB entries.
      
      			   First of all, we scan fib_info list searching
      			   for stray nexthop entries, then ignite fib_flush.
      			*/
 562  			if (fib_sync_down(ifa->ifa_local, NULL, 0))
      				fib_flush();
      		}
      	}
      #undef LOCAL_OK
      #undef BRD_OK
      #undef BRD0_OK
      #undef BRD1_OK
      }
      
 572  static void fib_disable_ip(struct net_device *dev, int force)
      {
 574  	if (fib_sync_down(0, dev, force))
      		fib_flush();
      	rt_cache_flush(0);
      	arp_ifdown(dev);
      }
      
 580  static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
      {
      	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
      
 584  	switch (event) {
 585  	case NETDEV_UP:
      		fib_add_ifaddr(ifa);
      		rt_cache_flush(-1);
 588  		break;
 589  	case NETDEV_DOWN:
 590  		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
      			/* Last address was deleted from this interface.
      			   Disable IP.
      			 */
      			fib_disable_ip(ifa->ifa_dev->dev, 1);
 595  		} else {
      			fib_del_ifaddr(ifa);
      			rt_cache_flush(-1);
      		}
 599  		break;
      	}
 601  	return NOTIFY_DONE;
      }
      
 604  static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
      {
      	struct net_device *dev = ptr;
      	struct in_device *in_dev = __in_dev_get(dev);
      
 609  	if (!in_dev)
 610  		return NOTIFY_DONE;
      
 612  	switch (event) {
 613  	case NETDEV_UP:
 614  		for_ifa(in_dev) {
      			fib_add_ifaddr(ifa);
      		} endfor_ifa(in_dev);
      #ifdef CONFIG_IP_ROUTE_MULTIPATH
      		fib_sync_up(dev);
      #endif
      		rt_cache_flush(-1);
 621  		break;
 622  	case NETDEV_DOWN:
      		fib_disable_ip(dev, 0);
 624  		break;
 625  	case NETDEV_UNREGISTER:
      		fib_disable_ip(dev, 1);
 627  		break;
 628  	case NETDEV_CHANGEMTU:
 629  	case NETDEV_CHANGE:
      		rt_cache_flush(0);
 631  		break;
      	}
 633  	return NOTIFY_DONE;
      }
      
      /*
       *	Notifier block for interface address events; it is registered with
       *	register_inetaddr_notifier() in ip_fib_init().  The initializer is
       *	positional: the first member is the callback, fib_inetaddr_event().
       *	NOTE(review): the NULL and 0 that follow are presumably the chain
       *	link and the priority -- confirm against struct notifier_block.
       */
      struct notifier_block fib_inetaddr_notifier = {
      	fib_inetaddr_event,
      	NULL,
      	0
      };
      
      /*
       *	Notifier block for network device events; it is registered with
       *	register_netdevice_notifier() in ip_fib_init().  The initializer is
       *	positional: the first member is the callback, fib_netdev_event().
       *	NOTE(review): the NULL and 0 that follow are presumably the chain
       *	link and the priority -- confirm against struct notifier_block.
       */
      struct notifier_block fib_netdev_notifier = {
      	fib_netdev_event,
      	NULL,
      	0
      };
      
 648  void __init ip_fib_init(void)
      {
      #ifdef CONFIG_PROC_FS
      	proc_net_create("route",0,fib_get_procinfo);
      #endif		/* CONFIG_PROC_FS */
      
      #ifndef CONFIG_IP_MULTIPLE_TABLES
      	local_table = fib_hash_init(RT_TABLE_LOCAL);
      	main_table = fib_hash_init(RT_TABLE_MAIN);
      #else
      	fib_rules_init();
      #endif
      
      	register_netdevice_notifier(&fib_netdev_notifier);
      	register_inetaddr_notifier(&fib_inetaddr_notifier);
      }