./kernel/sys.c

      /*
       *  linux/kernel/sys.c
       *
       *  Copyright (C) 1991, 1992  Linus Torvalds
       */
      
      #include <linux/module.h>
      #include <linux/mm.h>
      #include <linux/utsname.h>
      #include <linux/mman.h>
      #include <linux/smp_lock.h>
      #include <linux/notifier.h>
      #include <linux/reboot.h>
      #include <linux/prctl.h>
      #include <linux/init.h>
      #include <linux/highuid.h>
      
      #include <asm/uaccess.h>
      #include <asm/io.h>
      
      /*
       * this is where the system-wide overflow UID and GID are defined, for
       * architectures that now have 32-bit UID/GID but didn't in the past
       */
      
      /* System-wide overflow UID; starts out as DEFAULT_OVERFLOWUID. */
      int overflowuid = DEFAULT_OVERFLOWUID;
      /* System-wide overflow GID; starts out as DEFAULT_OVERFLOWGID. */
      int overflowgid = DEFAULT_OVERFLOWGID;
      
      /*
       * the same as above, but for filesystems which can only store a 16-bit
       * UID and GID. as such, this is needed on all architectures
       */
      
      /* Overflow UID for filesystems that can only store a 16-bit UID. */
      int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
      /*
       * Overflow GID for filesystems that can only store a 16-bit GID.
       * NOTE(review): this is initialised from DEFAULT_FS_OVERFLOWUID, a UID
       * constant, not a GID one - looks like a copy/paste slip; confirm
       * whether a DEFAULT_FS_OVERFLOWGID constant exists and was intended.
       */
      int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
      
      /*
       * this indicates whether you can reboot with ctrl-alt-del: the default is yes
       */
      
      /*
       * Non-zero: ctrl_alt_del() restarts the machine.  Zero: ctrl_alt_del()
       * sends SIGINT to pid 1 instead.  Toggled through sys_reboot() with
       * LINUX_REBOOT_CMD_CAD_ON / LINUX_REBOOT_CMD_CAD_OFF.
       */
      int C_A_D = 1;
      
      
      /*
       *	Notifier list for kernel code which wants to be called
       *	at shutdown. This is used to stop any idling DMA operations
       *	and the like. 
       */
      
      /* Head of the chain walked by sys_reboot() and ctrl_alt_del(). */
      static struct notifier_block *reboot_notifier_list;
      /*
       * Taken for writing by notifier_chain_register() and
       * notifier_chain_unregister(); notifier_call_chain() in this file
       * walks a chain without taking it.
       */
      rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
      
      /**
       *	notifier_chain_register	- Add notifier to a notifier chain
       *	@list: Pointer to root list pointer
       *	@n: New entry in notifier chain
       *
       *	Adds a notifier to a notifier chain.
       *
       *	Currently always returns zero.
       */
       
  63  int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
      {
      	write_lock(¬ifier_lock);
  66  	while(*list)
      	{
  68  		if(n->priority > (*list)->priority)
  69  			break;
      		list= &((*list)->next);
      	}
      	n->next = *list;
      	*list=n;
  74  	write_unlock(¬ifier_lock);
  75  	return 0;
      }
      
      /**
       *	notifier_chain_unregister - Remove notifier from a notifier chain
       *	@nl: Pointer to root list pointer
       *	@n: Entry to remove from notifier chain
       *
       *	Removes a notifier from a notifier chain.
       *
       *	Returns zero on success, or %-ENOENT on failure.
       */
       
  88  int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
      {
      	write_lock(¬ifier_lock);
  91  	while((*nl)!=NULL)
      	{
  93  		if((*nl)==n)
      		{
      			*nl=n->next;
  96  			write_unlock(¬ifier_lock);
  97  			return 0;
      		}
      		nl=&((*nl)->next);
      	}
 101  	write_unlock(¬ifier_lock);
 102  	return -ENOENT;
      }
      
      /**
       *	notifier_call_chain - Call functions in a notifier chain
       *	@n: Pointer to root pointer of notifier chain
       *	@val: Value passed unmodified to notifier function
       *	@v: Pointer passed unmodified to notifier function
       *
       *	Calls each function in a notifier chain in turn.
       *
       *	If the return value of the notifier can be and'd
       *	with %NOTIFY_STOP_MASK, then notifier_call_chain
       *	will return immediately, with the return value of
       *	the notifier function which halted execution.
       *	Otherwise, the return value is the return value
       *	of the last notifier function called.
       */
       
 121  int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
      {
      	int ret=NOTIFY_DONE;
      	struct notifier_block *nb = *n;
      
 126  	while(nb)
      	{
      		ret=nb->notifier_call(nb,val,v);
 129  		if(ret&NOTIFY_STOP_MASK)
      		{
 131  			return ret;
      		}
      		nb=nb->next;
      	}
 135  	return ret;
      }
      
      /**
       *	register_reboot_notifier - Register function to be called at reboot time
       *	@nb: Info about notifier function to be called
       *
       *	Registers a function with the list of functions
       *	to be called at reboot time.
       *
       *	Currently always returns zero, as notifier_chain_register
       *	always returns zero.
       */
       
 149  int register_reboot_notifier(struct notifier_block * nb)
      {
 151  	return notifier_chain_register(&reboot_notifier_list, nb);
      }
      
      /**
       *	unregister_reboot_notifier - Unregister previously registered reboot notifier
       *	@nb: Hook to be unregistered
       *
       *	Unregisters a previously registered reboot
       *	notifier function.
       *
       *	Returns zero on success, or %-ENOENT on failure.
       */
       
 164  int unregister_reboot_notifier(struct notifier_block * nb)
      {
 166  	return notifier_chain_unregister(&reboot_notifier_list, nb);
      }
      
 169  asmlinkage long sys_ni_syscall(void)
      {
 171  	return -ENOSYS;
      }
      
 174  static int proc_sel(struct task_struct *p, int which, int who)
      {
 176  	if(p->pid)
      	{
 178  		switch (which) {
 179  			case PRIO_PROCESS:
 180  				if (!who && p == current)
 181  					return 1;
 182  				return(p->pid == who);
 183  			case PRIO_PGRP:
 184  				if (!who)
      					who = current->pgrp;
 186  				return(p->pgrp == who);
 187  			case PRIO_USER:
 188  				if (!who)
      					who = current->uid;
 190  				return(p->uid == who);
      		}
      	}
 193  	return 0;
      }
      
 196  asmlinkage long sys_setpriority(int which, int who, int niceval)
      {
      	struct task_struct *p;
      	int error;
      
 201  	if (which > 2 || which < 0)
 202  		return -EINVAL;
      
      	/* normalize: avoid signed division (rounding problems) */
      	error = -ESRCH;
 206  	if (niceval < -20)
      		niceval = -20;
 208  	if (niceval > 19)
      		niceval = 19;
      
      	read_lock(&tasklist_lock);
 212  	for_each_task(p) {
 213  		if (!proc_sel(p, which, who))
 214  			continue;
      		if (p->uid != current->euid &&
 216  			p->uid != current->uid && !capable(CAP_SYS_NICE)) {
      			error = -EPERM;
 218  			continue;
      		}
 220  		if (error == -ESRCH)
      			error = 0;
 222  		if (niceval < p->nice && !capable(CAP_SYS_NICE))
      			error = -EACCES;
 224  		else
      			p->nice = niceval;
      	}
 227  	read_unlock(&tasklist_lock);
      
 229  	return error;
      }
      
      /*
       * Ugh. To avoid negative return values, "getpriority()" will
       * not return the normal nice-value, but a negated value that
       * has been offset by 20 (ie it returns 40..1 instead of -20..19)
       * to stay compatible.
       */
 238  asmlinkage long sys_getpriority(int which, int who)
      {
      	struct task_struct *p;
      	long retval = -ESRCH;
      
 243  	if (which > 2 || which < 0)
 244  		return -EINVAL;
      
      	read_lock(&tasklist_lock);
 247  	for_each_task (p) {
      		long niceval;
 249  		if (!proc_sel(p, which, who))
 250  			continue;
      		niceval = 20 - p->nice;
 252  		if (niceval > retval)
      			retval = niceval;
      	}
 255  	read_unlock(&tasklist_lock);
      
 257  	return retval;
      }
      
      
      /*
       * Reboot system call: for obvious reasons only root may call it,
       * and even root needs to set up some magic numbers in the registers
       * so that some mistake won't make this reboot the whole machine.
       * You can also set the meaning of the ctrl-alt-del-key here.
       *
       * reboot doesn't sync: do that yourself before calling this.
       */
 269  asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
      {
      	char buffer[256];
      
      	/* We only trust the superuser with rebooting the system. */
 274  	if (!capable(CAP_SYS_BOOT))
 275  		return -EPERM;
      
      	/* For safety, we require "magic" arguments. */
      	if (magic1 != LINUX_REBOOT_MAGIC1 ||
      	    (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
 280  			magic2 != LINUX_REBOOT_MAGIC2B))
 281  		return -EINVAL;
      
 283  	lock_kernel();
 284  	switch (cmd) {
 285  	case LINUX_REBOOT_CMD_RESTART:
      		notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
      		printk(KERN_EMERG "Restarting system.\n");
      		machine_restart(NULL);
 289  		break;
      
 291  	case LINUX_REBOOT_CMD_CAD_ON:
      		C_A_D = 1;
 293  		break;
      
 295  	case LINUX_REBOOT_CMD_CAD_OFF:
      		C_A_D = 0;
 297  		break;
      
 299  	case LINUX_REBOOT_CMD_HALT:
      		notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
      		printk(KERN_EMERG "System halted.\n");
      		machine_halt();
      		do_exit(0);
 304  		break;
      
 306  	case LINUX_REBOOT_CMD_POWER_OFF:
      		notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
      		printk(KERN_EMERG "Power down.\n");
      		machine_power_off();
      		do_exit(0);
 311  		break;
      
 313  	case LINUX_REBOOT_CMD_RESTART2:
 314  		if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
 315  			unlock_kernel();
 316  			return -EFAULT;
      		}
      		buffer[sizeof(buffer) - 1] = '\0';
      
      		notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
      		printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
      		machine_restart(buffer);
 323  		break;
      
 325  	default:
 326  		unlock_kernel();
 327  		return -EINVAL;
      	}
 329  	unlock_kernel();
 330  	return 0;
      }
      
      /*
       * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
       * As it's called within an interrupt, it may NOT sync: the only choice
       * is whether to reboot at once, or just ignore the ctrl-alt-del.
       */
 338  void ctrl_alt_del(void)
      {
 340  	if (C_A_D) {
      		notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
      		machine_restart(NULL);
 343  	} else
      		kill_proc(1, SIGINT, 1);
      }
      	
      
      /*
       * Unprivileged users may change the real gid to the effective gid
       * or vice versa.  (BSD-style)
       *
       * If you set the real gid at all, or set the effective gid to a value not
       * equal to the real gid, then the saved gid is set to the new effective gid.
       *
       * This makes it possible for a setgid program to completely drop its
       * privileges, which is often a useful assertion to make when you are doing
       * a security audit over a program.
       *
       * The general idea is that a program which uses just setregid() will be
       * 100% compatible with BSD.  A program which uses just setgid() will be
       * 100% compatible with POSIX with saved IDs. 
       *
       * SMP: There are no races; the GIDs are checked only by filesystem
       *      operations (as far as semantic preservation is concerned).
       */
 366  asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
      {
      	int old_rgid = current->gid;
      	int old_egid = current->egid;
      
 371  	if (rgid != (gid_t) -1) {
      		if ((old_rgid == rgid) ||
      		    (current->egid==rgid) ||
 374  		    capable(CAP_SETGID))
      			current->gid = rgid;
 376  		else
 377  			return -EPERM;
      	}
 379  	if (egid != (gid_t) -1) {
      		if ((old_rgid == egid) ||
      		    (current->egid == egid) ||
      		    (current->sgid == egid) ||
 383  		    capable(CAP_SETGID))
      			current->fsgid = current->egid = egid;
 385  		else {
      			current->gid = old_rgid;
 387  			return -EPERM;
      		}
      	}
      	if (rgid != (gid_t) -1 ||
 391  	    (egid != (gid_t) -1 && egid != old_rgid))
      		current->sgid = current->egid;
      	current->fsgid = current->egid;
 394  	if (current->egid != old_egid)
      		current->dumpable = 0;
 396  	return 0;
      }
      
      /*
       * setgid() is implemented like SysV w/ SAVED_IDS 
       *
       * SMP: Same implicit races as above.
       */
 404  asmlinkage long sys_setgid(gid_t gid)
      {
      	int old_egid = current->egid;
      
 408  	if (capable(CAP_SETGID))
      		current->gid = current->egid = current->sgid = current->fsgid = gid;
 410  	else if ((gid == current->gid) || (gid == current->sgid))
      		current->egid = current->fsgid = gid;
 412  	else
 413  		return -EPERM;
      
 415  	if (current->egid != old_egid)
      		current->dumpable = 0;
 417  	return 0;
      }
        
      /* 
       * cap_emulate_setxuid() fixes the effective / permitted capabilities of
       * a process after a call to setuid, setreuid, or setresuid.
       *
       *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
       *  {r,e,s}uid != 0, the permitted and effective capabilities are
       *  cleared.
       *
       *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
       *  capabilities of the process are cleared.
       *
       *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
       *  capabilities are set to the permitted capabilities.
       *
       *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 
       *  never happen.
       *
       *  -astor 
       *
       * cevans - New behaviour, Oct '99
       * A process may, via prctl(), elect to keep its capabilities when it
       * calls setuid() and switches away from uid==0. Both permitted and
       * effective sets will be retained.
       * Without this change, it was impossible for a daemon to drop only some
       * of its privilege. The call to setuid(!=0) would drop all privileges!
       * Keeping uid 0 is not an option because uid 0 owns too many vital
       * files..
       * Thanks to Olaf Kirch and Peter Benie for spotting this.
       */
 449  extern inline void cap_emulate_setxuid(int old_ruid, int old_euid, 
      				       int old_suid)
      {
      	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
      	    (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
 454  	    !current->keep_capabilities) {
 455  		cap_clear(current->cap_permitted);
 456  		cap_clear(current->cap_effective);
      	}
 458  	if (old_euid == 0 && current->euid != 0) {
 459  		cap_clear(current->cap_effective);
      	}
 461  	if (old_euid != 0 && current->euid == 0) {
      		current->cap_effective = current->cap_permitted;
      	}
      }
      
 466  static int set_user(uid_t new_ruid)
      {
      	struct user_struct *new_user, *old_user;
      
      	/* What if a process setreuid()'s and this brings the
      	 * new uid over his NPROC rlimit?  We can check this now
      	 * cheaply with the new uid cache, so if it matters
      	 * we should be checking for it.  -DaveM
      	 */
      	new_user = alloc_uid(new_ruid);
 476  	if (!new_user)
 477  		return -EAGAIN;
      	old_user = current->user;
      	atomic_dec(&old_user->processes);
      	atomic_inc(&new_user->processes);
      
      	current->uid = new_ruid;
      	current->user = new_user;
      	free_uid(old_user);
 485  	return 0;
      }
      
      /*
       * Unprivileged users may change the real uid to the effective uid
       * or vice versa.  (BSD-style)
       *
       * If you set the real uid at all, or set the effective uid to a value not
       * equal to the real uid, then the saved uid is set to the new effective uid.
       *
       * This makes it possible for a setuid program to completely drop its
       * privileges, which is often a useful assertion to make when you are doing
       * a security audit over a program.
       *
       * The general idea is that a program which uses just setreuid() will be
       * 100% compatible with BSD.  A program which uses just setuid() will be
       * 100% compatible with POSIX with saved IDs. 
       */
 503  asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
      {
      	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
      
      	new_ruid = old_ruid = current->uid;
      	new_euid = old_euid = current->euid;
      	old_suid = current->suid;
      
 511  	if (ruid != (uid_t) -1) {
      		new_ruid = ruid;
      		if ((old_ruid != ruid) &&
      		    (current->euid != ruid) &&
 515  		    !capable(CAP_SETUID))
 516  			return -EPERM;
      	}
      
 519  	if (euid != (uid_t) -1) {
      		new_euid = euid;
      		if ((old_ruid != euid) &&
      		    (current->euid != euid) &&
      		    (current->suid != euid) &&
 524  		    !capable(CAP_SETUID))
 525  			return -EPERM;
      	}
      
 528  	if (new_ruid != old_ruid && set_user(new_ruid) < 0)
 529  		return -EAGAIN;
      
      	current->fsuid = current->euid = new_euid;
      	if (ruid != (uid_t) -1 ||
 533  	    (euid != (uid_t) -1 && euid != old_ruid))
      		current->suid = current->euid;
      	current->fsuid = current->euid;
 536  	if (current->euid != old_euid)
      		current->dumpable = 0;
      
 539  	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
      		cap_emulate_setxuid(old_ruid, old_euid, old_suid);
      	}
      
 543  	return 0;
      }
      
      
      		
      /*
       * setuid() is implemented like SysV with SAVED_IDS 
       * 
       * Note that SAVED_ID's is deficient in that a setuid root program
       * like sendmail, for example, cannot set its uid to be a normal 
       * user and then switch back, because if you're root, setuid() sets
       * the saved uid too.  If you don't like this, blame the bright people
       * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
       * will allow a root program to temporarily drop privileges and be able to
       * regain them by swapping the real and effective uid.  
       */
 559  asmlinkage long sys_setuid(uid_t uid)
      {
      	int old_euid = current->euid;
      	int old_ruid, old_suid, new_ruid;
      
      	old_ruid = new_ruid = current->uid;
      	old_suid = current->suid;
 566  	if (capable(CAP_SETUID)) {
 567  		if (uid != old_ruid && set_user(uid) < 0)
 568  			return -EAGAIN;
      		current->suid = uid;
 570  	} else if ((uid != current->uid) && (uid != current->suid))
 571  		return -EPERM;
      
      	current->fsuid = current->euid = uid;
      
 575  	if (old_euid != uid)
      		current->dumpable = 0;
      
 578  	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
      		cap_emulate_setxuid(old_ruid, old_euid, old_suid);
      	}
      
 582  	return 0;
      }
      
      
      /*
       * This function implements a generic ability to update ruid, euid,
       * and suid.  This allows you to implement the 4.4 compatible seteuid().
       */
 590  asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
      {
      	int old_ruid = current->uid;
      	int old_euid = current->euid;
      	int old_suid = current->suid;
      
 596  	if (!capable(CAP_SETUID)) {
      		if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 598  		    (ruid != current->euid) && (ruid != current->suid))
 599  			return -EPERM;
      		if ((euid != (uid_t) -1) && (euid != current->uid) &&
 601  		    (euid != current->euid) && (euid != current->suid))
 602  			return -EPERM;
      		if ((suid != (uid_t) -1) && (suid != current->uid) &&
 604  		    (suid != current->euid) && (suid != current->suid))
 605  			return -EPERM;
      	}
 607  	if (ruid != (uid_t) -1) {
 608  		if (ruid != current->uid && set_user(ruid) < 0)
 609  			return -EAGAIN;
      	}
 611  	if (euid != (uid_t) -1) {
 612  		if (euid != current->euid)
      			current->dumpable = 0;
      		current->euid = euid;
      		current->fsuid = euid;
      	}
 617  	if (suid != (uid_t) -1)
      		current->suid = suid;
      
 620  	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
      		cap_emulate_setxuid(old_ruid, old_euid, old_suid);
      	}
      
 624  	return 0;
      }
      
 627  asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
      {
      	int retval;
      
      	if (!(retval = put_user(current->uid, ruid)) &&
 632  	    !(retval = put_user(current->euid, euid)))
      		retval = put_user(current->suid, suid);
      
 635  	return retval;
      }
      
      /*
       * Same as above, but for rgid, egid, sgid.
       */
 641  asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
      {
 643         if (!capable(CAP_SETGID)) {
      		if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 645  		    (rgid != current->egid) && (rgid != current->sgid))
 646  			return -EPERM;
      		if ((egid != (gid_t) -1) && (egid != current->gid) &&
 648  		    (egid != current->egid) && (egid != current->sgid))
 649  			return -EPERM;
      		if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 651  		    (sgid != current->egid) && (sgid != current->sgid))
 652  			return -EPERM;
      	}
 654  	if (rgid != (gid_t) -1)
      		current->gid = rgid;
 656  	if (egid != (gid_t) -1) {
 657  		if (egid != current->egid)
      			current->dumpable = 0;
      		current->egid = egid;
      		current->fsgid = egid;
      	}
 662  	if (sgid != (gid_t) -1)
      		current->sgid = sgid;
 664  	return 0;
      }
      
 667  asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
      {
      	int retval;
      
      	if (!(retval = put_user(current->gid, rgid)) &&
 672  	    !(retval = put_user(current->egid, egid)))
      		retval = put_user(current->sgid, sgid);
      
 675  	return retval;
      }
      
      
      /*
       * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
       * is used for "access()" and for the NFS daemon (letting nfsd stay at
       * whatever uid it wants to). It normally shadows "euid", except when
       * explicitly set by setfsuid() or for access..
       */
 685  asmlinkage long sys_setfsuid(uid_t uid)
      {
      	int old_fsuid;
      
      	old_fsuid = current->fsuid;
      	if (uid == current->uid || uid == current->euid ||
      	    uid == current->suid || uid == current->fsuid || 
 692  	    capable(CAP_SETUID))
      		current->fsuid = uid;
 694  	if (current->fsuid != old_fsuid)
      		current->dumpable = 0;
      
      	/* We emulate fsuid by essentially doing a scaled-down version
      	 * of what we did in setresuid and friends. However, we only
      	 * operate on the fs-specific bits of the process' effective
      	 * capabilities 
      	 *
      	 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
      	 *          if not, we might be a bit too harsh here.
      	 */
      	
 706  	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 707  		if (old_fsuid == 0 && current->fsuid != 0) {
      			cap_t(current->cap_effective) &= ~CAP_FS_MASK;
      		}
 710  		if (old_fsuid != 0 && current->fsuid == 0) {
      			cap_t(current->cap_effective) |=
      				(cap_t(current->cap_permitted) & CAP_FS_MASK);
      		}
      	}
      
 716  	return old_fsuid;
      }
      
      /*
       * Samma på svenska.. (Swedish for "the same in Swedish"): as
       * setfsuid() above, but for the fsgid.
       */
 722  asmlinkage long sys_setfsgid(gid_t gid)
      {
      	int old_fsgid;
      
      	old_fsgid = current->fsgid;
      	if (gid == current->gid || gid == current->egid ||
      	    gid == current->sgid || gid == current->fsgid || 
 729  	    capable(CAP_SETGID))
      		current->fsgid = gid;
 731  	if (current->fsgid != old_fsgid)
      		current->dumpable = 0;
      
 734  	return old_fsgid;
      }
      
 737  asmlinkage long sys_times(struct tms * tbuf)
      {
      	/*
      	 *	In the SMP world we might just be unlucky and have one of
      	 *	the times increment as we use it. Since the value is an
      	 *	atomically safe type this is just fine. Conceptually its
      	 *	as if the syscall took an instant longer to occur.
      	 */
 745  	if (tbuf)
 746  		if (copy_to_user(tbuf, ¤t->times, sizeof(struct tms)))
 747  			return -EFAULT;
 748  	return jiffies;
      }
      
      /*
       * This needs some heavy checking ...
       * I just haven't the stomach for it. I also don't fully
       * understand sessions/pgrp etc. Let somebody who does explain it.
       *
       * OK, I think I have the protection semantics right.... this is really
       * only important on a multi-user system anyway, to make sure one user
       * can't send a signal to a process owned by another.  -TYT, 12/12/91
       *
       * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
       * LBT 04.03.94
       */
      
 764  asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
      {
      	struct task_struct * p;
      	int err = -EINVAL;
      
 769  	if (!pid)
      		pid = current->pid;
 771  	if (!pgid)
      		pgid = pid;
 773  	if (pgid < 0)
 774  		return -EINVAL;
      
      	/* From this point forward we keep holding onto the tasklist lock
      	 * so that our parent does not change from under us. -DaveM
      	 */
      	read_lock(&tasklist_lock);
      
      	err = -ESRCH;
      	p = find_task_by_pid(pid);
 783  	if (!p)
 784  		goto out;
      
 786  	if (p->p_pptr == current || p->p_opptr == current) {
      		err = -EPERM;
 788  		if (p->session != current->session)
 789  			goto out;
      		err = -EACCES;
 791  		if (p->did_exec)
 792  			goto out;
 793  	} else if (p != current)
 794  		goto out;
      	err = -EPERM;
 796  	if (p->leader)
 797  		goto out;
 798  	if (pgid != pid) {
      		struct task_struct * tmp;
 800  		for_each_task (tmp) {
      			if (tmp->pgrp == pgid &&
 802  			    tmp->session == current->session)
 803  				goto ok_pgid;
      		}
 805  		goto out;
      	}
      
      ok_pgid:
      	p->pgrp = pgid;
      	err = 0;
      out:
      	/* All paths lead to here, thus we are safe. -DaveM */
 813  	read_unlock(&tasklist_lock);
 814  	return err;
      }
      
 817  asmlinkage long sys_getpgid(pid_t pid)
      {
 819  	if (!pid) {
 820  		return current->pgrp;
 821  	} else {
      		int retval;
      		struct task_struct *p;
      
      		read_lock(&tasklist_lock);
      		p = find_task_by_pid(pid);
      
      		retval = -ESRCH;
 829  		if (p)
      			retval = p->pgrp;
 831  		read_unlock(&tasklist_lock);
 832  		return retval;
      	}
      }
      
 836  asmlinkage long sys_getpgrp(void)
      {
      	/* SMP - assuming writes are word atomic this is fine */
 839  	return current->pgrp;
      }
      
 842  asmlinkage long sys_getsid(pid_t pid)
      {
 844  	if (!pid) {
 845  		return current->session;
 846  	} else {
      		int retval;
      		struct task_struct *p;
      
      		read_lock(&tasklist_lock);
      		p = find_task_by_pid(pid);
      
      		retval = -ESRCH;
 854  		if(p)
      			retval = p->session;
 856  		read_unlock(&tasklist_lock);
 857  		return retval;
      	}
      }
      
 861  asmlinkage long sys_setsid(void)
      {
      	struct task_struct * p;
      	int err = -EPERM;
      
      	read_lock(&tasklist_lock);
 867  	for_each_task(p) {
 868  		if (p->pgrp == current->pid)
 869  			goto out;
      	}
      
      	current->leader = 1;
      	current->session = current->pgrp = current->pid;
      	current->tty = NULL;
      	current->tty_old_pgrp = 0;
      	err = current->pgrp;
      out:
 878  	read_unlock(&tasklist_lock);
 879  	return err;
      }
      
      /*
       * Supplementary group IDs
       */
 885  asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
      {
      	int i;
      	
      	/*
      	 *	SMP: Nobody else can change our grouplist. Thus we are
      	 *	safe.
      	 */
      
 894  	if (gidsetsize < 0)
 895  		return -EINVAL;
      	i = current->ngroups;
 897  	if (gidsetsize) {
 898  		if (i > gidsetsize)
 899  			return -EINVAL;
 900  		if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
 901  			return -EFAULT;
      	}
 903  	return i;
      }
      
      /*
       *	SMP: Our groups are not shared. We can copy to/from them safely
       *	without another task interfering.
       */
       
 911  asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
      {
 913  	if (!capable(CAP_SETGID))
 914  		return -EPERM;
 915  	if ((unsigned) gidsetsize > NGROUPS)
 916  		return -EINVAL;
 917  	if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
 918  		return -EFAULT;
      	current->ngroups = gidsetsize;
 920  	return 0;
      }
      
 923  static int supplemental_group_member(gid_t grp)
      {
      	int i = current->ngroups;
      
 927  	if (i) {
      		gid_t *groups = current->groups;
 929  		do {
 930  			if (*groups == grp)
 931  				return 1;
      			groups++;
      			i--;
 934  		} while (i);
      	}
 936  	return 0;
      }
      
      /*
       * Check whether we're fsgid/egid or in the supplemental group..
       */
 942  int in_group_p(gid_t grp)
      {
      	int retval = 1;
 945  	if (grp != current->fsgid)
      		retval = supplemental_group_member(grp);
 947  	return retval;
      }
      
 950  int in_egroup_p(gid_t grp)
      {
      	int retval = 1;
 953  	if (grp != current->egid)
      		retval = supplemental_group_member(grp);
 955  	return retval;
      }
      
      /*
       * Guards system_utsname in this file: held for reading by
       * sys_newuname() and sys_gethostname(), for writing by
       * sys_sethostname() and sys_setdomainname().
       */
      DECLARE_RWSEM(uts_sem);
      
 960  asmlinkage long sys_newuname(struct new_utsname * name)
      {
      	int errno = 0;
      
      	down_read(&uts_sem);
 965  	if (copy_to_user(name,&system_utsname,sizeof *name))
      		errno = -EFAULT;
      	up_read(&uts_sem);
 968  	return errno;
      }
      
 971  asmlinkage long sys_sethostname(char *name, int len)
      {
      	int errno;
      
 975  	if (!capable(CAP_SYS_ADMIN))
 976  		return -EPERM;
 977  	if (len < 0 || len > __NEW_UTS_LEN)
 978  		return -EINVAL;
      	down_write(&uts_sem);
      	errno = -EFAULT;
 981  	if (!copy_from_user(system_utsname.nodename, name, len)) {
      		system_utsname.nodename[len] = 0;
      		errno = 0;
      	}
      	up_write(&uts_sem);
 986  	return errno;
      }
      
 989  asmlinkage long sys_gethostname(char *name, int len)
      {
      	int i, errno;
      
 993  	if (len < 0)
 994  		return -EINVAL;
      	down_read(&uts_sem);
      	i = 1 + strlen(system_utsname.nodename);
 997  	if (i > len)
      		i = len;
      	errno = 0;
1000  	if (copy_to_user(name, system_utsname.nodename, i))
      		errno = -EFAULT;
      	up_read(&uts_sem);
1003  	return errno;
      }
      
      /*
       * Only setdomainname; getdomainname can be implemented by calling
       * uname()
       */
1010  asmlinkage long sys_setdomainname(char *name, int len)
      {
      	int errno;
      
1014  	if (!capable(CAP_SYS_ADMIN))
1015  		return -EPERM;
1016  	if (len < 0 || len > __NEW_UTS_LEN)
1017  		return -EINVAL;
      
      	down_write(&uts_sem);
      	errno = -EFAULT;
1021  	if (!copy_from_user(system_utsname.domainname, name, len)) {
      		errno = 0;
      		system_utsname.domainname[len] = 0;
      	}
      	up_write(&uts_sem);
1026  	return errno;
      }
      
1029  asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
      {
1031  	if (resource >= RLIM_NLIMITS)
1032  		return -EINVAL;
1033  	else
      		return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1035  			? -EFAULT : 0;
      }
      
      #if !defined(__ia64__) && !defined(__s390__)
      
      /*
       *	Back compatibility for getrlimit. Needed for some apps.
       */
       
1044  asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
      {
      	struct rlimit x;
1047  	if (resource >= RLIM_NLIMITS)
1048  		return -EINVAL;
      
      	memcpy(&x, current->rlim + resource, sizeof(*rlim));
1051  	if(x.rlim_cur > 0x7FFFFFFF)
      		x.rlim_cur = 0x7FFFFFFF;
1053  	if(x.rlim_max > 0x7FFFFFFF)
      		x.rlim_max = 0x7FFFFFFF;
1055  	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
      }
      
      #endif
      
1060  asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
      {
      	struct rlimit new_rlim, *old_rlim;
      
1064  	if (resource >= RLIM_NLIMITS)
1065  		return -EINVAL;
1066  	if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1067  		return -EFAULT;
1068  	if (new_rlim.rlim_cur < 0 || new_rlim.rlim_max < 0)
1069  		return -EINVAL;
      	old_rlim = current->rlim + resource;
      	if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
      	     (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1073  	    !capable(CAP_SYS_RESOURCE))
1074  		return -EPERM;
1075  	if (resource == RLIMIT_NOFILE) {
1076  		if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1077  			return -EPERM;
      	}
      	*old_rlim = new_rlim;
1080  	return 0;
      }
      
      /*
       * It would make sense to put struct rusage in the task_struct,
       * except that would make the task_struct be *really big*.  After
       * task_struct gets moved into malloc'ed memory, it would
       * make sense to do this.  It will make moving the rest of the information
       * a lot simpler!  (Which we're not doing right now because we're not
       * measuring them yet).
       *
       * This is SMP safe.  Either we are called from sys_getrusage on ourselves
       * below (we know we aren't going to exit/disappear and only we change our
       * rusage counters), or we are called from wait4() on a process which is
       * either stopped or zombied.  In the zombied case the task won't get
       * reaped till shortly after the call to getrusage(), in both cases the
       * task being examined is in a frozen state so the counters won't change.
       *
       * FIXME! Get the fault counts properly!
       */
1100  int getrusage(struct task_struct *p, int who, struct rusage *ru)
      {
      	struct rusage r;
      
      	memset((char *) &r, 0, sizeof(r));
1105  	switch (who) {
1106  		case RUSAGE_SELF:
      			r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
      			r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
      			r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
      			r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
      			r.ru_minflt = p->min_flt;
      			r.ru_majflt = p->maj_flt;
      			r.ru_nswap = p->nswap;
1114  			break;
1115  		case RUSAGE_CHILDREN:
      			r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
      			r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
      			r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
      			r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
      			r.ru_minflt = p->cmin_flt;
      			r.ru_majflt = p->cmaj_flt;
      			r.ru_nswap = p->cnswap;
1123  			break;
1124  		default:
      			r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
      			r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
      			r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
      			r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
      			r.ru_minflt = p->min_flt + p->cmin_flt;
      			r.ru_majflt = p->maj_flt + p->cmaj_flt;
      			r.ru_nswap = p->nswap + p->cnswap;
1132  			break;
      	}
1134  	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
      }
      
1137  asmlinkage long sys_getrusage(int who, struct rusage *ru)
      {
1139  	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1140  		return -EINVAL;
1141  	return getrusage(current, who, ru);
      }
      
1144  asmlinkage long sys_umask(int mask)
      {
      	mask = xchg(¤t->fs->umask, mask & S_IRWXUGO);
1147  	return mask;
      }
          
1150  asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
      			  unsigned long arg4, unsigned long arg5)
      {
      	int error = 0;
      	int sig;
      
1156  	switch (option) {
1157  		case PR_SET_PDEATHSIG:
      			sig = arg2;
1159  			if (sig > _NSIG) {
      				error = -EINVAL;
1161  				break;
      			}
      			current->pdeath_signal = sig;
1164  			break;
1165  		case PR_GET_PDEATHSIG:
      			error = put_user(current->pdeath_signal, (int *)arg2);
1167  			break;
1168  		case PR_GET_DUMPABLE:
1169  			if (current->dumpable)
      				error = 1;
1171  			break;
1172  		case PR_SET_DUMPABLE:
1173  			if (arg2 != 0 && arg2 != 1) {
      				error = -EINVAL;
1175  				break;
      			}
      			current->dumpable = arg2;
1178  			break;
1179  	        case PR_SET_UNALIGN:
      #ifdef SET_UNALIGN_CTL
      			error = SET_UNALIGN_CTL(current, arg2);
      #else
      			error = -EINVAL;
      #endif
1185  			break;
      
1187  	        case PR_GET_UNALIGN:
      #ifdef GET_UNALIGN_CTL
      			error = GET_UNALIGN_CTL(current, arg2);
      #else
      			error = -EINVAL;
      #endif
1193  			break;
      
1195  		case PR_GET_KEEPCAPS:
1196  			if (current->keep_capabilities)
      				error = 1;
1198  			break;
1199  		case PR_SET_KEEPCAPS:
1200  			if (arg2 != 0 && arg2 != 1) {
      				error = -EINVAL;
1202  				break;
      			}
      			current->keep_capabilities = arg2;
1205  			break;
1206  		default:
      			error = -EINVAL;
1208  			break;
      	}
1210  	return error;
      }
      
      /*
       * Symbols passed to EXPORT_SYMBOL() (from <linux/module.h>): the
       * notifier-chain helpers, the reboot-notifier registration pair and the
       * group-membership tests.
       * NOTE(review): presumably exported for use by loadable modules - confirm.
       */
      EXPORT_SYMBOL(notifier_chain_register);
      EXPORT_SYMBOL(notifier_chain_unregister);
      EXPORT_SYMBOL(notifier_call_chain);
      EXPORT_SYMBOL(register_reboot_notifier);
      EXPORT_SYMBOL(unregister_reboot_notifier);
      EXPORT_SYMBOL(in_group_p);
      EXPORT_SYMBOL(in_egroup_p);