/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);
static void release_task(struct task_struct * p)
{
	if (p != current) {
#ifdef CONFIG_SMP
		/*
		 * Wait to make sure the process isn't on the
		 * runqueue (active on some other CPU still)
		 */
		for (;;) {
			task_lock(p);
			if (!p->has_cpu)
				break;
			task_unlock(p);
			do {
				barrier();
			} while (p->has_cpu);
		}
		task_unlock(p);
#endif
		atomic_dec(&p->user->processes);
		free_uid(p->user);
		unhash_process(p);

		release_thread(p);
		current->cmin_flt += p->min_flt + p->cmin_flt;
		current->cmaj_flt += p->maj_flt + p->cmaj_flt;
		current->cnswap += p->nswap + p->cnswap;
		/*
		 * Potentially available timeslices are retrieved
		 * here - this way the parent does not get penalized
		 * for creating too many processes.
		 *
		 * (this cannot be used to artificially 'generate'
		 * timeslices, because any timeslice recovered here
		 * was given away by the parent in the first place.)
		 */
		current->counter += p->counter;
		if (current->counter >= MAX_COUNTER)
			current->counter = MAX_COUNTER;
		free_task_struct(p);
	} else {
		printk("task releasing itself\n");
	}
}
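
/*
 * Illustrative sketch (not part of the original file, names are made
 * up): the timeslice hand-back above in isolation.  The cap at
 * MAX_COUNTER is what keeps a parent from banking unbounded credit by
 * reaping many children.
 */
static inline long reclaim_counter(long parent, long child, long cap)
{
	parent += child;	/* recover the child's unused ticks */
	if (parent >= cap)
		parent = cap;	/* but never beyond the normal maximum */
	return parent;
}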
      
/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int fallback;

	fallback = -1;
	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->session <= 0)
			continue;
		if (p->pgrp == pgrp) {
			fallback = p->session;
			break;
		}
		if (p->pid == pgrp)
			fallback = p->session;
	}
	read_unlock(&tasklist_lock);
	return fallback;
}
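
/*
 * Hypothetical usage sketch (not in the original file): a
 * TIOCSPGRP-style permission check only needs to know whether the
 * requested group belongs to the caller's session.
 */
static inline int pgrp_in_my_session(int pgrp)
{
	return session_of_pgrp(pgrp) == current->session;
}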
      
/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p == ignored_task) || (p->pgrp != pgrp) ||
		    (p->state == TASK_ZOMBIE) ||
		    (p->p_pptr->pid == 1))
			continue;
		if ((p->p_pptr->pgrp != pgrp) &&
		    (p->p_pptr->session == p->session)) {
			read_unlock(&tasklist_lock);
			return 0;
		}
	}
	read_unlock(&tasklist_lock);
	return 1;	/* (sighing) "Often!" */
}

int is_orphaned_pgrp(int pgrp)
{
	return will_become_orphaned_pgrp(pgrp, 0);
}
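
/*
 * Hypothetical usage sketch (assumption, not from this file): per the
 * POSIX rule quoted above, a tty driver would suppress job-control
 * stop signals for a group that has no one left to wake it up again.
 */
static inline int may_send_stop_signal(int pgrp)
{
	return !is_orphaned_pgrp(pgrp);
}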
      
static inline int has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct * p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->pgrp != pgrp)
			continue;
		if (p->state != TASK_STOPPED)
			continue;
		retval = 1;
		break;
	}
	read_unlock(&tasklist_lock);
	return retval;
}
      
/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give them to
 * the global child reaper process (i.e. "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct * p, *reaper;

	read_lock(&tasklist_lock);

	/* Next in our thread group */
	reaper = next_thread(father);
	if (reaper == father)
		reaper = child_reaper;

	for_each_task(p) {
		if (p->p_opptr == father) {
			/* We don't want people slaying init */
			p->exit_signal = SIGCHLD;
			p->self_exec_id++;
			p->p_opptr = reaper;
			if (p->pdeath_signal)
				send_sig(p->pdeath_signal, p, 0);
		}
	}
	read_unlock(&tasklist_lock);
}
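
/*
 * Userspace-side sketch (assumption, not part of this file, not
 * compiled here): the pdeath_signal honoured above is the one a child
 * asks for with prctl(2), so it can learn when its parent goes away.
 */
#if 0
#include <sys/prctl.h>
#include <signal.h>

void watch_my_parent(void)
{
	/* deliver SIGHUP to this process when its parent dies */
	prctl(PR_SET_PDEATHSIG, SIGHUP);
}
#endif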
      
static inline void close_files(struct files_struct * files)
{
	int i, j;

	j = 0;
	for (;;) {
		unsigned long set;
		i = j * __NFDBITS;
		/* stop at whichever of the two arrays is shorter */
		if (i >= files->max_fdset || i >= files->max_fds)
			break;
		/* grab one word of the open-fd bitmap */
		set = files->open_fds->fds_bits[j++];
		while (set) {
			if (set & 1) {
				/* detach the file pointer before closing it */
				struct file * file = xchg(&files->fd[i], NULL);
				if (file)
					filp_close(file, files);
			}
			i++;
			set >>= 1;
		}
	}
}
      
void put_files_struct(struct files_struct *files)
{
	if (atomic_dec_and_test(&files->count)) {
		close_files(files);
		/*
		 * Free the fd and fdset arrays if we expanded them.
		 */
		if (files->fd != &files->fd_array[0])
			free_fd_array(files->fd, files->max_fds);
		if (files->max_fdset > __FD_SETSIZE) {
			free_fdset(files->open_fds, files->max_fdset);
			free_fdset(files->close_on_exec, files->max_fdset);
		}
		kmem_cache_free(files_cachep, files);
	}
}
      
static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (files) {
		task_lock(tsk);
		tsk->files = NULL;
		task_unlock(tsk);
		put_files_struct(files);
	}
}

void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}
      
static inline void __put_fs_struct(struct fs_struct *fs)
{
	/* No need to hold fs->lock if we are killing it */
	if (atomic_dec_and_test(&fs->count)) {
		dput(fs->root);
		mntput(fs->rootmnt);
		dput(fs->pwd);
		mntput(fs->pwdmnt);
		if (fs->altroot) {
			dput(fs->altroot);
			mntput(fs->altrootmnt);
		}
		kmem_cache_free(fs_cachep, fs);
	}
}

void put_fs_struct(struct fs_struct *fs)
{
	__put_fs_struct(fs);
}
      
static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct * fs = tsk->fs;

	if (fs) {
		task_lock(tsk);
		tsk->fs = NULL;
		task_unlock(tsk);
		__put_fs_struct(fs);
	}
}

void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}
      
/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
	struct mm_struct *mm = current->mm;
	current->mm = NULL;
	/* active_mm is still 'mm' */
	atomic_inc(&mm->mm_count);
	enter_lazy_tlb(mm, current, smp_processor_id());
	return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
	struct mm_struct *active_mm = current->active_mm;

	current->mm = mm;
	if (mm != active_mm) {
		current->active_mm = mm;
		activate_mm(active_mm, mm);
	}
	mmdrop(active_mm);
}
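
/*
 * Hypothetical usage sketch (not in the original file): the two
 * helpers above are meant to bracket a region that never touches user
 * memory, so the scheduler can skip TLB flushes on our behalf.
 */
static inline void example_lazy_section(void)
{
	struct mm_struct *mm = start_lazy_tlb();
	/* ... do work that never dereferences user addresses ... */
	end_lazy_tlb(mm);
}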
      
/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	mm_release();
	if (mm) {
		atomic_inc(&mm->mm_count);
		if (mm != tsk->active_mm) BUG();
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		task_unlock(tsk);
		enter_lazy_tlb(mm, current, smp_processor_id());
		mmput(mm);
	}
}

void exit_mm(struct task_struct *tsk)
{
	__exit_mm(tsk);
}
      
/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	t = current->p_pptr;

	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp,SIGHUP,1);
		kill_pg(current->pgrp,SIGCONT,1);
	}

	/* Let father know we died.
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes.
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec_id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 */

	if (current->exit_signal != SIGCHLD &&
	    (current->parent_exec_id != t->self_exec_id ||
	     current->self_exec_id != current->parent_exec_id)
	    && !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;


	/*
	 * This loop does two things:
	 *
	 * A.  Make init inherit all the child processes
	 * B.  Check to see if any process groups have become orphaned
	 *	as a result of our exiting, and if they have any stopped
	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	current->state = TASK_ZOMBIE;
	do_notify_parent(current, current->exit_signal);
	while (current->p_cptr != NULL) {
		p = current->p_cptr;
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->ptrace = 0;

		p->p_pptr = p->p_opptr;
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			write_unlock_irq(&tasklist_lock);
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp,SIGHUP,1);
				kill_pg(pgrp,SIGCONT,1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);
}
      
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())
		panic("Aiee, killing interrupt handler!");
	if (!tsk->pid)
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	__exit_mm(tsk);

	lock_kernel();
	sem_exit();
	__exit_files(tsk);
	__exit_fs(tsk);
	exit_sighand(tsk);
	exit_thread();

	if (current->leader)
		disassociate_ctty(1);

	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	exit_notify();
	schedule();
	BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc into thinking do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}
      
NORET_TYPE void up_and_exit(struct semaphore *sem, long code)
{
	if (sem)
		up(sem);

	do_exit(code);
}
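
/*
 * Hypothetical usage sketch (names are made up, not from this file):
 * a kernel thread lets its creator, blocked in down(), know that it is
 * done before the task itself is torn down.
 */
#if 0
static DECLARE_MUTEX_LOCKED(shutdown_sem);

static int example_kthread(void *unused)
{
	/* ... main loop until asked to stop ... */
	up_and_exit(&shutdown_sem, 0);	/* wakes the waiter, never returns */
}
#endif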
      
asmlinkage long sys_exit(int error_code)
{
	do_exit((error_code&0xff)<<8);
}
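
/*
 * Userspace-side sketch (assumption, not part of this file, not
 * compiled here): the shift above places the exit code where the
 * <sys/wait.h> status macros expect to find it.
 */
#if 0
#include <sys/wait.h>

int exit_code_of(int status)
{
	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
#endif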
      
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit,&wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
			if (pid>0) {
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.)  See the sketch after
			 * this function. */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit,&wait);
	return retval;
}
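
/*
 * Illustrative sketch (hypothetical helper, not in the original file):
 * the clone-vs-non-clone filter in sys_wait4() above, rewritten as a
 * standalone predicate.
 */
static inline int wait4_eligible(struct task_struct *p, int options)
{
	if (options & __WALL)
		return 1;				/* everybody */
	if (options & __WCLONE)
		return p->exit_signal != SIGCHLD;	/* clone children only */
	return p->exit_signal == SIGCHLD;		/* non-clone children only */
}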
      
#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}

#endif
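
/*
 * Userspace-side sketch (assumption, not part of this file, not
 * compiled here): the libc implementation the comment above has in
 * mind is essentially a thin wrapper over the wait4 system call.
 */
#if 0
pid_t waitpid(pid_t pid, int *status, int options)
{
	return wait4(pid, status, options, (struct rusage *) 0);
}
#endif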