/*
       * linux/ipc/shm.c
       * Copyright (C) 1992, 1993 Krishna Balasubramanian
       *	 Many improvements/fixes by Bruno Haible.
       * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
       * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
       *
       * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
       * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
       * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
       * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
       * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
       * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
       * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
       *
       */
      
      #include <linux/config.h>
      #include <linux/malloc.h>
      #include <linux/shm.h>
      #include <linux/init.h>
      #include <linux/file.h>
      #include <linux/mman.h>
      #include <linux/proc_fs.h>
      #include <asm/uaccess.h>
      
      #include "util.h"
      
       /*
        * In-kernel descriptor of one System V shared memory segment.
        *
        * The shm_lock()/shm_get() macros cast the pointer returned by the generic
        * ipc_lock()/ipc_get() helpers to this type, while shm_addid() registers
        * &shm_perm; shm_perm therefore presumably has to remain the first member
        * (confirm against ipc/util.c before reordering).
        */
       struct shmid_kernel /* private to the kernel */
       {	
       	struct kern_ipc_perm	shm_perm;	/* key, owner/creator ids, mode and seq */
       	struct file *		shm_file;	/* backing file from shmem_file_setup() */
       	int			id;		/* full id from shm_buildid(); mirrored in the inode's i_ino */
       	unsigned long		shm_nattch;	/* attach count; see shm_inc()/shm_close() */
       	unsigned long		shm_segsz;	/* segment size in bytes as requested at creation */
       	time_t			shm_atim;	/* time of last attach (set in shm_inc()) */
       	time_t			shm_dtim;	/* time of last detach (set in shm_close()) */
       	time_t			shm_ctim;	/* set at creation and on IPC_SET */
       	pid_t			shm_cprid;	/* pid of the creating task */
       	pid_t			shm_lprid;	/* pid of the task that last attached or detached */
       };
      
       /*
        * The permission mode word doubles as the segment flag word: besides the
        * S_IRWXUGO bits it carries SHM_DEST and SHM_LOCKED (see sys_shmctl()).
        */
       #define shm_flags	shm_perm.mode
       
       /* Forward declarations; both tables are defined after the functions they reference. */
       static struct file_operations shm_file_operations;
       static struct vm_operations_struct shm_vm_ops;
       
       /* The id table of all shm segments; shm_ids.sem serialises create/destroy paths. */
       static struct ipc_ids shm_ids;
       
       /*
        * Thin wrappers that bind the generic ipc_* helpers to shm_ids and cast the
        * result to struct shmid_kernel * where a descriptor is returned.
        */
       #define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
       #define shm_unlock(id)	ipc_unlock(&shm_ids,id)
       #define shm_lockall()	ipc_lockall(&shm_ids)
       #define shm_unlockall()	ipc_unlockall(&shm_ids)
       #define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
       #define shm_buildid(id, seq) \
       	ipc_buildid(&shm_ids, id, seq)
      
       /* Prototypes for functions referenced before their definition. */
       static int newseg (key_t key, int shmflg, size_t size);
       static void shm_open (struct vm_area_struct *shmd);
       static void shm_close (struct vm_area_struct *shmd);
       #ifdef CONFIG_PROC_FS
       static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
       #endif
       
       /* Limits, initialised from the SHMMAX/SHMALL/SHMMNI constants. */
       size_t	shm_ctlmax = SHMMAX;	/* largest segment size in bytes accepted by newseg() */
       size_t 	shm_ctlall = SHMALL;	/* system-wide limit, in pages, compared against shm_tot */
       int 	shm_ctlmni = SHMMNI;	/* passed (plus one) to ipc_addid() as the id limit */
       
       static int shm_tot; /* total number of shared memory pages */
      
  71  void __init shm_init (void)
      {
      	ipc_init_ids(&shm_ids, 1);
      	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
      }
      
  77  static inline int shm_checkid(struct shmid_kernel *s, int id)
      {
  79  	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
  80  		return -EIDRM;
  81  	return 0;
      }
      
  84  static inline struct shmid_kernel *shm_rmid(int id)
      {
  86  	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
      }
      
  89  static inline int shm_addid(struct shmid_kernel *shp)
      {
  91  	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1);
      }
      
      
      
  96  static inline void shm_inc (int id) {
      	struct shmid_kernel *shp;
      
  99  	if(!(shp = shm_lock(id)))
 100  		BUG();
      	shp->shm_atim = CURRENT_TIME;
      	shp->shm_lprid = current->pid;
      	shp->shm_nattch++;
      	shm_unlock(id);
      }
      
      /* This is called by fork, once for every shm attach. */
 108  static void shm_open (struct vm_area_struct *shmd)
      {
      	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
      }
      
      /*
       * shm_destroy - free the struct shmid_kernel
       *
       * @shp: struct to free
       *
       * It has to be called with shp and shm_ids.sem locked
       */
 120  static void shm_destroy (struct shmid_kernel *shp)
      {
      	shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
      	shm_rmid (shp->id);
      	fput (shp->shm_file);
      	kfree (shp);
      }
      
      /*
       * remove the attach descriptor shmd.
       * free memory for segment if it is marked destroyed.
       * The descriptor has already been removed from the current->mm->mmap list
       * and will later be kfree()d.
       */
 134  static void shm_close (struct vm_area_struct *shmd)
      {
      	struct file * file = shmd->vm_file;
      	int id = file->f_dentry->d_inode->i_ino;
      	struct shmid_kernel *shp;
      
      	down (&shm_ids.sem);
      	/* remove from the list of attaches of the shm segment */
 142  	if(!(shp = shm_lock(id)))
 143  		BUG();
      	shp->shm_lprid = current->pid;
      	shp->shm_dtim = CURRENT_TIME;
      	shp->shm_nattch--;
      	if(shp->shm_nattch == 0 &&
 148  	   shp->shm_flags & SHM_DEST)
      		shm_destroy (shp);
      
      	shm_unlock(id);
      	up (&shm_ids.sem);
      }
      
 155  static int shm_mmap(struct file * file, struct vm_area_struct * vma)
      {
      	UPDATE_ATIME(file->f_dentry->d_inode);
      	vma->vm_ops = &shm_vm_ops;
      	shm_inc(file->f_dentry->d_inode->i_ino);
 160  	return 0;
      }
      
       /*
        * File operations that newseg() installs on a segment's backing file;
        * only mmap is provided.
        */
       static struct file_operations shm_file_operations = {
       	mmap:	shm_mmap
       };
      
       /*
        * VM operations that shm_mmap() installs on every mapping of a segment.
        * sys_shmdt() also uses the address of this table to recognise shm mappings.
        */
       static struct vm_operations_struct shm_vm_ops = {
       	open:	shm_open,	/* callback for a new vm-area open */
       	close:	shm_close,	/* callback for when the vm-area is released */
       	nopage:	shmem_nopage,	/* fault handler; defined outside this file */
       };
      
 173  static int newseg (key_t key, int shmflg, size_t size)
      {
      	int error;
      	struct shmid_kernel *shp;
      	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
      	struct file * file;
      	char name[13];
      	int id;
      
 182  	if (size < SHMMIN || size > shm_ctlmax)
 183  		return -EINVAL;
      
 185  	if (shm_tot + numpages >= shm_ctlall)
 186  		return -ENOSPC;
      
      	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_USER);
 189  	if (!shp)
 190  		return -ENOMEM;
      	sprintf (name, "SYSV%08x", key);
      	file = shmem_file_setup(name, size);
      	error = PTR_ERR(file);
 194  	if (IS_ERR(file))
 195  		goto no_file;
      
      	error = -ENOSPC;
      	id = shm_addid(shp);
 199  	if(id == -1) 
 200  		goto no_id;
      	shp->shm_perm.key = key;
      	shp->shm_flags = (shmflg & S_IRWXUGO);
      	shp->shm_cprid = current->pid;
      	shp->shm_lprid = 0;
      	shp->shm_atim = shp->shm_dtim = 0;
      	shp->shm_ctim = CURRENT_TIME;
      	shp->shm_segsz = size;
      	shp->shm_nattch = 0;
      	shp->id = shm_buildid(id,shp->shm_perm.seq);
      	shp->shm_file = file;
      	file->f_dentry->d_inode->i_ino = shp->id;
      	file->f_op = &shm_file_operations;
      	shm_tot += numpages;
      	shm_unlock (id);
 215  	return shp->id;
      
      no_id:
      	fput(file);
      no_file:
      	kfree(shp);
 221  	return error;
      }
      
 224  asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
      {
      	struct shmid_kernel *shp;
      	int err, id = 0;
      
      	down(&shm_ids.sem);
 230  	if (key == IPC_PRIVATE) {
      		err = newseg(key, shmflg, size);
 232  	} else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
 233  		if (!(shmflg & IPC_CREAT))
      			err = -ENOENT;
 235  		else
      			err = newseg(key, shmflg, size);
 237  	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
      		err = -EEXIST;
 239  	} else {
      		shp = shm_lock(id);
 241  		if(shp==NULL)
 242  			BUG();
 243  		if (shp->shm_segsz < size)
      			err = -EINVAL;
 245  		else if (ipcperms(&shp->shm_perm, shmflg))
      			err = -EACCES;
 247  		else
      			err = shm_buildid(id, shp->shm_perm.seq);
      		shm_unlock(id);
      	}
      	up(&shm_ids.sem);
 252  	return err;
      }
      
 255  static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
      {
 257  	switch(version) {
 258  	case IPC_64:
 259  		return copy_to_user(buf, in, sizeof(*in));
 260  	case IPC_OLD:
      	    {
      		struct shmid_ds out;
      
      		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
      		out.shm_segsz	= in->shm_segsz;
      		out.shm_atime	= in->shm_atime;
      		out.shm_dtime	= in->shm_dtime;
      		out.shm_ctime	= in->shm_ctime;
      		out.shm_cpid	= in->shm_cpid;
      		out.shm_lpid	= in->shm_lpid;
      		out.shm_nattch	= in->shm_nattch;
      
 273  		return copy_to_user(buf, &out, sizeof(out));
      	    }
 275  	default:
 276  		return -EINVAL;
      	}
      }
      
       /*
        * The subset of a user-supplied shmid_ds/shmid64_ds that IPC_SET uses;
        * filled in by copy_shmid_from_user().
        */
       struct shm_setbuf {
       	uid_t	uid;	/* new owner uid */
       	gid_t	gid;	/* new owner gid */
       	mode_t	mode;	/* sys_shmctl() applies only the S_IRWXUGO bits */
       };	
      
 286  static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
      {
 288  	switch(version) {
 289  	case IPC_64:
      	    {
      		struct shmid64_ds tbuf;
      
 293  		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
 294  			return -EFAULT;
      
      		out->uid	= tbuf.shm_perm.uid;
      		out->gid	= tbuf.shm_perm.gid;
      		out->mode	= tbuf.shm_flags;
      
 300  		return 0;
      	    }
 302  	case IPC_OLD:
      	    {
      		struct shmid_ds tbuf_old;
      
 306  		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 307  			return -EFAULT;
      
      		out->uid	= tbuf_old.shm_perm.uid;
      		out->gid	= tbuf_old.shm_perm.gid;
      		out->mode	= tbuf_old.shm_flags;
      
 313  		return 0;
      	    }
 315  	default:
 316  		return -EINVAL;
      	}
      }
      
 320  static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
      {
 322  	switch(version) {
 323  	case IPC_64:
 324  		return copy_to_user(buf, in, sizeof(*in));
 325  	case IPC_OLD:
      	    {
      		struct shminfo out;
      
 329  		if(in->shmmax > INT_MAX)
      			out.shmmax = INT_MAX;
 331  		else
      			out.shmmax = (int)in->shmmax;
      
      		out.shmmin	= in->shmmin;
      		out.shmmni	= in->shmmni;
      		out.shmseg	= in->shmseg;
      		out.shmall	= in->shmall; 
      
 339  		return copy_to_user(buf, &out, sizeof(out));
      	    }
 341  	default:
 342  		return -EINVAL;
      	}
      }
      
 346  static void shm_get_stat (unsigned long *rss, unsigned long *swp) 
      {
      	int i;
      
      	*rss = 0;
      	*swp = 0;
      
 353  	for(i = 0; i <= shm_ids.max_id; i++) {
      		struct shmid_kernel* shp;
      		struct inode * inode;
      
      		shp = shm_get(i);
 358  		if(shp == NULL)
 359  			continue;
      		inode = shp->shm_file->f_dentry->d_inode;
      		spin_lock (&inode->u.shmem_i.lock);
      		*rss += inode->i_mapping->nrpages;
      		*swp += inode->u.shmem_i.swapped;
 364  		spin_unlock (&inode->u.shmem_i.lock);
      	}
      }
      
 368  asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
      {
      	struct shm_setbuf setbuf;
      	struct shmid_kernel *shp;
      	int err, version;
      
 374  	if (cmd < 0 || shmid < 0)
 375  		return -EINVAL;
      
      	version = ipc_parse_version(&cmd);
      
 379  	switch (cmd) { /* replace with proc interface ? */
 380  	case IPC_INFO:
      	{
      		struct shminfo64 shminfo;
      
      		memset(&shminfo,0,sizeof(shminfo));
      		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
      		shminfo.shmmax = shm_ctlmax;
      		shminfo.shmall = shm_ctlall;
      
      		shminfo.shmmin = SHMMIN;
 390  		if(copy_shminfo_to_user (buf, &shminfo, version))
 391  			return -EFAULT;
      		/* reading a integer is always atomic */
      		err= shm_ids.max_id;
 394  		if(err<0)
      			err = 0;
 396  		return err;
      	}
 398  	case SHM_INFO:
      	{
      		struct shm_info shm_info;
      
      		memset(&shm_info,0,sizeof(shm_info));
      		down(&shm_ids.sem);
      		shm_lockall();
      		shm_info.used_ids = shm_ids.in_use;
      		shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
      		shm_info.shm_tot = shm_tot;
      		shm_info.swap_attempts = 0;
      		shm_info.swap_successes = 0;
      		err = shm_ids.max_id;
      		shm_unlockall();
      		up(&shm_ids.sem);
 413  		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
 414  			return -EFAULT;
      
 416  		return err < 0 ? 0 : err;
      	}
 418  	case SHM_STAT:
 419  	case IPC_STAT:
      	{
      		struct shmid64_ds tbuf;
      		int result;
      		memset(&tbuf, 0, sizeof(tbuf));
      		shp = shm_lock(shmid);
 425  		if(shp==NULL)
 426  			return -EINVAL;
 427  		if(cmd==SHM_STAT) {
      			err = -EINVAL;
 429  			if (shmid > shm_ids.max_id)
 430  				goto out_unlock;
      			result = shm_buildid(shmid, shp->shm_perm.seq);
 432  		} else {
      			err = shm_checkid(shp,shmid);
 434  			if(err)
 435  				goto out_unlock;
      			result = 0;
      		}
      		err=-EACCES;
 439  		if (ipcperms (&shp->shm_perm, S_IRUGO))
 440  			goto out_unlock;
      		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
      		tbuf.shm_segsz	= shp->shm_segsz;
      		tbuf.shm_atime	= shp->shm_atim;
      		tbuf.shm_dtime	= shp->shm_dtim;
      		tbuf.shm_ctime	= shp->shm_ctim;
      		tbuf.shm_cpid	= shp->shm_cprid;
      		tbuf.shm_lpid	= shp->shm_lprid;
      		tbuf.shm_nattch	= shp->shm_nattch;
      		shm_unlock(shmid);
 450  		if(copy_shmid_to_user (buf, &tbuf, version))
 451  			return -EFAULT;
 452  		return result;
      	}
 454  	case SHM_LOCK:
 455  	case SHM_UNLOCK:
      	{
      /* Allow superuser to lock segment in memory */
      /* Should the pages be faulted in here or leave it to user? */
      /* need to determine interaction with current->swappable */
 460  		if (!capable(CAP_IPC_LOCK))
 461  			return -EPERM;
      
      		shp = shm_lock(shmid);
 464  		if(shp==NULL)
 465  			return -EINVAL;
      		err = shm_checkid(shp,shmid);
 467  		if(err)
 468  			goto out_unlock;
 469  		if(cmd==SHM_LOCK) {
      			shp->shm_file->f_dentry->d_inode->u.shmem_i.locked = 1;
      			shp->shm_flags |= SHM_LOCKED;
 472  		} else {
      			shp->shm_file->f_dentry->d_inode->u.shmem_i.locked = 0;
      			shp->shm_flags &= ~SHM_LOCKED;
      		}
      		shm_unlock(shmid);
 477  		return err;
      	}
 479  	case IPC_RMID:
      	{
      		/*
      		 *	We cannot simply remove the file. The SVID states
      		 *	that the block remains until the last person
      		 *	detaches from it, then is deleted. A shmat() on
      		 *	an RMID segment is legal in older Linux and if 
      		 *	we change it apps break...
      		 *
      		 *	Instead we set a destroyed flag, and then blow
      		 *	the name away when the usage hits zero.
      		 */
      		down(&shm_ids.sem);
      		shp = shm_lock(shmid);
      		err = -EINVAL;
 494  		if (shp == NULL) 
 495  			goto out_up;
      		err = shm_checkid(shp, shmid);
 497  		if (err == 0) {
 498  			if (shp->shm_nattch){
      				shp->shm_flags |= SHM_DEST;
      				/* Do not find it any more */
      				shp->shm_perm.key = IPC_PRIVATE;
 502  			} else
      				shm_destroy (shp);
      		}
      		/* Unlock */
      		shm_unlock(shmid);
      		up(&shm_ids.sem);
 508  		return err;
      	}
      
 511  	case IPC_SET:
      	{
 513  		if(copy_shmid_from_user (&setbuf, buf, version))
 514  			return -EFAULT;
      		down(&shm_ids.sem);
      		shp = shm_lock(shmid);
      		err=-EINVAL;
 518  		if(shp==NULL)
 519  			goto out_up;
      		err = shm_checkid(shp,shmid);
 521  		if(err)
 522  			goto out_unlock_up;
      		err=-EPERM;
      		if (current->euid != shp->shm_perm.uid &&
      		    current->euid != shp->shm_perm.cuid && 
 526  		    !capable(CAP_SYS_ADMIN)) {
 527  			goto out_unlock_up;
      		}
      
      		shp->shm_perm.uid = setbuf.uid;
      		shp->shm_perm.gid = setbuf.gid;
      		shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO)
      			| (setbuf.mode & S_IRWXUGO);
      		shp->shm_ctim = CURRENT_TIME;
 535  		break;
      	}
      
 538  	default:
 539  		return -EINVAL;
      	}
      
      	err = 0;
      out_unlock_up:
      	shm_unlock(shmid);
      out_up:
      	up(&shm_ids.sem);
 547  	return err;
      out_unlock:
      	shm_unlock(shmid);
 550  	return err;
      }
      
      /*
       * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
       */
 556  asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
      {
      	struct shmid_kernel *shp;
      	unsigned long addr;
      	struct file * file;
      	int    err;
      	unsigned long flags;
      	unsigned long prot;
      	unsigned long o_flags;
      	int acc_mode;
      	void *user_addr;
      
 568  	if (shmid < 0)
 569  		return -EINVAL;
      
 571  	if ((addr = (ulong)shmaddr)) {
 572  		if (addr & (SHMLBA-1)) {
 573  			if (shmflg & SHM_RND)
      				addr &= ~(SHMLBA-1);	   /* round down */
 575  			else
 576  				return -EINVAL;
      		}
      		flags = MAP_SHARED | MAP_FIXED;
 579  	} else
      		flags = MAP_SHARED;
      
 582  	if (shmflg & SHM_RDONLY) {
      		prot = PROT_READ;
      		o_flags = O_RDONLY;
      		acc_mode = S_IRUGO;
 586  	} else {
      		prot = PROT_READ | PROT_WRITE;
      		o_flags = O_RDWR;
      		acc_mode = S_IRUGO | S_IWUGO;
      	}
      
      	/*
      	 * We cannot rely on the fs check since SYSV IPC does have an
      	 * aditional creator id...
      	 */
      	shp = shm_lock(shmid);
 597  	if(shp == NULL)
 598  		return -EINVAL;
 599  	if (ipcperms(&shp->shm_perm, acc_mode)) {
      		shm_unlock(shmid);
 601  		return -EACCES;
      	}
      	file = shp->shm_file;
      	shp->shm_nattch++;
      	shm_unlock(shmid);
      
      	down(¤t->mm->mmap_sem);
      	user_addr = (void *) do_mmap (file, addr, file->f_dentry->d_inode->i_size, prot, flags, 0);
      	up(¤t->mm->mmap_sem);
      
      	down (&shm_ids.sem);
 612  	if(!(shp = shm_lock(shmid)))
 613  		BUG();
      	shp->shm_nattch--;
      	if(shp->shm_nattch == 0 &&
 616  	   shp->shm_flags & SHM_DEST)
      		shm_destroy (shp);
      	shm_unlock(shmid);
      	up (&shm_ids.sem);
      
      	*raddr = (unsigned long) user_addr;
      	err = 0;
 623  	if (IS_ERR(user_addr))
      		err = PTR_ERR(user_addr);
 625  	return err;
      
      }
      
      /*
       * detach and kill segment if marked destroyed.
       * The work is done in shm_close.
       */
 633  asmlinkage long sys_shmdt (char *shmaddr)
      {
      	struct mm_struct *mm = current->mm;
      	struct vm_area_struct *shmd, *shmdnext;
      
      	down(&mm->mmap_sem);
 639  	for (shmd = mm->mmap; shmd; shmd = shmdnext) {
      		shmdnext = shmd->vm_next;
      		if (shmd->vm_ops == &shm_vm_ops
 642  		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
      			do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start);
      	}
      	up(&mm->mmap_sem);
 646  	return 0;
      }
      
      #ifdef CONFIG_PROC_FS
 650  static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
      {
      	off_t pos = 0;
      	off_t begin = 0;
      	int i, len = 0;
      
      	down(&shm_ids.sem);
      	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");
      
 659  	for(i = 0; i <= shm_ids.max_id; i++) {
      		struct shmid_kernel* shp;
      
      		shp = shm_lock(i);
 663  		if(shp!=NULL) {
      #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
      #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
      			char *format;
      
 668  			if (sizeof(size_t) <= sizeof(int))
      				format = SMALL_STRING;
 670  			else
      				format = BIG_STRING;
      			len += sprintf(buffer + len, format,
      				shp->shm_perm.key,
      				shm_buildid(i, shp->shm_perm.seq),
      				shp->shm_flags,
      				shp->shm_segsz,
      				shp->shm_cprid,
      				shp->shm_lprid,
      				shp->shm_nattch,
      				shp->shm_perm.uid,
      				shp->shm_perm.gid,
      				shp->shm_perm.cuid,
      				shp->shm_perm.cgid,
      				shp->shm_atim,
      				shp->shm_dtim,
      				shp->shm_ctim);
      			shm_unlock(i);
      
      			pos += len;
 690  			if(pos < offset) {
      				len = 0;
      				begin = pos;
      			}
 694  			if(pos > offset + length)
 695  				goto done;
      		}
      	}
      	*eof = 1;
      done:
      	up(&shm_ids.sem);
      	*start = buffer + (offset - begin);
      	len -= (offset - begin);
 703  	if(len > length)
      		len = length;
 705  	if(len < 0)
      		len = 0;
 707  	return len;
      }
      #endif