/*
       *  linux/fs/pipe.c
       *
       *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
       */
      
      #include <linux/mm.h>
      #include <linux/file.h>
      #include <linux/poll.h>
      #include <linux/malloc.h>
      #include <linux/module.h>
      #include <linux/init.h>
      
      #include <asm/uaccess.h>
      
      /*
       * We use a start+len construction, which provides full use of the 
       * allocated memory.
       * -- Florian Coosmann (FGC)
       * 
       * Reads with count = 0 should always return 0.
       * -- Julian Bradfield 1999-06-07.
       */
      
      /* Drop the inode semaphore and wait for a pipe event, atomically */
  26  void pipe_wait(struct inode * inode)
      {
      	DECLARE_WAITQUEUE(wait, current);
      	current->state = TASK_INTERRUPTIBLE;
      	add_wait_queue(PIPE_WAIT(*inode), &wait);
      	up(PIPE_SEM(*inode));
      	schedule();
      	remove_wait_queue(PIPE_WAIT(*inode), &wait);
      	current->state = TASK_RUNNING;
      	down(PIPE_SEM(*inode));
      }
      
      static ssize_t
  39  pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
      {
      	struct inode *inode = filp->f_dentry->d_inode;
      	ssize_t size, read, ret;
      
      	/* Seeks are not allowed on pipes.  */
      	ret = -ESPIPE;
      	read = 0;
  47  	if (ppos != &filp->f_pos)
  48  		goto out_nolock;
      
      	/* Always return 0 on null read.  */
      	ret = 0;
  52  	if (count == 0)
  53  		goto out_nolock;
      
      	/* Get the pipe semaphore */
      	ret = -ERESTARTSYS;
  57  	if (down_interruptible(PIPE_SEM(*inode)))
  58  		goto out_nolock;
      
  60  	if (PIPE_EMPTY(*inode)) {
      do_more_read:
      		ret = 0;
  63  		if (!PIPE_WRITERS(*inode))
  64  			goto out;
      
      		ret = -EAGAIN;
  67  		if (filp->f_flags & O_NONBLOCK)
  68  			goto out;
      
  70  		for (;;) {
      			PIPE_WAITING_READERS(*inode)++;
      			pipe_wait(inode);
      			PIPE_WAITING_READERS(*inode)--;
      			ret = -ERESTARTSYS;
  75  			if (signal_pending(current))
  76  				goto out;
      			ret = 0;
  78  			if (!PIPE_EMPTY(*inode))
  79  				break;
  80  			if (!PIPE_WRITERS(*inode))
  81  				goto out;
      		}
      	}
      
      	/* Read what data is available.  */
      	ret = -EFAULT;
  87  	while (count > 0 && (size = PIPE_LEN(*inode))) {
      		char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
      		ssize_t chars = PIPE_MAX_RCHUNK(*inode);
      
  91  		if (chars > count)
      			chars = count;
  93  		if (chars > size)
      			chars = size;
      
  96  		if (copy_to_user(buf, pipebuf, chars))
  97  			goto out;
      
      		read += chars;
      		PIPE_START(*inode) += chars;
      		PIPE_START(*inode) &= (PIPE_SIZE - 1);
      		PIPE_LEN(*inode) -= chars;
      		count -= chars;
      		buf += chars;
      	}
      
      	/* Cache behaviour optimization */
 108  	if (!PIPE_LEN(*inode))
      		PIPE_START(*inode) = 0;
      
 111  	if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
      		/*
      		 * We know that we are going to sleep: signal
      		 * writers synchronously that there is more
      		 * room.
      		 */
      		wake_up_interruptible_sync(PIPE_WAIT(*inode));
 118  		if (!PIPE_EMPTY(*inode))
 119  			BUG();
 120  		goto do_more_read;
      	}
      	/* Signal writers asynchronously that there is more room.  */
      	wake_up_interruptible(PIPE_WAIT(*inode));
      
      	ret = read;
      out:
      	up(PIPE_SEM(*inode));
      out_nolock:
 129  	if (read)
      		ret = read;
 131  	return ret;
      }
      
      static ssize_t
 135  pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
      {
      	struct inode *inode = filp->f_dentry->d_inode;
      	ssize_t free, written, ret;
      
      	/* Seeks are not allowed on pipes.  */
      	ret = -ESPIPE;
      	written = 0;
 143  	if (ppos != &filp->f_pos)
 144  		goto out_nolock;
      
      	/* Null write succeeds.  */
      	ret = 0;
 148  	if (count == 0)
 149  		goto out_nolock;
      
      	ret = -ERESTARTSYS;
 152  	if (down_interruptible(PIPE_SEM(*inode)))
 153  		goto out_nolock;
      
      	/* No readers yields SIGPIPE.  */
 156  	if (!PIPE_READERS(*inode))
 157  		goto sigpipe;
      
      	/* If count <= PIPE_BUF, we have to make it atomic.  */
      	free = (count <= PIPE_BUF ? count : 1);
      
      	/* Wait, or check for, available space.  */
 163  	if (filp->f_flags & O_NONBLOCK) {
      		ret = -EAGAIN;
 165  		if (PIPE_FREE(*inode) < free)
 166  			goto out;
 167  	} else {
 168  		while (PIPE_FREE(*inode) < free) {
      			PIPE_WAITING_WRITERS(*inode)++;
      			pipe_wait(inode);
      			PIPE_WAITING_WRITERS(*inode)--;
      			ret = -ERESTARTSYS;
 173  			if (signal_pending(current))
 174  				goto out;
      
 176  			if (!PIPE_READERS(*inode))
 177  				goto sigpipe;
      		}
      	}
      
      	/* Copy into available space.  */
      	ret = -EFAULT;
 183  	while (count > 0) {
      		int space;
      		char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
      		ssize_t chars = PIPE_MAX_WCHUNK(*inode);
      
 188  		if ((space = PIPE_FREE(*inode)) != 0) {
 189  			if (chars > count)
      				chars = count;
 191  			if (chars > space)
      				chars = space;
      
 194  			if (copy_from_user(pipebuf, buf, chars))
 195  				goto out;
      
      			written += chars;
      			PIPE_LEN(*inode) += chars;
      			count -= chars;
      			buf += chars;
      			space = PIPE_FREE(*inode);
 202  			continue;
      		}
      
      		ret = written;
 206  		if (filp->f_flags & O_NONBLOCK)
 207  			break;
      
 209  		do {
      			/*
      			 * Synchronous wake-up: it knows that this process
      			 * is going to give up this CPU, so it doesnt have
      			 * to do idle reschedules.
      			 */
      			wake_up_interruptible_sync(PIPE_WAIT(*inode));
      			PIPE_WAITING_WRITERS(*inode)++;
      			pipe_wait(inode);
      			PIPE_WAITING_WRITERS(*inode)--;
 219  			if (signal_pending(current))
 220  				goto out;
 221  			if (!PIPE_READERS(*inode))
 222  				goto sigpipe;
 223  		} while (!PIPE_FREE(*inode));
      		ret = -EFAULT;
      	}
      
      	/* Signal readers asynchronously that there is more data.  */
      	wake_up_interruptible(PIPE_WAIT(*inode));
      
      	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
      	mark_inode_dirty(inode);
      
      out:
      	up(PIPE_SEM(*inode));
      out_nolock:
 236  	if (written)
      		ret = written;
 238  	return ret;
      
      sigpipe:
 241  	if (written)
 242  		goto out;
      	up(PIPE_SEM(*inode));
      	send_sig(SIGPIPE, current, 0);
 245  	return -EPIPE;
      }
      
      static loff_t
 249  pipe_lseek(struct file *file, loff_t offset, int orig)
      {
 251  	return -ESPIPE;
      }
      
      static ssize_t
 255  bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
      {
 257  	return -EBADF;
      }
      
      static ssize_t
 261  bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
      {
 263  	return -EBADF;
      }
      
      static int
 267  pipe_ioctl(struct inode *pino, struct file *filp,
      	   unsigned int cmd, unsigned long arg)
      {
 270  	switch (cmd) {
 271  		case FIONREAD:
 272  			return put_user(PIPE_LEN(*pino), (int *)arg);
 273  		default:
 274  			return -EINVAL;
      	}
      }
      
      /* No kernel lock held - fine */
      static unsigned int
 280  pipe_poll(struct file *filp, poll_table *wait)
      {
      	unsigned int mask;
      	struct inode *inode = filp->f_dentry->d_inode;
      
      	poll_wait(filp, PIPE_WAIT(*inode), wait);
      
      	/* Reading only -- no need for acquiring the semaphore.  */
      	mask = POLLIN | POLLRDNORM;
 289  	if (PIPE_EMPTY(*inode))
      		mask = POLLOUT | POLLWRNORM;
 291  	if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
      		mask |= POLLHUP;
 293  	if (!PIPE_READERS(*inode))
      		mask |= POLLERR;
      
 296  	return mask;
      }
      
      /*
       * FIFOs use the pipe poll handler unchanged.
       * FIXME: most Unices do not set POLLERR for fifos
       */
      #define fifo_poll pipe_poll
      
      static int
 303  pipe_release(struct inode *inode, int decr, int decw)
      {
      	down(PIPE_SEM(*inode));
      	PIPE_READERS(*inode) -= decr;
      	PIPE_WRITERS(*inode) -= decw;
 308  	if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
      		struct pipe_inode_info *info = inode->i_pipe;
      		inode->i_pipe = NULL;
      		free_page((unsigned long) info->base);
      		kfree(info);
 313  	} else {
      		wake_up_interruptible(PIPE_WAIT(*inode));
      	}
      	up(PIPE_SEM(*inode));
      
 318  	return 0;
      }
      
      /* Release handler for read-only opens: drops one reader.  @filp is unused. */
      static int
      pipe_read_release(struct inode *inode, struct file *filp)
      {
      	return pipe_release(inode, 1, 0);
      }
      
      /* Release handler for write-only opens: drops one writer.  @filp is unused. */
      static int
      pipe_write_release(struct inode *inode, struct file *filp)
      {
      	return pipe_release(inode, 0, 1);
      }
      
      static int
 334  pipe_rdwr_release(struct inode *inode, struct file *filp)
      {
      	int decr, decw;
      
      	decr = (filp->f_mode & FMODE_READ) != 0;
      	decw = (filp->f_mode & FMODE_WRITE) != 0;
 340  	return pipe_release(inode, decr, decw);
      }
      
      static int
 344  pipe_read_open(struct inode *inode, struct file *filp)
      {
      	/* We could have perhaps used atomic_t, but this and friends
      	   below are the only places.  So it doesn't seem worthwhile.  */
      	down(PIPE_SEM(*inode));
      	PIPE_READERS(*inode)++;
      	up(PIPE_SEM(*inode));
      
 352  	return 0;
      }
      
      static int
 356  pipe_write_open(struct inode *inode, struct file *filp)
      {
      	down(PIPE_SEM(*inode));
      	PIPE_WRITERS(*inode)++;
      	up(PIPE_SEM(*inode));
      
 362  	return 0;
      }
      
      static int
 366  pipe_rdwr_open(struct inode *inode, struct file *filp)
      {
      	down(PIPE_SEM(*inode));
 369  	if (filp->f_mode & FMODE_READ)
      		PIPE_READERS(*inode)++;
 371  	if (filp->f_mode & FMODE_WRITE)
      		PIPE_WRITERS(*inode)++;
      	up(PIPE_SEM(*inode));
      
 375  	return 0;
      }
      
      /*
       * The file_operations structs are not static because they
       * are also used in linux/fs/fifo.c to do operations on FIFOs.
       */

      /* FIFO, read side: reads are served, writes fail through bad_pipe_w. */
      struct file_operations read_fifo_fops = {
      	.llseek		= pipe_lseek,
      	.read		= pipe_read,
      	.write		= bad_pipe_w,
      	.poll		= fifo_poll,
      	.ioctl		= pipe_ioctl,
      	.open		= pipe_read_open,
      	.release	= pipe_read_release,
      };
      
      /* FIFO, write side: writes are served, reads fail through bad_pipe_r. */
      struct file_operations write_fifo_fops = {
      	.llseek		= pipe_lseek,
      	.read		= bad_pipe_r,
      	.write		= pipe_write,
      	.poll		= fifo_poll,
      	.ioctl		= pipe_ioctl,
      	.open		= pipe_write_open,
      	.release	= pipe_write_release,
      };
      
      /* FIFO opened for both directions: reads and writes are both served. */
      struct file_operations rdwr_fifo_fops = {
      	.llseek		= pipe_lseek,
      	.read		= pipe_read,
      	.write		= pipe_write,
      	.poll		= fifo_poll,
      	.ioctl		= pipe_ioctl,
      	.open		= pipe_rdwr_open,
      	.release	= pipe_rdwr_release,
      };
      
      /* Pipe, read end: reads are served, writes fail through bad_pipe_w. */
      struct file_operations read_pipe_fops = {
      	.llseek		= pipe_lseek,
      	.read		= pipe_read,
      	.write		= bad_pipe_w,
      	.poll		= pipe_poll,
      	.ioctl		= pipe_ioctl,
      	.open		= pipe_read_open,
      	.release	= pipe_read_release,
      };
      
      /* Pipe, write end: writes are served, reads fail through bad_pipe_r. */
      struct file_operations write_pipe_fops = {
      	.llseek		= pipe_lseek,
      	.read		= bad_pipe_r,
      	.write		= pipe_write,
      	.poll		= pipe_poll,
      	.ioctl		= pipe_ioctl,
      	.open		= pipe_write_open,
      	.release	= pipe_write_release,
      };
      
      /* Pipe with both directions enabled; installed as i_fop on new pipe inodes. */
      struct file_operations rdwr_pipe_fops = {
      	.llseek		= pipe_lseek,
      	.read		= pipe_read,
      	.write		= pipe_write,
      	.poll		= pipe_poll,
      	.ioctl		= pipe_ioctl,
      	.open		= pipe_rdwr_open,
      	.release	= pipe_rdwr_release,
      };
      
 442  struct inode* pipe_new(struct inode* inode)
      {
      	unsigned long page;
      
      	page = __get_free_page(GFP_USER);
 447  	if (!page)
 448  		return NULL;
      
      	inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 451  	if (!inode->i_pipe)
 452  		goto fail_page;
      
      	init_waitqueue_head(PIPE_WAIT(*inode));
      	PIPE_BASE(*inode) = (char*) page;
      	PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
      	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
      	PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
      	PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
      
 461  	return inode;
      fail_page:
      	free_page(page);
 464  	return NULL;
      }
      
      /*
       * Internal mount of pipefs.  Set by init_pipe_fs(); supplies the
       * superblock and root dentry used when pipes are created.
       */
      static struct vfsmount *pipe_mnt;
 468  static int pipefs_delete_dentry(struct dentry *dentry)
      {
 470  	return 1;
      }
      /* Dentry operations attached to every pipe dentry created by do_pipe(). */
      static struct dentry_operations pipefs_dentry_operations = {
      	.d_delete	= pipefs_delete_dentry,
      };
      
 476  static struct inode * get_pipe_inode(void)
      {
      	struct inode *inode = get_empty_inode();
      
 480  	if (!inode)
 481  		goto fail_inode;
      
 483  	if(!pipe_new(inode))
 484  		goto fail_iput;
      	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
      	inode->i_fop = &rdwr_pipe_fops;
      	inode->i_sb = pipe_mnt->mnt_sb;
      
      	/*
      	 * Mark the inode dirty from the very beginning,
      	 * that way it will never be moved to the dirty
      	 * list because "mark_inode_dirty()" will think
      	 * that it already _is_ on the dirty list.
      	 */
      	inode->i_state = I_DIRTY;
      	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
      	inode->i_uid = current->fsuid;
      	inode->i_gid = current->fsgid;
      	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
      	inode->i_blksize = PAGE_SIZE;
 501  	return inode;
      
      fail_iput:
      	iput(inode);
      fail_inode:
 506  	return NULL;
      }
      
 509  int do_pipe(int *fd)
      {
      	struct qstr this;
      	char name[32];
      	struct dentry *dentry;
      	struct inode * inode;
      	struct file *f1, *f2;
      	int error;
      	int i,j;
      
      	error = -ENFILE;
      	f1 = get_empty_filp();
 521  	if (!f1)
 522  		goto no_files;
      
      	f2 = get_empty_filp();
 525  	if (!f2)
 526  		goto close_f1;
      
      	inode = get_pipe_inode();
 529  	if (!inode)
 530  		goto close_f12;
      
      	error = get_unused_fd();
 533  	if (error < 0)
 534  		goto close_f12_inode;
      	i = error;
      
      	error = get_unused_fd();
 538  	if (error < 0)
 539  		goto close_f12_inode_i;
      	j = error;
      
      	error = -ENOMEM;
      	sprintf(name, "[%lu]", inode->i_ino);
      	this.name = name;
      	this.len = strlen(name);
      	this.hash = inode->i_ino; /* will go */
      	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
 548  	if (!dentry)
 549  		goto close_f12_inode_i_j;
      	dentry->d_op = &pipefs_dentry_operations;
      	d_add(dentry, inode);
      	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
      	f1->f_dentry = f2->f_dentry = dget(dentry);
      
      	/* read file */
      	f1->f_pos = f2->f_pos = 0;
      	f1->f_flags = O_RDONLY;
      	f1->f_op = &read_pipe_fops;
      	f1->f_mode = 1;
      	f1->f_version = 0;
      
      	/* write file */
      	f2->f_flags = O_WRONLY;
      	f2->f_op = &write_pipe_fops;
      	f2->f_mode = 2;
      	f2->f_version = 0;
      
      	fd_install(i, f1);
      	fd_install(j, f2);
      	fd[0] = i;
      	fd[1] = j;
 572  	return 0;
      
      close_f12_inode_i_j:
      	put_unused_fd(j);
      close_f12_inode_i:
      	put_unused_fd(i);
      close_f12_inode:
      	free_page((unsigned long) PIPE_BASE(*inode));
      	kfree(inode->i_pipe);
      	inode->i_pipe = NULL;
      	iput(inode);
      close_f12:
      	put_filp(f2);
      close_f1:
      	put_filp(f1);
      no_files:
 588  	return error;	
      }
      
      /*
       * pipefs should _never_ be mounted by userland - too much of a security
       * hassle, and no real gain from having the whole thing mounted. So we
       * don't need any operations on the root directory. However, we do need
       * a non-trivial d_name - "pipe:" will do nicely and kills the
       * special-casing in procfs.
       */
 597  static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
      {
      	buf->f_type = PIPEFS_MAGIC;
      	buf->f_bsize = 1024;
      	buf->f_namelen = 255;
 602  	return 0;
      }
      
      /* Superblock operations for pipefs: only statfs is provided. */
      static struct super_operations pipefs_ops = {
      	.statfs		= pipefs_statfs,
      };
      
 609  static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
      {
      	struct inode *root = new_inode(sb);
 612  	if (!root)
 613  		return NULL;
      	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
      	root->i_uid = root->i_gid = 0;
      	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
      	sb->s_blocksize = 1024;
      	sb->s_blocksize_bits = 10;
      	sb->s_magic = PIPEFS_MAGIC;
      	sb->s_op	= &pipefs_ops;
      	sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
 622  	if (!sb->s_root) {
      		iput(root);
 624  		return NULL;
      	}
      	sb->s_root->d_sb = sb;
      	sb->s_root->d_parent = sb->s_root;
      	d_instantiate(sb->s_root, root);
 629  	return sb;
      }
      
      /*
       * Filesystem type "pipefs", read in by pipefs_read_super().
       * NOTE(review): FS_NOMOUNT presumably keeps userland from mounting it
       * and FS_SINGLE presumably limits it to one instance -- confirm
       * against the flag definitions.
       */
      static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super,
      	FS_NOMOUNT|FS_SINGLE);
      
 635  static int __init init_pipe_fs(void)
      {
      	int err = register_filesystem(&pipe_fs_type);
 638  	if (!err) {
      		pipe_mnt = kern_mount(&pipe_fs_type);
      		err = PTR_ERR(pipe_mnt);
 641  		if (IS_ERR(pipe_mnt))
      			unregister_filesystem(&pipe_fs_type);
 643  		else
      			err = 0;
      	}
 646  	return err;
      }
      
 649  static void __exit exit_pipe_fs(void)
      {
      	unregister_filesystem(&pipe_fs_type);
      	kern_umount(pipe_mnt);
      }
      
      /* Hook the pipefs setup and teardown routines into module init/exit. */
      module_init(init_pipe_fs)
      module_exit(exit_pipe_fs)