/*
       *  linux/mm/swapfile.c
       *
       *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
       *  Swap reorganised 29.12.95, Stephen Tweedie
       */
      
      #include <linux/malloc.h>
      #include <linux/smp_lock.h>
      #include <linux/kernel_stat.h>
      #include <linux/swap.h>
      #include <linux/swapctl.h>
      #include <linux/blkdev.h> /* for blk_size */
      #include <linux/vmalloc.h>
      #include <linux/pagemap.h>
      #include <linux/shm.h>
      
      #include <asm/pgtable.h>
      
/* Lock taken via swap_list_lock()/swap_list_unlock(); presumably guards
 * swap_list, nr_swap_pages and per-device list links -- confirm in swap.h. */
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
/* One past the highest swap_info[] slot ever activated (see sys_swapon). */
unsigned int nr_swapfiles;

/* head: first device in priority order; next: where allocation resumes.
 * -1 means "none". */
struct swap_list_t swap_list = {-1, -1};

struct swap_info_struct swap_info[MAX_SWAPFILES];

/* Pages allocated sequentially before scan_swap_map starts a new cluster. */
#define SWAPFILE_CLUSTER 256
      
/*
 * Find a free slot in si's swap map and mark it allocated with the
 * given reference count.  Returns the slot offset, or 0 on failure
 * (slot 0 itself is marked SWAP_MAP_BAD at swapon time, so it never
 * looks free).  Caller must hold the device lock.
 */
static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
{
	unsigned long offset;
	/* 
	 * We try to cluster swap pages by allocating them
	 * sequentially in swap.  Once we've allocated
	 * SWAPFILE_CLUSTER pages this way, however, we resort to
	 * first-free allocation, starting a new cluster.  This
	 * prevents us from scattering swap pages all over the entire
	 * swap partition, so that we reduce overall disk seek times
	 * between swap pages.  -- sct */
	if (si->cluster_nr) {
		/* Still inside the current cluster: take the next free
		 * slot at or after cluster_next, if there is one. */
		while (si->cluster_next <= si->highest_bit) {
			offset = si->cluster_next++;
			if (si->swap_map[offset])
				continue;
			si->cluster_nr--;
			goto got_page;
		}
	}
	/* Current cluster exhausted: start a fresh one. */
	si->cluster_nr = SWAPFILE_CLUSTER;

	/* try to find an empty (even not aligned) cluster. */
	offset = si->lowest_bit;
 check_next_cluster:
	if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
	{
		int nr;
		for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
			if (si->swap_map[nr])
			{
				/* Slot in use: restart the window scan
				 * just past it. */
				offset = nr+1;
				goto check_next_cluster;
			}
		/* We found a completly empty cluster, so start
		 * using it.
		 */
		goto got_page;
	}
	/* No luck, so now go finegrined as usual. -Andrea */
	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
		if (si->swap_map[offset])
			continue;
	got_page:
		/* NOTE: got_page is entered both from this loop and by the
		 * gotos above, which jump into the loop body with a valid
		 * free "offset" in hand. */
		if (offset == si->lowest_bit)
			si->lowest_bit++;
		if (offset == si->highest_bit)
			si->highest_bit--;
		si->swap_map[offset] = count;
		nr_swap_pages--;
		si->cluster_next = offset+1;
		return offset;
	}
	return 0;
}
      
  85  swp_entry_t __get_swap_page(unsigned short count)
      {
      	struct swap_info_struct * p;
      	unsigned long offset;
      	swp_entry_t entry;
      	int type, wrapped = 0;
      
      	entry.val = 0;	/* Out of memory */
  93  	if (count >= SWAP_MAP_MAX)
  94  		goto bad_count;
      	swap_list_lock();
      	type = swap_list.next;
  97  	if (type < 0)
  98  		goto out;
  99  	if (nr_swap_pages == 0)
 100  		goto out;
      
 102  	while (1) {
      		p = &swap_info[type];
 104  		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
      			swap_device_lock(p);
      			offset = scan_swap_map(p, count);
 107  			swap_device_unlock(p);
 108  			if (offset) {
      				entry = SWP_ENTRY(type,offset);
      				type = swap_info[type].next;
      				if (type < 0 ||
 112  					p->prio != swap_info[type].prio) {
      						swap_list.next = swap_list.head;
 114  				} else {
      					swap_list.next = type;
      				}
 117  				goto out;
      			}
      		}
      		type = p->next;
 121  		if (!wrapped) {
 122  			if (type < 0 || p->prio != swap_info[type].prio) {
      				type = swap_list.head;
      				wrapped = 1;
      			}
      		} else
 127  			if (type < 0)
 128  				goto out;	/* out of swap space */
      	}
      out:
 131  	swap_list_unlock();
 132  	return entry;
      
      bad_count:
      	printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
      	       count, __builtin_return_address(0));
 137  	goto out;
      }
      
      
      /*
       * Caller has made sure that the swapdevice corresponding to entry
       * is still around or has not been recycled.
       */
 145  void __swap_free(swp_entry_t entry, unsigned short count)
      {
      	struct swap_info_struct * p;
      	unsigned long offset, type;
      
 150  	if (!entry.val)
 151  		goto out;
      
      	type = SWP_TYPE(entry);
 154  	if (type >= nr_swapfiles)
 155  		goto bad_nofile;
      	p = & swap_info[type];
 157  	if (!(p->flags & SWP_USED))
 158  		goto bad_device;
      	offset = SWP_OFFSET(entry);
 160  	if (offset >= p->max)
 161  		goto bad_offset;
 162  	if (!p->swap_map[offset])
 163  		goto bad_free;
      	swap_list_lock();
 165  	if (p->prio > swap_info[swap_list.next].prio)
      		swap_list.next = type;
      	swap_device_lock(p);
 168  	if (p->swap_map[offset] < SWAP_MAP_MAX) {
 169  		if (p->swap_map[offset] < count)
 170  			goto bad_count;
 171  		if (!(p->swap_map[offset] -= count)) {
 172  			if (offset < p->lowest_bit)
      				p->lowest_bit = offset;
 174  			if (offset > p->highest_bit)
      				p->highest_bit = offset;
      			nr_swap_pages++;
      		}
      	}
 179  	swap_device_unlock(p);
 180  	swap_list_unlock();
      out:
 182  	return;
      
      bad_nofile:
      	printk("swap_free: Trying to free nonexistent swap-page\n");
 186  	goto out;
      bad_device:
      	printk("swap_free: Trying to free swap from unused swap-device\n");
 189  	goto out;
      bad_offset:
      	printk("swap_free: offset exceeds max\n");
 192  	goto out;
      bad_free:
      	printk("VM: Bad swap entry %08lx\n", entry.val);
 195  	goto out;
      bad_count:
 197  	swap_device_unlock(p);
 198  	swap_list_unlock();
      	printk(KERN_ERR "VM: Bad count %hd current count %hd\n", count, p->swap_map[offset]);
 200  	goto out;
      }
      
      /*
       * The swap entry has been read in advance, and we return 1 to indicate
       * that the page has been used or is no longer needed.
       *
       * Always set the resulting pte to be nowrite (the same as COW pages
       * after one process has exited).  We don't know just how many PTEs will
       * share this swap entry, so be cautious and let do_wp_page work out
       * what to do if a write is requested later.
       */
/*
 * Rewrite a single pte so that, if it refers to "entry", it maps
 * "page" instead.  Called from unuse_pmd for every pte in the range.
 * Note: the "address" parameter is currently unused in this body.
 */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, swp_entry_t entry, struct page* page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return;
	if (pte_present(pte)) {
		/* If this entry is swap-cached, then page must already
                   hold the right address for any copies in physical
                   memory */
		if (pte_page(pte) != page)
			return;
		/* We will be removing the swap cache in a moment, so... */
		ptep_mkdirty(dir);
		return;
	}
	if (pte_to_swp_entry(pte).val != entry.val)
		return;
	/* Swap entry matched: install a present, dirty mapping of "page",
	 * drop the swap reference it replaces, take a page reference for
	 * this mapping and account one more resident page. */
	set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	swap_free(entry);
	get_page(page);
	++vma->vm_mm->rss;
}
      
/*
 * Walk every pte under *dir within [address, address+size) and hand
 * each one to unuse_pte.  "offset" carries the pgd-aligned base so
 * that offset+address-vma->vm_start reconstructs a vma-relative
 * address for unuse_pte.
 */
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
	unsigned long address, unsigned long size, unsigned long offset,
	swp_entry_t entry, struct page* page)
{
	pte_t * pte;
	unsigned long end;

	if (pmd_none(*dir))
		return;
	if (pmd_bad(*dir)) {
		/* Corrupt pmd: report and clear it so we don't trip
		 * over it again. */
		pmd_ERROR(*dir);
		pmd_clear(dir);
		return;
	}
	pte = pte_offset(dir, address);
	/* Make "address" pmd-relative; fold its pmd base into offset. */
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page);
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
}
      
/*
 * Walk every pmd under *dir within [address, address+size) and
 * recurse into unuse_pmd for each.
 */
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
	unsigned long address, unsigned long size,
	swp_entry_t entry, struct page* page)
{
	pmd_t * pmd;
	unsigned long offset, end;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		/* Corrupt pgd: report and clear it. */
		pgd_ERROR(*dir);
		pgd_clear(dir);
		return;
	}
	pmd = pmd_offset(dir, address);
	/* Remember the pgd-aligned base; addresses below are pgd-relative. */
	offset = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	if (address >= end)
		BUG();
	do {
		unuse_pmd(vma, pmd, address, end - address, offset, entry,
			  page);
		/* Step to the next pmd boundary (masking guards wraparound). */
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address && (address < end));
}
      
 294  static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
      			swp_entry_t entry, struct page* page)
      {
      	unsigned long start = vma->vm_start, end = vma->vm_end;
      
 299  	if (start >= end)
 300  		BUG();
 301  	do {
      		unuse_pgd(vma, pgdir, start, end - start, entry, page);
      		start = (start + PGDIR_SIZE) & PGDIR_MASK;
      		pgdir++;
 305  	} while (start && (start < end));
      }
      
 308  static void unuse_process(struct mm_struct * mm,
      			swp_entry_t entry, struct page* page)
      {
      	struct vm_area_struct* vma;
      
      	/*
      	 * Go through process' page directory.
      	 */
 316  	if (!mm)
 317  		return;
      	spin_lock(&mm->page_table_lock);
 319  	for (vma = mm->mmap; vma; vma = vma->vm_next) {
      		pgd_t * pgd = pgd_offset(mm, vma->vm_start);
      		unuse_vma(vma, pgd, entry, page);
      	}
 323  	spin_unlock(&mm->page_table_lock);
 324  	return;
      }
      
      /*
       * We completely avoid races by reading each swap page in advance,
       * and then search for the process using it.  All the necessary
       * page table adjustments can then be made atomically.
       */
/*
 * Drain every in-use entry of swap area "type" back into memory.
 * Returns 0 on success, -ENOMEM if a page could not be read in.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct task_struct *p;
	struct page *page;
	swp_entry_t entry;
	int i;

	while (1) {
		/*
		 * Find a swap page in use and read it in.
		 */
		swap_device_lock(si);
		for (i = 1; i < si->max ; i++) {
			if (si->swap_map[i] > 0 && si->swap_map[i] != SWAP_MAP_BAD) {
				/*
				 * Prevent swaphandle from being completely
				 * unused by swap_free while we are trying
				 * to read in the page - this prevents warning
				 * messages from rw_swap_page_base.
				 */
				if (si->swap_map[i] != SWAP_MAP_MAX)
					si->swap_map[i]++;
				swap_device_unlock(si);
				goto found_entry;
			}
		}
		swap_device_unlock(si);
		/* No used slots remain: the area is fully drained. */
		break;

	found_entry:
		entry = SWP_ENTRY(type, i);

		/* Get a page for the entry, using the existing swap
                   cache page if there is one.  Otherwise, get a clean
                   page and read the swap into it. */
		page = read_swap_cache(entry);
		if (!page) {
			/* Drop the pinning reference taken above. */
			swap_free(entry);
			return -ENOMEM;
		}
		if (PageSwapCache(page))
			delete_from_swap_cache(page);
		/* Point every pte in every task that still references
		 * this entry at "page" instead. */
		read_lock(&tasklist_lock);
		for_each_task(p)
			unuse_process(p->mm, entry, page);
		read_unlock(&tasklist_lock);
		shmem_unuse(entry, page);
		/* Now get rid of the extra reference to the temporary
                   page we've been using. */
		page_cache_release(page);
		/*
		 * Check for and clear any overflowed swap map counts.
		 */
		swap_free(entry);
		swap_list_lock();
		swap_device_lock(si);
		if (si->swap_map[i] > 0) {
			/* Only SWAP_MAP_MAX (overflowed) is expected here;
			 * anything else indicates a leaked reference. */
			if (si->swap_map[i] != SWAP_MAP_MAX)
				printk("VM: Undead swap entry %08lx\n", 
								entry.val);
			nr_swap_pages++;
			si->swap_map[i] = 0;
		}
		swap_device_unlock(si);
		swap_list_unlock();
	}
	return 0;
}
      
/*
 * The swapoff system call: deactivate the swap area named by
 * "specialfile" after migrating all of its entries back into memory.
 */
asmlinkage long sys_swapoff(const char * specialfile)
{
	struct swap_info_struct * p = NULL;
	struct nameidata nd;
	int i, type, prev;
	int err;
	
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = user_path_walk(specialfile, &nd);
	if (err)
		goto out;

	lock_kernel();
	prev = -1;
	swap_list_lock();
	/* Find the active area matching the path: by dentry for swap
	 * files, by device number for block devices. */
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			if (p->swap_file) {
				if (p->swap_file == nd.dentry)
				  break;
			} else {
				if (S_ISBLK(nd.dentry->d_inode->i_mode)
				    && (p->swap_device == nd.dentry->d_inode->i_rdev))
				  break;
			}
		}
		prev = type;
	}
	err = -EINVAL;
	if (type < 0) {
		swap_list_unlock();
		goto out_dput;
	}

	/* Unlink the area from the priority list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* just pick something that's safe... */
		swap_list.next = swap_list.head;
	}
	nr_swap_pages -= p->pages;
	swap_list_unlock();
	/* Keep SWP_USED so the slot isn't re-taken while we drain it,
	 * but drop SWP_WRITEOK so no new entries are allocated from it. */
	p->flags = SWP_USED;
	err = try_to_unuse(type);
	if (err) {
		/* re-insert swap space back into swap_list */
		swap_list_lock();
		for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
			if (p->prio >= swap_info[i].prio)
				break;
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		swap_list_unlock();
		p->flags = SWP_WRITEOK;
		goto out_dput;
	}
	if (p->swap_device)
		blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
	/* Release the swapoff argument's path now; nd is then re-used to
	 * carry the dentry/vfsmnt references taken at swapon time, which
	 * the path_release() at out_dput below finally drops. */
	path_release(&nd);

	nd.dentry = p->swap_file;
	p->swap_file = NULL;
	nd.mnt = p->swap_vfsmnt;
	p->swap_vfsmnt = NULL;
	p->swap_device = 0;
	vfree(p->swap_map);
	p->swap_map = NULL;
	p->flags = 0;
	err = 0;

out_dput:
	unlock_kernel();
	path_release(&nd);
out:
	return err;
}
      
 489  int get_swaparea_info(char *buf)
      {
      	char * page = (char *) __get_free_page(GFP_KERNEL);
      	struct swap_info_struct *ptr = swap_info;
      	int i, j, len = 0, usedswap;
      
 495  	if (!page)
 496  		return -ENOMEM;
      
      	len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
 499  	for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
 500  		if (ptr->flags & SWP_USED) {
      			char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
      						page, PAGE_SIZE);
      
      			len += sprintf(buf + len, "%-31s ", path);
      
 506  			if (!ptr->swap_device)
      				len += sprintf(buf + len, "file\t\t");
 508  			else
      				len += sprintf(buf + len, "partition\t");
      
      			usedswap = 0;
 512  			for (j = 0; j < ptr->max; ++j)
 513  				switch (ptr->swap_map[j]) {
 514  					case SWAP_MAP_BAD:
 515  					case 0:
 516  						continue;
 517  					default:
      						usedswap++;
      				}
      			len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), 
      				usedswap << (PAGE_SHIFT - 10), ptr->prio);
      		}
      	}
      	free_page((unsigned long) page);
 525  	return len;
      }
      
 528  int is_swap_partition(kdev_t dev) {
      	struct swap_info_struct *ptr = swap_info;
      	int i;
      
 532  	for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
 533  		if (ptr->flags & SWP_USED)
 534  			if (ptr->swap_device == dev)
 535  				return 1;
      	}
 537  	return 0;
      }
      
      /*
       * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
       *
       * The swapon system call
       */
asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
{
	struct swap_info_struct * p;
	struct nameidata nd;
	struct inode * swap_inode;
	unsigned int type;
	int i, j, prev;
	int error;
	static int least_priority = 0;
	union swap_header *swap_header = 0;
	int swap_header_version;
	int nr_good_pages = 0;
	unsigned long maxpages;
	int swapfilesize;
	struct block_device *bdev = NULL;
	
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	lock_kernel();
	/* Grab the first unused swap_info slot. */
	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++)
		if (!(p->flags & SWP_USED))
			break;
	error = -EPERM;
	if (type >= MAX_SWAPFILES)
		goto out;
	if (type >= nr_swapfiles)
		nr_swapfiles = type+1;
	/* Claim the slot and reset it to a clean state. */
	p->flags = SWP_USED;
	p->swap_file = NULL;
	p->swap_vfsmnt = NULL;
	p->swap_device = 0;
	p->swap_map = NULL;
	p->lowest_bit = 0;
	p->highest_bit = 0;
	p->cluster_nr = 0;
	p->sdev_lock = SPIN_LOCK_UNLOCKED;
	p->max = 1;
	p->next = -1;
	if (swap_flags & SWAP_FLAG_PREFER) {
		/* Caller supplied an explicit priority. */
		p->prio =
		  (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
	} else {
		/* Default: each new area ranks below the previous one. */
		p->prio = --least_priority;
	}
	error = user_path_walk(specialfile, &nd);
	if (error)
		goto bad_swap_2;

	p->swap_file = nd.dentry;
	p->swap_vfsmnt = nd.mnt;
	swap_inode = nd.dentry->d_inode;
	error = -EINVAL;

	if (S_ISBLK(swap_inode->i_mode)) {
		/* Block device: open for swap and determine its size. */
		kdev_t dev = swap_inode->i_rdev;
		struct block_device_operations *bdops;

		p->swap_device = dev;
		set_blocksize(dev, PAGE_SIZE);
		
		bdev = swap_inode->i_bdev;
		bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode));
		if (bdops) bdev->bd_op = bdops;

		error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
		if (error)
			goto bad_swap_2;
		set_blocksize(dev, PAGE_SIZE);
		error = -ENODEV;
		if (!dev || (blk_size[MAJOR(dev)] &&
		     !blk_size[MAJOR(dev)][MINOR(dev)]))
			goto bad_swap;
		/* Refuse a device that is already active as swap. */
		error = -EBUSY;
		for (i = 0 ; i < nr_swapfiles ; i++) {
			if (i == type)
				continue;
			if (dev == swap_info[i].swap_device)
				goto bad_swap;
		}
		swapfilesize = 0;
		if (blk_size[MAJOR(dev)])
			swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
				>> (PAGE_SHIFT - 10);
	} else if (S_ISREG(swap_inode->i_mode)) {
		/* Regular file: refuse one that is already active as swap. */
		error = -EBUSY;
		for (i = 0 ; i < nr_swapfiles ; i++) {
			if (i == type || !swap_info[i].swap_file)
				continue;
			if (swap_inode == swap_info[i].swap_file->d_inode)
				goto bad_swap;
		}
		swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
	} else
		goto bad_swap;

	/* Read the header page (page 0) and check its signature. */
	swap_header = (void *) __get_free_page(GFP_USER);
	if (!swap_header) {
		printk("Unable to start swapping: out of memory :-)\n");
		error = -ENOMEM;
		goto bad_swap;
	}

	lock_page(virt_to_page(swap_header));
	rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header, 1);

	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
		swap_header_version = 1;
	else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
		swap_header_version = 2;
	else {
		printk("Unable to find swap-space signature\n");
		error = -EINVAL;
		goto bad_swap;
	}
	
	switch (swap_header_version) {
	case 1:
		/* v1 header: a bitmap over the page, bit set = usable page.
		 * First wipe the trailing 10-byte signature. */
		memset(((char *) swap_header)+PAGE_SIZE-10,0,10);
		j = 0;
		p->lowest_bit = 0;
		p->highest_bit = 0;
		for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
			if (test_bit(i,(char *) swap_header)) {
				if (!p->lowest_bit)
					p->lowest_bit = i;
				p->highest_bit = i;
				p->max = i+1;
				j++;
			}
		}
		nr_good_pages = j;
		p->swap_map = vmalloc(p->max * sizeof(short));
		if (!p->swap_map) {
			error = -ENOMEM;
			goto bad_swap;
		}
		/* Usable pages start free; everything else is marked bad. */
		for (i = 1 ; i < p->max ; i++) {
			if (test_bit(i,(char *) swap_header))
				p->swap_map[i] = 0;
			else
				p->swap_map[i] = SWAP_MAP_BAD;
		}
		break;

	case 2:
		/* Check the swap header's sub-version and the size of
                   the swap file and bad block lists */
		if (swap_header->info.version != 1) {
			printk(KERN_WARNING
			       "Unable to handle swap header version %d\n",
			       swap_header->info.version);
			error = -EINVAL;
			goto bad_swap;
		}

		p->lowest_bit  = 1;
		p->highest_bit = swap_header->info.last_page - 1;
		p->max	       = swap_header->info.last_page;

		/* Clamp to the largest offset a swp_entry_t can encode. */
		maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL));
		if (p->max >= maxpages)
			p->max = maxpages-1;

		error = -EINVAL;
		if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
			goto bad_swap;
		
		/* OK, set up the swap map and apply the bad block list */
		if (!(p->swap_map = vmalloc (p->max * sizeof(short)))) {
			error = -ENOMEM;
			goto bad_swap;
		}

		error = 0;
		memset(p->swap_map, 0, p->max * sizeof(short));
		for (i=0; i<swap_header->info.nr_badpages; i++) {
			int page = swap_header->info.badpages[i];
			if (page <= 0 || page >= swap_header->info.last_page)
				error = -EINVAL;
			else
				p->swap_map[page] = SWAP_MAP_BAD;
		}
		nr_good_pages = swap_header->info.last_page -
				swap_header->info.nr_badpages -
				1 /* header page */;
		if (error)
			goto bad_swap;
	}
	
	if (swapfilesize && p->max > swapfilesize) {
		printk(KERN_WARNING
		       "Swap area shorter than signature indicates\n");
		error = -EINVAL;
		goto bad_swap;
	}
	if (!nr_good_pages) {
		printk(KERN_WARNING "Empty swap-file\n");
		error = -EINVAL;
		goto bad_swap;
	}
	/* Page 0 holds the header and must never be allocated. */
	p->swap_map[0] = SWAP_MAP_BAD;
	p->flags = SWP_WRITEOK;
	p->pages = nr_good_pages;
	swap_list_lock();
	nr_swap_pages += nr_good_pages;
	printk(KERN_INFO "Adding Swap: %dk swap-space (priority %d)\n",
	       nr_good_pages<<(PAGE_SHIFT-10), p->prio);

	/* insert swap space into swap_list: */
	prev = -1;
	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
		if (p->prio >= swap_info[i].prio) {
			break;
		}
		prev = i;
	}
	p->next = i;
	if (prev < 0) {
		swap_list.head = swap_list.next = p - swap_info;
	} else {
		swap_info[prev].next = p - swap_info;
	}
	swap_list_unlock();
	error = 0;
	goto out;
bad_swap:
	if (bdev)
		blkdev_put(bdev, BDEV_SWAP);
bad_swap_2:
	/* Undo: free the map, drop the path refs, reset the slot.
	 * NOTE(review): if user_path_walk() failed, nd was never set up;
	 * the assignments below then reload it from p (both NULL) before
	 * path_release -- confirm path_release tolerates NULL members. */
	if (p->swap_map)
		vfree(p->swap_map);
	nd.mnt = p->swap_vfsmnt;
	nd.dentry = p->swap_file;
	p->swap_device = 0;
	p->swap_file = NULL;
	p->swap_vfsmnt = NULL;
	p->swap_map = NULL;
	p->flags = 0;
	if (!(swap_flags & SWAP_FLAG_PREFER))
		++least_priority;
	path_release(&nd);
out:
	if (swap_header)
		free_page((long) swap_header);
	unlock_kernel();
	return error;
}
      
 794  void si_swapinfo(struct sysinfo *val)
      {
      	unsigned int i;
      	unsigned long freeswap = 0;
      	unsigned long totalswap = 0;
      
 800  	for (i = 0; i < nr_swapfiles; i++) {
      		unsigned int j;
 802  		if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 803  			continue;
 804  		for (j = 0; j < swap_info[i].max; ++j) {
 805  			switch (swap_info[i].swap_map[j]) {
 806  				case SWAP_MAP_BAD:
 807  					continue;
 808  				case 0:
      					freeswap++;
 810  				default:
      					totalswap++;
      			}
      		}
      	}
      	val->freeswap = freeswap;
      	val->totalswap = totalswap;
 817  	return;
      }
      
      /*
       * Verify that a swap entry is valid and increment its swap map count.
       * Kernel_lock is held, which guarantees existance of swap device.
       *
       * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
       * "permanent", but will be reclaimed by the next swapoff.
       */
 827  int swap_duplicate(swp_entry_t entry)
      {
      	struct swap_info_struct * p;
      	unsigned long offset, type;
      	int result = 0;
      
      	/* Swap entry 0 is illegal */
 834  	if (!entry.val)
 835  		goto out;
      	type = SWP_TYPE(entry);
 837  	if (type >= nr_swapfiles)
 838  		goto bad_file;
      	p = type + swap_info;
      	offset = SWP_OFFSET(entry);
 841  	if (offset >= p->max)
 842  		goto bad_offset;
 843  	if (!p->swap_map[offset])
 844  		goto bad_unused;
      	/*
      	 * Entry is valid, so increment the map count.
      	 */
      	swap_device_lock(p);
 849  	if (p->swap_map[offset] < SWAP_MAP_MAX)
      		p->swap_map[offset]++;
 851  	else {
      		static int overflow = 0;
 853  		if (overflow++ < 5)
      			printk("VM: swap entry overflow\n");
      		p->swap_map[offset] = SWAP_MAP_MAX;
      	}
 857  	swap_device_unlock(p);
      	result = 1;
      out:
 860  	return result;
      
      bad_file:
      	printk("Bad swap file entry %08lx\n", entry.val);
 864  	goto out;
      bad_offset:
      	printk("Bad swap offset entry %08lx\n", entry.val);
 867  	goto out;
      bad_unused:
      	printk("Unused swap offset entry in swap_dup %08lx\n", entry.val);
 870  	goto out;
      }
      
      /*
       * Page lock needs to be held in all cases to prevent races with
       * swap file deletion.
       */
 877  int swap_count(struct page *page)
      {
      	struct swap_info_struct * p;
      	unsigned long offset, type;
      	swp_entry_t entry;
      	int retval = 0;
      
      	entry.val = page->index;
 885  	if (!entry.val)
 886  		goto bad_entry;
      	type = SWP_TYPE(entry);
 888  	if (type >= nr_swapfiles)
 889  		goto bad_file;
      	p = type + swap_info;
      	offset = SWP_OFFSET(entry);
 892  	if (offset >= p->max)
 893  		goto bad_offset;
 894  	if (!p->swap_map[offset])
 895  		goto bad_unused;
      	retval = p->swap_map[offset];
      out:
 898  	return retval;
      
      bad_entry:
      	printk(KERN_ERR "swap_count: null entry!\n");
 902  	goto out;
      bad_file:
      	printk("Bad swap file entry %08lx\n", entry.val);
 905  	goto out;
      bad_offset:
      	printk("Bad swap offset entry %08lx\n", entry.val);
 908  	goto out;
      bad_unused:
      	printk("Unused swap offset entry in swap_count %08lx\n", entry.val);
 911  	goto out;
      }
      
      /*
       * Kernel_lock protects against swap device deletion.
       */
 917  void get_swaphandle_info(swp_entry_t entry, unsigned long *offset, 
      			kdev_t *dev, struct inode **swapf)
      {
      	unsigned long type;
      	struct swap_info_struct *p;
      
      	type = SWP_TYPE(entry);
 924  	if (type >= nr_swapfiles) {
      		printk("Internal error: bad swap-device\n");
 926  		return;
      	}
      
      	p = &swap_info[type];
      	*offset = SWP_OFFSET(entry);
 931  	if (*offset >= p->max) {
      		printk("rw_swap_page: weirdness\n");
 933  		return;
      	}
 935  	if (p->swap_map && !p->swap_map[*offset]) {
      		printk("VM: Bad swap entry %08lx\n", entry.val);
 937  		return;
      	}
 939  	if (!(p->flags & SWP_USED)) {
      		printk(KERN_ERR "rw_swap_page: "
      			"Trying to swap to unused swap-device\n");
 942  		return;
      	}
      
 945  	if (p->swap_device) {
      		*dev = p->swap_device;
 947  	} else if (p->swap_file) {
      		*swapf = p->swap_file->d_inode;
 949  	} else {
      		printk(KERN_ERR "rw_swap_page: no swap file or device\n");
      	}
 952  	return;
      }
      
      /*
       * Kernel_lock protects against swap device deletion. Grab an extra
       * reference on the swaphandle so that it dos not become unused.
       */
 959  int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
      {
      	int ret = 0, i = 1 << page_cluster;
      	unsigned long toff;
      	struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
      
      	*offset = SWP_OFFSET(entry);
      	toff = *offset = (*offset >> page_cluster) << page_cluster;
      
      	swap_device_lock(swapdev);
 969  	do {
      		/* Don't read-ahead past the end of the swap area */
 971  		if (toff >= swapdev->max)
 972  			break;
      		/* Don't read in bad or busy pages */
 974  		if (!swapdev->swap_map[toff])
 975  			break;
 976  		if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
 977  			break;
      		swapdev->swap_map[toff]++;
      		toff++;
      		ret++;
 981  	} while (--i);
 982  	swap_device_unlock(swapdev);
 983  	return ret;
      }