patch-2.3.23 linux/fs/proc/array.c

Next file: linux/fs/proc/mem.c
Previous file: linux/fs/nfs/symlink.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.22/linux/fs/proc/array.c linux/fs/proc/array.c
@@ -38,6 +38,7 @@
  *
  * aeb@cwi.nl        :  /proc/partitions
  *
+ *
  * Alan Cox	     :  security fixes.
  *			<Alan.Cox@linux.org>
  *
@@ -45,11 +46,6 @@
  *
  * Gerhard Wichert   :  added BIGMEM support
  * Siemens AG           <Gerhard.Wichert@pdb.siemens.de>
- *
- * Chuck Lever       :  safe handling of task_struct
- *                      <cel@monkey.org>
- *
- * Andrea Arcangeli  :	SMP race/security fixes.
  */
 
 #include <linux/types.h>
@@ -71,7 +67,6 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
-#include <linux/smp_lock.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -365,16 +360,24 @@
 	struct sysinfo i;
 	int len;
 
+/*
+ * Convert a page count to kilobytes for display.
+ */
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+
 	si_meminfo(&i);
 	si_swapinfo(&i);
 	len = sprintf(buffer, "        total:    used:    free:  shared: buffers:  cached:\n"
-		"Mem:  %8lu %8lu %8lu %8lu %8lu %8lu\n"
+		"Mem:  %8lu %8lu %8lu %8lu %8lu %8u\n"
 		"Swap: %8lu %8lu %8lu\n",
-		i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, (unsigned long) atomic_read(&page_cache_size)*PAGE_SIZE,
-		i.totalswap, i.totalswap-i.freeswap, i.freeswap);
+		K(i.totalram), K(i.totalram-i.freeram), K(i.freeram),
+		K(i.sharedram), K(i.bufferram),
+		K(atomic_read(&page_cache_size)), K(i.totalswap),
+		K(i.totalswap-i.freeswap), K(i.freeswap));
 	/*
-	 * Tagged format, for easy grepping and expansion. The above will go away
-	 * eventually, once the tools have been updated.
+	 * Tagged format, for easy grepping and expansion.
+	 * The above will go away eventually, once the tools
+	 * have been updated.
 	 */
 	return len + sprintf(buffer+len,
 		"MemTotal:  %8lu kB\n"
@@ -382,19 +385,20 @@
 		"MemShared: %8lu kB\n"
 		"Buffers:   %8lu kB\n"
 		"Cached:    %8u kB\n"
-		"BigTotal:  %8lu kB\n"
-		"BigFree:   %8lu kB\n"
+		"HighTotal: %8lu kB\n"
+		"HighFree:  %8lu kB\n"
 		"SwapTotal: %8lu kB\n"
 		"SwapFree:  %8lu kB\n",
-		i.totalram >> 10,
-		i.freeram >> 10,
-		i.sharedram >> 10,
-		i.bufferram >> 10,
-		atomic_read(&page_cache_size) << (PAGE_SHIFT - 10),
-		i.totalbig >> 10,
-		i.freebig >> 10,
-		i.totalswap >> 10,
-		i.freeswap >> 10);
+		K(i.totalram),
+		K(i.freeram),
+		K(i.sharedram),
+		K(i.bufferram),
+		K(atomic_read(&page_cache_size)),
+		K(i.totalhigh),
+		K(i.freehigh),
+		K(i.totalswap),
+		K(i.freeswap));
+#undef K
 }
 
 static int get_version(char * buffer)
@@ -412,69 +416,68 @@
 	return sprintf(buffer, "%s\n", saved_command_line);
 }
 
-static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr)
+static struct page * get_phys_addr(struct mm_struct * mm, unsigned long ptr)
 {
-	pgd_t *page_dir;
-	pmd_t *page_middle;
+	pgd_t *pgd;
+	pmd_t *pmd;
 	pte_t pte;
 
 	if (ptr >= TASK_SIZE)
 		return 0;
-	page_dir = pgd_offset(mm,ptr);
-	if (pgd_none(*page_dir))
+	pgd = pgd_offset(mm,ptr);
+	if (pgd_none(*pgd))
 		return 0;
-	if (pgd_bad(*page_dir)) {
-		printk("bad page directory entry %08lx\n", pgd_val(*page_dir));
-		pgd_clear(page_dir);
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
 		return 0;
 	}
-	page_middle = pmd_offset(page_dir,ptr);
-	if (pmd_none(*page_middle))
+	pmd = pmd_offset(pgd,ptr);
+	if (pmd_none(*pmd))
 		return 0;
-	if (pmd_bad(*page_middle)) {
-		printk("bad page middle entry %08lx\n", pmd_val(*page_middle));
-		pmd_clear(page_middle);
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
 		return 0;
 	}
-	pte = *pte_offset(page_middle,ptr);
+	pte = *pte_offset(pmd,ptr);
 	if (!pte_present(pte))
 		return 0;
-	return pte_page(pte) + (ptr & ~PAGE_MASK);
+	return pte_page(pte);
 }
 
-#include <linux/bigmem.h>
-
 static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer)
 {
-	unsigned long addr;
+	struct page *page;
+	unsigned long kaddr;
 	int size = 0, result = 0;
 	char c;
 
 	if (start >= end)
 		return result;
 	for (;;) {
-		addr = get_phys_addr(mm, start);
-		if (!addr)
+		page = get_phys_addr(mm, start);
+		if (!page)
 			return result;
-		addr = kmap(addr, KM_READ);
+		kaddr = kmap(page, KM_READ) + (start & ~PAGE_MASK);
 		do {
-			c = *(char *) addr;
+			c = *(char *) kaddr;
 			if (!c)
 				result = size;
 			if (size < PAGE_SIZE)
 				buffer[size++] = c;
 			else {
-				kunmap(addr, KM_READ);
+				kunmap(kaddr, KM_READ);
 				return result;
 			}
-			addr++;
+			kaddr++;
 			start++;
 			if (!c && start >= end) {
-				kunmap(addr, KM_READ);
+				kunmap(kaddr, KM_READ);
 				return result;
 			}
-		} while (addr & ~PAGE_MASK);
-		kunmap(addr-1, KM_READ);
+		} while (kaddr & ~PAGE_MASK);
+		kunmap(kaddr, KM_READ);
 	}
 	return result;
 }
@@ -483,9 +486,7 @@
 {
 	struct task_struct *p;
 	struct mm_struct *mm = NULL;
-
-	/* need kernel lock to avoid the tsk->mm to go away under us */
-	lock_kernel();
+	
 	read_lock(&tasklist_lock);
 	p = find_task_by_pid(pid);
 	if (p)
@@ -493,10 +494,10 @@
 	if (mm)
 		atomic_inc(&mm->mm_users);
 	read_unlock(&tasklist_lock);
-	unlock_kernel();
 	return mm;
 }
 
+
 static int get_env(int pid, char * buffer)
 {
 	struct mm_struct *mm = get_mm(pid);
@@ -849,9 +850,6 @@
 	return buffer;
 }
 
-/*
- * These next two assume that the task's sigmask_lock is held by the caller.
- */
 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
 				    sigset_t *catch)
 {
@@ -904,115 +902,77 @@
 			    cap_t(p->cap_effective));
 }
 
-/*
- * This is somewhat safer than it was before.  However...
- *
- * Embedded pointers in the task structure may reference data that
- * can be changed or that is no longer valid after the tasklist
- * lock is released, or that isn't even protected by the tasklist
- * lock.  Eg. tsk->tty, tsk->sig, and tsk->p_pptr can change after
- * we make our own copy of the task structure.  This doesn't matter
- * unless we are trying to use the pointed-to data as an address.
- * So there are still a few safety issues to be addressed here.
- */
+
 static int get_status(int pid, char * buffer)
 {
 	char * orig = buffer;
 	struct task_struct *tsk;
 	struct mm_struct *mm = NULL;
 
-	/*
-	 * We lock the whole kernel here because p->files and p->mm are still
-	 * protected by the global kernel lock.
-	 */
-	lock_kernel();
-
 	read_lock(&tasklist_lock);
 	tsk = find_task_by_pid(pid);
-	if (tsk) {
+	if (tsk)
 		mm = tsk->mm;
-		if (mm)
-			atomic_inc(&mm->mm_users);
-
-		buffer = task_name(tsk, buffer);
-		buffer = task_state(tsk, buffer);
-
-		spin_lock_irq(&tsk->sigmask_lock);
-		buffer = task_sig(tsk, buffer);
-		spin_unlock_irq(&tsk->sigmask_lock);
-
-		buffer = task_cap(tsk, buffer);
-	}
-	read_unlock(&tasklist_lock);
-
-	unlock_kernel();
-
-	/*
-	 * We can't hold the tasklist_lock and jiggle the mmap_sem --
-	 * that can result in a deadlock.
-	 */
-	if (mm) {
+	if (mm)
+		atomic_inc(&mm->mm_users);
+	read_unlock(&tasklist_lock);	/* FIXME!! This should be done after the last use */
+	if (!tsk)
+		return 0;
+	buffer = task_name(tsk, buffer);
+	buffer = task_state(tsk, buffer);
+	if (mm)
 		buffer = task_mem(mm, buffer);
+	buffer = task_sig(tsk, buffer);
+	buffer = task_cap(tsk, buffer);
+	if (mm)
 		mmput(mm);
-	}
-
-	/*
-	 * (buffer - orig) will be zero on an error exit.
-	 */
 	return buffer - orig;
 }
 
 static int get_stat(int pid, char * buffer)
 {
 	struct task_struct *tsk;
-	struct mm_struct *mm;
+	struct mm_struct *mm = NULL;
 	unsigned long vsize, eip, esp, wchan;
 	long priority, nice;
-	pid_t ppid = 0;
+	int tty_pgrp;
 	sigset_t sigign, sigcatch;
 	char state;
-	int res = 0;
-	unsigned int tty_device;
-	int tty_pgrp;
+	int res;
 
 	read_lock(&tasklist_lock);
 	tsk = find_task_by_pid(pid);
-	if (!tsk)
-		goto out_unlock;
-	/* avoid the task list to go away under us (security) */
-	get_page(MAP_NR(tsk) + mem_map);
-	ppid = tsk->p_pptr->pid;
-	read_unlock(&tasklist_lock);
-
-	/* we need the big kernel lock to avoid tsk->mm and tsk->tty
-	   to change under us */
-	lock_kernel();
-	mm = tsk->mm;
+	if (tsk)
+		mm = tsk->mm;
 	if (mm)
 		atomic_inc(&mm->mm_users);
-	tty_device = tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0;
-	tty_pgrp = tsk->tty ? tsk->tty->pgrp : -1;
-	unlock_kernel();
-
-	spin_lock_irq(&tsk->sigmask_lock);
-	collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
-	spin_unlock_irq(&tsk->sigmask_lock);
-
-	eip = KSTK_EIP(tsk);
-	esp = KSTK_ESP(tsk);
-	wchan = get_wchan(tsk);
-
+	read_unlock(&tasklist_lock);	/* FIXME!! This should be done after the last use */
+	if (!tsk)
+		return 0;
 	state = *get_task_state(tsk);
 	vsize = eip = esp = 0;
-	if (mm)
-	{
+	if (mm) {
 		struct vm_area_struct *vma;
 		down(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
+		vma = mm->mmap;
+		while (vma) {
 			vsize += vma->vm_end - vma->vm_start;
+			vma = vma->vm_next;
+		}
+		eip = KSTK_EIP(tsk);
+		esp = KSTK_ESP(tsk);
 		up(&mm->mmap_sem);
 	}
 
+	wchan = get_wchan(tsk);
+
+	collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
+
+	if (tsk->tty)
+		tty_pgrp = tsk->tty->pgrp;
+	else
+		tty_pgrp = -1;
+
 	/* scale priority and nice values from timeslices to -20..20 */
 	/* to make it look like a "normal" Unix priority/nice value  */
 	priority = tsk->counter;
@@ -1026,10 +986,10 @@
 		pid,
 		tsk->comm,
 		state,
-		ppid,
+		tsk->p_pptr->pid,
 		tsk->pgrp,
 		tsk->session,
-		tty_device,
+	        tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
 		tty_pgrp,
 		tsk->flags,
 		tsk->min_flt,
@@ -1066,16 +1026,9 @@
 		tsk->cnswap,
 		tsk->exit_signal,
 		tsk->processor);
-
 	if (mm)
 		mmput(mm);
-	free_task_struct(tsk);
 	return res;
-
-out_unlock:
-	read_unlock(&tasklist_lock);
-	unlock_kernel();
-	return 0;
 }
 		
 static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -1087,7 +1040,7 @@
 	if (pmd_none(*pmd))
 		return;
 	if (pmd_bad(*pmd)) {
-		printk("statm_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+		pmd_ERROR(*pmd);
 		pmd_clear(pmd);
 		return;
 	}
@@ -1125,7 +1078,7 @@
 	if (pgd_none(*pgd))
 		return;
 	if (pgd_bad(*pgd)) {
-		printk("statm_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+		pgd_ERROR(*pgd);
 		pgd_clear(pgd);
 		return;
 	}
@@ -1223,11 +1176,11 @@
 			  size_t count, loff_t *ppos)
 {
 	struct task_struct *p;
-	struct mm_struct *mm = NULL;
 	struct vm_area_struct * map, * next;
 	char * destptr = buf, * buffer;
 	loff_t lineno;
 	ssize_t column, i;
+	int volatile_task;
 	long retval;
 
 	/*
@@ -1239,30 +1192,24 @@
 		goto out;
 
 	retval = -EINVAL;
-	lock_kernel();
 	read_lock(&tasklist_lock);
 	p = find_task_by_pid(pid);
-	if (p) {
-		mm = p->mm;
-		if (mm)
-			atomic_inc(&mm->mm_users);
-	}
-	read_unlock(&tasklist_lock);
-	unlock_kernel();
+	read_unlock(&tasklist_lock);	/* FIXME!! This should be done after the last use */
 	if (!p)
 		goto freepage_out;
 
-	/* nothing to map */
-	if (!mm || count == 0)
+	if (!p->mm || count == 0)
 		goto getlen_out;
 
+	/* Check whether the mmaps could change if we sleep */
+	volatile_task = (p != current || atomic_read(&p->mm->mm_users) > 1);
+
 	/* decode f_pos */
 	lineno = *ppos >> MAPS_LINE_SHIFT;
 	column = *ppos & (MAPS_LINE_LENGTH-1);
 
-	down(&mm->mmap_sem);
-	/* quickly go to line "lineno" */
-	for (map = mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
+	/* quickly go to line lineno */
+	for (map = p->mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
 		continue;
 
 	for ( ; map ; map = next ) {
@@ -1333,13 +1280,17 @@
 		/* done? */
 		if (count == 0)
 			break;
+
+		/* By writing to user space, we might have slept.
+		 * Stop the loop, to avoid a race condition.
+		 */
+		if (volatile_task)
+			break;
 	}
-	up(&mm->mmap_sem);
 
 	/* encode f_pos */
 	*ppos = (lineno << MAPS_LINE_SHIFT) + column;
 
-	mmput(mm);
 getlen_out:
 	retval = destptr - buf;
 
@@ -1352,31 +1303,28 @@
 #ifdef __SMP__
 static int get_pidcpu(int pid, char * buffer)
 {
-	struct task_struct * tsk;
-	int i, len = 0;
+	struct task_struct * tsk = current ;
+	int i, len;
 
-	/*
-	 * Hold the tasklist_lock to guarantee that the task_struct
-	 * address will remain valid while we examine its contents.
-	 */
 	read_lock(&tasklist_lock);
-	tsk = find_task_by_pid(pid);
-	if (tsk)
-		get_page(MAP_NR(tsk) + mem_map);
-	read_unlock(&tasklist_lock);
-	if (tsk) {
-		len = sprintf(buffer,
-			"cpu  %lu %lu\n",
-			tsk->times.tms_utime,
-			tsk->times.tms_stime);
+	if (pid != tsk->pid)
+		tsk = find_task_by_pid(pid);
+	read_unlock(&tasklist_lock);	/* FIXME!! This should be done after the last use */
+
+	if (tsk == NULL)
+		return 0;
+
+	len = sprintf(buffer,
+		"cpu  %lu %lu\n",
+		tsk->times.tms_utime,
+		tsk->times.tms_stime);
 		
-		for (i = 0 ; i < smp_num_cpus; i++)
-			len += sprintf(buffer + len, "cpu%d %lu %lu\n",
-				i,
-				tsk->per_cpu_utime[cpu_logical_map(i)],
-				tsk->per_cpu_stime[cpu_logical_map(i)]);
-		free_task_struct(tsk);
-	}
+	for (i = 0 ; i < smp_num_cpus; i++)
+		len += sprintf(buffer + len, "cpu%d %lu %lu\n",
+			i,
+			tsk->per_cpu_utime[cpu_logical_map(i)],
+			tsk->per_cpu_stime[cpu_logical_map(i)]);
+
 	return len;
 }
 #endif
@@ -1513,6 +1461,12 @@
 	int ok = 0;
 		
 	read_lock(&tasklist_lock);
+	
+	/*
+	 *	Grab the lock, find the task, save the uid and
+	 *	check it still has an mm (i.e. it's not dead)
+	 */
+	
 	p = find_task_by_pid(pid);
 	if (p) {
 		euid=p->euid;
@@ -1520,7 +1474,9 @@
 		if(!cap_issubset(p->cap_permitted, current->cap_permitted))
 			ok=0;			
 	}
+		
 	read_unlock(&tasklist_lock);
+
 	if (!p)
 		return 1;
 

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)