patch-2.3.26 linux/ipc/shm.c

diff -u --recursive --new-file v2.3.25/linux/ipc/shm.c linux/ipc/shm.c
@@ -9,6 +9,8 @@
  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
+ * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
+ *                         Christoph Rohland <hans-christoph.rohland@sap.com>
  */
 
 #include <linux/config.h>
@@ -25,7 +27,17 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
+#include "util.h"
+
+struct shmid_kernel /* extend struct shmid_ds with private fields */
+{	
+	struct shmid_ds		u;
+	unsigned long		shm_npages; /* size of segment (pages) */
+	pte_t			**shm_dir;  /* ptr to array of ptrs to frames -> SHMMAX */ 
+	struct vm_area_struct	*attaches;  /* descriptors for attaches */
+	int                     id; /* backreference to id for shm_close */
+};
+
 static int findkey (key_t key);
 static int newseg (key_t key, int shmflg, size_t size);
 static int shm_map (struct vm_area_struct *shmd);
@@ -38,13 +50,15 @@
 static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
 #endif
 
+unsigned int shm_prm[3] = {SHMMAX, SHMALL, SHMMNI};
+
 static int shm_tot = 0; /* total number of shared memory pages */
 static int shm_rss = 0; /* number of shared memory pages that are in memory */
 static int shm_swp = 0; /* number of shared memory pages that are in swap */
-static int max_shmid = 0; /* every used id is <= max_shmid */
+static int max_shmid = -1; /* every used id is <= max_shmid */
 static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */
-static struct shmid_kernel *shm_segs[SHMMNI];
-
+static struct shmid_kernel **shm_segs = NULL;
+static unsigned int num_segs = 0;
 static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */
 
 spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;
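
The shm_prm[] array above replaces the compile-time SHMMAX, SHMALL and
SHMMNI limits with run-time tunables; newseg() and sys_shmget() re-read
it on every call.  The sysctl wiring itself is not in this file.  A
minimal sketch of how such an entry is typically hooked up, assuming the
2.3-era ctl_table layout (table name and placement are illustrative; the
real hookup lives in kernel/sysctl.c and may differ):

    extern unsigned int shm_prm[3];     /* { shmmax, shmall, shmmni } */

    static ctl_table shm_table_sketch[] = {
        /* ctl_name, procname, data, maxlen, mode, child, proc_handler */
        {KERN_SHMMAX, "shmmax", &shm_prm[0], sizeof(unsigned int),
         0644, NULL, &proc_dointvec},
        {0}
    };

With proc_dointvec as the handler the limit appears as a writable
/proc/sys file, so it can be raised without a recompile.
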
@@ -56,22 +70,109 @@
 
 void __init shm_init (void)
 {
-	int id;
-
-	for (id = 0; id < SHMMNI; id++)
-		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
-	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
-	init_waitqueue_head(&shm_wait);
 #ifdef CONFIG_PROC_FS
 	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
 #endif
 	return;
 }
 
+#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
+
+static pte_t **shm_alloc(unsigned long pages)
+{
+	unsigned short dir  = pages / PTRS_PER_PTE;
+	unsigned short last = pages % PTRS_PER_PTE;
+	pte_t **ret, **ptr;
+
+	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
+	if (!ret)
+		goto out;
+
+	for (ptr = ret; ptr < ret+dir ; ptr++)
+	{
+		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
+		if (!*ptr)
+			goto free;
+		memset (*ptr, 0, PAGE_SIZE); 
+	}
+
+	/* The last chunk is usually smaller than PAGE_SIZE: use kmalloc */
+	if (last) {
+		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
+		if (!*ptr)
+			goto free;
+		memset (*ptr, 0, last*sizeof(pte_t));
+	}
+out:	
+	return ret;
+
+free:
+	/* The last failed: we decrement first */
+	while (--ptr >= ret)
+		free_page ((unsigned long)*ptr);
+
+	kfree (ret);
+	return NULL;
+}
+
+
+static void shm_free(pte_t** dir, unsigned long pages)
+{
+	pte_t **ptr = dir+pages/PTRS_PER_PTE;
+
+	/* first the last page */
+	if (pages%PTRS_PER_PTE)
+		kfree (*ptr);
+	/* now the whole pages */
+	while (--ptr >= dir)
+		free_page ((unsigned long)*ptr);
+
+	/* Now the indirect block */
+	kfree (dir);
+}
+
+static int shm_expand (unsigned int size)
+{
+	int id;
+	struct shmid_kernel ** new_array;
+
+	spin_unlock(&shm_lock);
+	new_array = kmalloc (size * sizeof(struct shmid_kernel *), GFP_KERNEL);
+	spin_lock(&shm_lock);
+
+	if (!new_array)
+		return -ENOMEM;
+
+	if (size <= num_segs){ /* Checked only now: the lock was dropped
+				   around kmalloc, so num_segs may have grown */
+		/*
+		 * We never shrink the segment. If we shrink we have to
+		 * check for stale handles in newseg
+		 */
+		kfree (new_array);
+		return 0;
+	}
+
+	if (num_segs) {
+		memcpy (new_array, shm_segs,
+			num_segs*sizeof(struct shmid_kernel *));
+		kfree (shm_segs);
+	}
+	for (id = num_segs; id < size; id++) 
+		new_array[id] = (void *) IPC_UNUSED;
+
+	shm_segs = new_array;
+	num_segs = size;
+	return 0;
+}
+
 static int findkey (key_t key)
 {
 	int id;
 	struct shmid_kernel *shp;
+	
+	if (!num_segs)
+		return -1;
 
 	for (id = 0; id <= max_shmid; id++) {
 		if ((shp = shm_segs[id]) == IPC_NOID) {
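
In place of a single vmalloc'ed pte array, shm_alloc() builds a
two-level structure: a kmalloc'ed directory whose full slots each hold
one page of ptes (PTRS_PER_PTE entries) and whose tail slot is a smaller
kmalloc'ed chunk.  SHM_ENTRY() resolves a page index in two steps.  A
stand-alone illustration of the arithmetic (user-space C; the PAGE_SIZE,
PTRS_PER_PTE and SHMMAX values are the usual i386 defaults, assumed for
the example):

    #include <stdio.h>

    #define PAGE_SIZE    4096UL      /* i386 (assumed) */
    #define PTRS_PER_PTE 1024        /* ptes per page on i386 (assumed) */
    #define SHMMAX       0x2000000UL /* default 32 MB limit (assumed) */

    int main(void)
    {
        unsigned long idx = 3000;    /* page index within a segment */

        /* the two-step lookup performed by SHM_ENTRY(shp, idx) */
        printf("page %lu -> dir slot %lu, offset %lu\n",
               idx, idx / PTRS_PER_PTE, idx % PTRS_PER_PTE);

        /* directory cost of a maximal segment: 8192 pages, 8 chunks */
        printf("SHMMAX needs %lu directory chunks\n",
               (SHMMAX / PAGE_SIZE + PTRS_PER_PTE - 1) / PTRS_PER_PTE);
        return 0;
    }
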
@@ -89,9 +190,8 @@
 			__set_current_state(TASK_RUNNING);
 			remove_wait_queue(&shm_wait, &wait);
 		}
-		if (shp == IPC_UNUSED)
-			continue;
-		if (key == shp->u.shm_perm.key)
+		if (shp != IPC_UNUSED &&
+		    key == shp->u.shm_perm.key)
 			return id;
 	}
 	return -1;
@@ -99,18 +199,31 @@
 
 /*
  * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
+ * This has to be called with the shm_lock held
  */
 static int newseg (key_t key, int shmflg, size_t size)
 {
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
-	int id;
+	int id, err;
+	unsigned int shmall, shmmni;
+
+	shmall = shm_prm[1];
+	shmmni = shm_prm[2];
+	if (shmmni > IPCMNI) {
+		printk ("shmmni reset to max of %u\n", IPCMNI);
+		shmmni = shm_prm[2] = IPCMNI;
+	}
 
+	if (shmmni < used_segs)
+		return -ENOSPC;
+	if ((err = shm_expand (shmmni)))
+		return err;
 	if (size < SHMMIN)
 		return -EINVAL;
-	if (shm_tot + numpages >= SHMALL)
+	if (shm_tot + numpages >= shmall)
 		return -ENOSPC;
-	for (id = 0; id < SHMMNI; id++)
+	for (id = 0; id < num_segs; id++)
 		if (shm_segs[id] == IPC_UNUSED) {
 			shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
 			goto found;
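
newseg() rounds the requested byte count up to whole pages before
charging it against shmall; (size + PAGE_SIZE - 1) >> PAGE_SHIFT is the
usual round-up-then-shift idiom.  In isolation (i386 values assumed):

    #include <stdio.h>

    #define PAGE_SHIFT 12                    /* i386 (assumed) */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
        unsigned long size = 5000;           /* bytes given to shmget() */
        unsigned long numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        printf("%lu bytes -> %lu pages\n", size, numpages); /* 2 pages */
        return 0;
    }
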
@@ -126,10 +239,8 @@
 		wake_up (&shm_wait);
 		return -ENOMEM;
 	}
-	lock_kernel();
-	shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
-	unlock_kernel();
-	if (!shp->shm_pages) {
+	shp->shm_dir = shm_alloc (numpages);
+	if (!shp->shm_dir) {
 		kfree(shp);
 		spin_lock(&shm_lock);
 		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
@@ -137,8 +248,6 @@
 		return -ENOMEM;
 	}
 
-	memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
-
 	shp->u.shm_perm.key = key;
 	shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
 	shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
@@ -150,6 +259,7 @@
 	shp->u.shm_atime = shp->u.shm_dtime = 0;
 	shp->u.shm_ctime = CURRENT_TIME;
 	shp->shm_npages = numpages;
+	shp->id = id;
 
 	spin_lock(&shm_lock);
 
@@ -161,21 +271,22 @@
 	shm_segs[id] = shp;
 	used_segs++;
 	wake_up (&shm_wait);
-	return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
+	return (unsigned int) shp->u.shm_perm.seq * IPCMNI + id;
 }
 
-size_t shmmax = SHMMAX;
-
 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
 {
 	struct shmid_kernel *shp;
 	int err, id = 0;
+	size_t shmmax;
+
+	shmmax = shm_prm[0];
+	if (size > shmmax)
+		return -EINVAL;
 
 	down(&current->mm->mmap_sem);
 	spin_lock(&shm_lock);
-	if (size > shmmax) {
-		err = -EINVAL;
-	} else if (key == IPC_PRIVATE) {
+	if (key == IPC_PRIVATE) {
 		err = newseg(key, shmflg, size);
 	} else if ((id = findkey (key)) == -1) {
 		if (!(shmflg & IPC_CREAT))
@@ -193,7 +304,7 @@
 		else if (ipcperms (&shp->u.shm_perm, shmflg))
 			err = -EACCES;
 		else
-			err = (int) shp->u.shm_perm.seq * SHMMNI + id;
+			err = (int) shp->u.shm_perm.seq * IPCMNI + id;
 	}
 	spin_unlock(&shm_lock);
 	up(&current->mm->mmap_sem);
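
A shmid packs a slot number and a reuse sequence counter.  Because
SHMMNI is now variable, the fixed constant IPCMNI serves as the radix,
so ids stay decodable no matter how the limit is tuned.  The round trip
in isolation (the IPCMNI value is an assumption for the demo):

    #include <stdio.h>

    #define IPCMNI 32768   /* fixed encoding radix (value assumed) */

    int main(void)
    {
        unsigned int seq = 5, slot = 42;
        unsigned int shmid = seq * IPCMNI + slot; /* as newseg() builds it */
        printf("shmid %u -> slot %u, seq %u\n",
               shmid, shmid % IPCMNI, shmid / IPCMNI); /* shmctl() split */
        return 0;
    }
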
@@ -214,18 +325,18 @@
 	if (shp == IPC_NOID || shp == IPC_UNUSED)
 		BUG();
 	shp->u.shm_perm.seq++;     /* for shmat */
-	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
+	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/IPCMNI); /* increment, but avoid overflow */
 	shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
 	used_segs--;
 	if (id == max_shmid)
-		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
-	if (!shp->shm_pages)
+		while (max_shmid-- > 0 && (shm_segs[max_shmid] == IPC_UNUSED));
+	if (!shp->shm_dir)
 		BUG();
 	spin_unlock(&shm_lock);
 	numpages = shp->shm_npages;
 	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
 		pte_t pte;
-		pte = shp->shm_pages[i];
+		pte = SHM_ENTRY (shp,i);
 		if (pte_none(pte))
 			continue;
 		if (pte_present(pte)) {
@@ -238,9 +349,7 @@
 			swp++;
 		}
 	}
-	lock_kernel();
-	vfree(shp->shm_pages);
-	unlock_kernel();
+	shm_free (shp->shm_dir, numpages);
 	kfree(shp);
 	spin_lock(&shm_lock);
 	shm_rss -= rss;
@@ -269,19 +378,20 @@
 	case IPC_INFO:
 	{
 		struct shminfo shminfo;
+		spin_unlock(&shm_lock);
 		err = -EFAULT;
 		if (!buf)
 			goto out;
-		shminfo.shmmni = SHMMNI;
-		shminfo.shmmax = shmmax;
+
+		shminfo.shmmni = shminfo.shmseg = shm_prm[2];
+		shminfo.shmmax = shm_prm[0];
+		shminfo.shmall = shm_prm[1];
+
 		shminfo.shmmin = SHMMIN;
-		shminfo.shmall = SHMALL;
-		shminfo.shmseg = SHMSEG;
-		spin_unlock(&shm_lock);
 		if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
 			goto out_unlocked;
 		spin_lock(&shm_lock);
-		err = max_shmid;
+		err = max_shmid < 0 ? 0 : max_shmid;
 		goto out;
 	}
 	case SHM_INFO:
@@ -298,7 +408,7 @@
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
 			goto out_unlocked;
 		spin_lock(&shm_lock);
-		err = max_shmid;
+		err = max_shmid < 0 ? 0 : max_shmid;
 		goto out;
 	}
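
From user space the tunables surface through shmctl(): IPC_INFO fills a
struct shminfo straight from shm_prm[].  A minimal caller (glibc exposes
IPC_INFO under _GNU_SOURCE; the shmid_ds cast is the conventional way to
pass the out-parameter):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        struct shminfo info;

        if (shmctl(0, IPC_INFO, (struct shmid_ds *) &info) < 0) {
            perror("shmctl(IPC_INFO)");
            return 1;
        }
        printf("shmmax=%lu shmall=%lu shmmni=%lu\n",
               info.shmmax, info.shmall, info.shmmni);
        return 0;
    }
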
 	case SHM_STAT:
@@ -310,7 +420,7 @@
 			goto out;
 		if (ipcperms (&shp->u.shm_perm, S_IRUGO))
 			goto out;
-		id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
+		id = (unsigned int) shp->u.shm_perm.seq * IPCMNI + shmid;
 		err = -EFAULT;
 		spin_unlock(&shm_lock);
 		if(copy_to_user (buf, &shp->u, sizeof(*buf)))
@@ -320,12 +430,13 @@
 		goto out;
 	}
 
-	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
 	err = -EINVAL;
-	if (shp == IPC_UNUSED || shp == IPC_NOID)
+	if ((id = (unsigned int) shmid % IPCMNI) > max_shmid)
+		goto out;
+	if ((shp = shm_segs[id]) == IPC_UNUSED || shp == IPC_NOID)
 		goto out;
 	err = -EIDRM;
-	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+	if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
 		goto out;
 	ipcp = &shp->u.shm_perm;
 
@@ -480,7 +591,7 @@
 	if (shmid < 0)
 		goto out;
 
-	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
+	shp = shm_segs[id = (unsigned int) shmid % IPCMNI];
 	if (shp == IPC_UNUSED || shp == IPC_NOID)
 		goto out;
 
@@ -523,7 +634,7 @@
 	if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
 		goto out;
 	err = -EIDRM;
-	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+	if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
 		goto out;
 
 	spin_unlock(&shm_lock);
@@ -532,13 +643,13 @@
 	spin_lock(&shm_lock);
 	if (!shmd)
 		goto out;
-	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
+	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)) {
 		kmem_cache_free(vm_area_cachep, shmd);
 		err = -EIDRM;
 		goto out;
 	}
 
-	shmd->vm_private_data = shm_segs + id;
+	shmd->vm_private_data = shm_segs[id];
 	shmd->vm_start = addr;
 	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
 	shmd->vm_mm = current->mm;
@@ -584,7 +695,7 @@
 	struct shmid_kernel *shp;
 
 	spin_lock(&shm_lock);
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
 	shp->u.shm_nattch++;
 	shp->u.shm_atime = CURRENT_TIME;
@@ -604,14 +715,12 @@
 
 	spin_lock(&shm_lock);
 	/* remove from the list of attaches of the shm segment */
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	remove_attach(shp,shmd);  /* remove from shp->attaches */
   	shp->u.shm_lpid = current->pid;
 	shp->u.shm_dtime = CURRENT_TIME;
-	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) {
-		unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
-		killseg (id);
-	}
+	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
+		killseg (shp->id);
 	spin_unlock(&shm_lock);
 }
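
The vm_private_data change in the hunks above is forced by shm_expand():
the old code stored the address of the slot (shm_segs + id) and later
recovered the id by pointer arithmetic, but such an interior pointer
goes stale once the array can be kfree'd and reallocated.  Storing the
shmid_kernel pointer itself, with the slot number carried in the new id
field, keeps shm_close() safe across expansion.  A generic user-space
illustration of the hazard:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        int *arr = malloc(4 * sizeof(int));
        int *bigger = malloc(8 * sizeof(int));
        if (!arr || !bigger)
            return 1;

        int *slot = &arr[2];   /* interior pointer, like shm_segs + id */

        memcpy(bigger, arr, 4 * sizeof(int));
        free(arr);             /* the reallocation shm_expand() does */
        arr = bigger;

        /* 'slot' now dangles; only the index 2 survives the move */
        printf("old slot %p is stale, arr[2] now lives at %p\n",
               (void *) slot, (void *) &arr[2]);
        return 0;
    }
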
 
@@ -648,7 +757,7 @@
 }
 
 /*
- * page not present ... go through shm_pages
+ * page not present ... go through shm_dir
  */
 static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
 {
@@ -657,13 +766,13 @@
 	unsigned int idx;
 	struct page * page;
 
-	shp = *(struct shmid_kernel **) shmd->vm_private_data;
+	shp = (struct shmid_kernel *) shmd->vm_private_data;
 	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
 	idx += shmd->vm_pgoff;
 
 	spin_lock(&shm_lock);
 again:
-	pte = shp->shm_pages[idx];
+	pte = SHM_ENTRY(shp,idx);
 	if (!pte_present(pte)) {
 		if (pte_none(pte)) {
 			spin_unlock(&shm_lock);
@@ -672,7 +781,7 @@
 				goto oom;
 			clear_highpage(page);
 			spin_lock(&shm_lock);
-			if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
+			if (pte_val(pte) != pte_val(SHM_ENTRY(shp, idx)))
 				goto changed;
 		} else {
 			swp_entry_t entry = pte_to_swp_entry(pte);
@@ -694,18 +803,18 @@
 			unlock_kernel();
 			spin_lock(&shm_lock);
 			shm_swp--;
-			pte = shp->shm_pages[idx];
+			pte = SHM_ENTRY(shp, idx);
 			if (pte_present(pte))
 				goto present;
 		}
 		shm_rss++;
 		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-		shp->shm_pages[idx] = pte;
+		SHM_ENTRY(shp, idx) = pte;
 	} else
 		--current->maj_flt;  /* was incremented in do_no_page */
 
 done:
-	/* pte_val(pte) == shp->shm_pages[idx] */
+	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
 	get_page(pte_page(pte));
 	spin_unlock(&shm_lock);
 	current->min_flt++;
@@ -719,7 +828,7 @@
 		free_page_and_swap_cache(page);
 	goto done;
 oom:
-	return (struct page *)(-1);
+	return NOPAGE_OOM;
 }
 
 /*
@@ -770,7 +879,7 @@
 	if (idx >= shp->shm_npages)
 		goto next_id;
 
-	page = shp->shm_pages[idx];
+	page = SHM_ENTRY(shp, idx);
 	if (!pte_present(page))
 		goto check_table;
 	page_map = pte_page(page);
@@ -788,11 +897,11 @@
 		unlock_kernel();
 		return 0;
 	}
-	if (page_count(page_map))
+	if (page_count(page_map) != 1)
 		goto check_table;
 	if (!(page_map = prepare_highmem_swapout(page_map)))
-		goto check_table;
-	shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
+		goto failed;
+	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
 	swap_successes++;
 	shm_swp++;
 	shm_rss--;
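
The swap-out test changes meaning here, not just spelling: a resident
page always holds at least one reference (the segment's own entry), so
the old test "if (page_count(page_map))" appears to have skipped every
page.  Comparing against 1 takes exactly the pages whose only remaining
user is the segment itself; anything still mapped by an attacher has a
higher count.  A toy model of the predicate:

    #include <stdio.h>

    /* count 1 = only the segment's own reference; more = still mapped */
    static int swappable(int page_count) { return page_count == 1; }

    int main(void)
    {
        printf("count 1 -> %s\n", swappable(1) ? "swap" : "skip");
        printf("count 3 -> %s\n", swappable(3) ? "swap" : "skip");
        return 0;
    }
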
@@ -812,12 +921,12 @@
  * Free the swap entry and set the new pte for the shm page.
  */
 static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
-			swp_entry_t entry, struct page *page)
+			   swp_entry_t entry, struct page *page)
 {
 	pte_t pte;
 
 	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-	shp->shm_pages[idx] = pte;
+	SHM_ENTRY(shp, idx) = pte;
 	get_page(page);
 	shm_rss++;
 
@@ -837,16 +946,16 @@
 	int i, n;
 
 	spin_lock(&shm_lock);
-	for (i = 0; i < SHMMNI; i++) {
+	for (i = 0; i <= max_shmid; i++) {
 		struct shmid_kernel *seg = shm_segs[i];
 		if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
 			continue;
 		for (n = 0; n < seg->shm_npages; n++) {
-			if (pte_none(seg->shm_pages[n]))
+			if (pte_none(SHM_ENTRY(seg,n)))
 				continue;
-			if (pte_present(seg->shm_pages[n]))
+			if (pte_present(SHM_ENTRY(seg,n)))
 				continue;
-			if (pte_to_swp_entry(seg->shm_pages[n]).val == entry.val) {
+			if (pte_to_swp_entry(SHM_ENTRY(seg,n)).val == entry.val) {
 				shm_unuse_page(seg, n, entry, page);
 				return;
 			}
@@ -865,7 +974,7 @@
     	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");
 
 	spin_lock(&shm_lock);
-    	for(i = 0; i < SHMMNI; i++)
+    	for(i = 0; i <= max_shmid; i++)
 		if(shm_segs[i] != IPC_UNUSED) {
 #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
 #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
@@ -877,7 +986,7 @@
 				format = BIG_STRING;
 	    		len += sprintf(buffer + len, format,
 			shm_segs[i]->u.shm_perm.key,
-			shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
+			shm_segs[i]->u.shm_perm.seq * IPCMNI + i,
 			shm_segs[i]->u.shm_perm.mode,
 			shm_segs[i]->u.shm_segsz,
 			shm_segs[i]->u.shm_cpid,
