patch-2.3.35 linux/arch/sparc64/mm/init.c

diff -u --recursive --new-file v2.3.34/linux/arch/sparc64/mm/init.c linux/arch/sparc64/mm/init.c
@@ -1,4 +1,4 @@
-/*  $Id: init.c,v 1.135 1999/09/06 22:55:10 ecd Exp $
+/*  $Id: init.c,v 1.143 1999/12/16 16:15:14 davem Exp $
  *  arch/sparc64/mm/init.c
  *
  *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
@@ -6,8 +6,11 @@
  */
  
 #include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/init.h>
+#include <linux/bootmem.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
 #include <linux/blk.h>
@@ -17,6 +20,7 @@
 #include <asm/head.h>
 #include <asm/system.h>
 #include <asm/page.h>
+#include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/oplib.h>
 #include <asm/iommu.h>
@@ -26,13 +30,8 @@
 #include <asm/vaddrs.h>
 #include <asm/dma.h>
 
-/* Turn this off if you suspect some place in some physical memory hole
-   might get into page tables (something would be broken very much). */
-   
-#define FREE_UNUSED_MEM_MAP
-
 extern void show_net_buffers(void);
-extern unsigned long device_scan(unsigned long);
+extern void device_scan(void);
 
 struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
 
@@ -41,6 +40,8 @@
 /* Ugly, but necessary... -DaveM */
 unsigned long phys_base;
 
+static unsigned long totalram_pages = 0;
+
 /* get_new_mmu_context() uses "cache + 1".  */
 spinlock_t ctx_alloc_lock = SPIN_LOCK_UNLOCKED;
 unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
@@ -48,7 +49,7 @@
 unsigned long mmu_context_bmap[CTX_BMAP_SLOTS];
 
 /* References to section boundaries */
-extern char __init_begin, __init_end, etext, __bss_start;
+extern char __init_begin, __init_end, _start, _end, etext, edata;
 
 int do_check_pgt_cache(int low, int high)
 {
@@ -60,8 +61,10 @@
 			if(pgd_quicklist)
 				free_pgd_slow(get_pgd_fast()), freed++;
 #endif
-			if(pte_quicklist)
-				free_pte_slow(get_pte_fast()), freed++;
+			if(pte_quicklist[0])
+				free_pte_slow(get_pte_fast(0)), freed++;
+			if(pte_quicklist[1])
+				free_pte_slow(get_pte_fast(1)), freed++;
 		} while(pgtable_cache_size > low);
 	}
 #ifndef __SMP__ 
@@ -110,42 +113,20 @@
 pte_t __bad_page(void)
 {
 	memset((void *) &empty_bad_page, 0, PAGE_SIZE);
-	return pte_mkdirty(mk_pte((((unsigned long) &empty_bad_page) 
-		- ((unsigned long)&empty_zero_page) + phys_base + PAGE_OFFSET),
-				  PAGE_SHARED));
+	return pte_mkdirty(mk_pte_phys((((unsigned long) &empty_bad_page) 
+					- ((unsigned long)&empty_zero_page)
+					+ phys_base),
+				       PAGE_SHARED));
 }
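
[Editor's note, an illustrative sketch and not part of the patch: the old
code handed mk_pte() a PAGE_OFFSET-relative kernel virtual address, while
the new mk_pte_phys() takes the physical address directly, which is why
the "+ PAGE_OFFSET" term disappears from the caller.  A minimal userspace
sketch of the address arithmetic, with hypothetical values:

	#include <stdio.h>

	#define PAGE_OFFSET 0xfffff80000000000UL	/* sparc64 direct map */

	int main(void)
	{
		unsigned long paddr = 0x20000000UL;	/* hypothetical page frame */

		/* 2.3.34 caller: mk_pte(paddr + PAGE_OFFSET, PAGE_SHARED) */
		/* 2.3.35 caller: mk_pte_phys(paddr, PAGE_SHARED)          */
		printf("vaddr %#lx <-> paddr %#lx\n",
		       paddr + PAGE_OFFSET, paddr);
		return 0;
	}
]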
 
 void show_mem(void)
 {
-	int free = 0,total = 0,reserved = 0;
-	int shared = 0, cached = 0;
-	struct page *page, *end;
-
-	printk("\nMem-info:\n");
+	printk("Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
-	for (page = mem_map, end = mem_map + max_mapnr;
-	     page < end; page++) {
-		if (PageSkip(page)) {
-			if (page->next_hash < page)
-				break;
-			page = page->next_hash;
-		}
-		total++;
-		if (PageReserved(page))
-			reserved++;
-		else if (PageSwapCache(page))
-			cached++;
-		else if (!atomic_read(&page->count))
-			free++;
-		else
-			shared += atomic_read(&page->count) - 1;
-	}
-	printk("%d pages of RAM\n",total);
-	printk("%d free pages\n",free);
-	printk("%d reserved pages\n",reserved);
-	printk("%d pages shared\n",shared);
-	printk("%d pages swap cached\n",cached);
+	printk("Free swap:       %6dkB\n",
+	       nr_swap_pages << (PAGE_SHIFT-10));
+	printk("%ld pages of RAM\n", totalram_pages);
+	printk("%d free pages\n", nr_free_pages());
 	printk("%d pages in page table cache\n",pgtable_cache_size);
 #ifndef __SMP__
 	printk("%d entries in page dir cache\n",pgd_cache_size);
@@ -156,508 +137,46 @@
 #endif
 }
 
-/* IOMMU support, the ideas are right, the code should be cleaned a bit still... */
-
-/* This keeps track of pages used in sparc_alloc_dvma() invocations. */
-/* NOTE: All of these are inited to 0 in bss, don't need to make data segment bigger */
-#define DVMAIO_SIZE 0x2000000
-static unsigned long dvma_map_pages[DVMAIO_SIZE >> 16];
-static unsigned long dvma_pages_current_offset;
-static int dvma_pages_current_index;
-static unsigned long dvmaiobase = 0;
-static unsigned long dvmaiosz __initdata = 0;
-
-void __init dvmaio_init(void)
-{
-	long i;
-	
-	if (!dvmaiobase) {
-		for (i = 0; sp_banks[i].num_bytes != 0; i++)
-			if (sp_banks[i].base_addr + sp_banks[i].num_bytes > dvmaiobase)
-				dvmaiobase = sp_banks[i].base_addr + sp_banks[i].num_bytes;
-
-		/* We map directly phys_base to phys_base+(4GB-DVMAIO_SIZE). */
-		dvmaiobase -= phys_base;
-
-		dvmaiobase = (dvmaiobase + DVMAIO_SIZE + 0x400000 - 1) & ~(0x400000 - 1);
-		for (i = 0; i < 6; i++)
-			if (dvmaiobase <= ((1024L * 64 * 1024) << i))
-				break;
-		dvmaiobase = ((1024L * 64 * 1024) << i) - DVMAIO_SIZE;
-		dvmaiosz = i;
-	}
-}
-
-void __init iommu_init(int iommu_node, struct linux_sbus *sbus)
-{
-	extern int this_is_starfire;
-	extern void *starfire_hookup(int);
-	struct iommu_struct *iommu;
-	struct sysio_regs *sregs;
-	struct linux_prom64_registers rprop;
-	unsigned long impl, vers;
-	unsigned long control, tsbbase;
-	unsigned long tsbbases[32];
-	unsigned long *iopte;
-	int err, i, j;
-	
-	dvmaio_init();
-	err = prom_getproperty(iommu_node, "reg", (char *)&rprop,
-			       sizeof(rprop));
-	if(err == -1) {
-		prom_printf("iommu_init: Cannot map SYSIO control registers.\n");
-		prom_halt();
-	}
-	sregs = (struct sysio_regs *) __va(rprop.phys_addr);
-
-	if(!sregs) {
-		prom_printf("iommu_init: Fatal error, sysio regs not mapped\n");
-		prom_halt();
-	}
-
-	iommu = kmalloc(sizeof(struct iommu_struct), GFP_ATOMIC);
-	if (!iommu) {
-		prom_printf("iommu_init: Fatal error, kmalloc(iommu) failed\n");
-		prom_halt();
-	}
-
-	spin_lock_init(&iommu->iommu_lock);
-	iommu->sysio_regs = sregs;
-	sbus->iommu = iommu;
-
-	control = sregs->iommu_control;
-	impl = (control & IOMMU_CTRL_IMPL) >> 60;
-	vers = (control & IOMMU_CTRL_VERS) >> 56;
-	printk("IOMMU(SBUS): IMPL[%x] VERS[%x] SYSIO mapped at %016lx\n",
-	       (unsigned int) impl, (unsigned int)vers, (unsigned long) sregs);
-	
-	/* Streaming buffer is unreliable on VERS 0 of SYSIO,
-	 * although such parts were never shipped in production
-	 * Sun hardware, I check just to be robust.  --DaveM
-	 */
-	vers = ((sregs->control & SYSIO_CONTROL_VER) >> 56);
-	if (vers == 0)
-		iommu->strbuf_enabled = 0;
-	else
-		iommu->strbuf_enabled = 1;
-
-	control &= ~(IOMMU_CTRL_TSBSZ);
-	control |= ((IOMMU_TSBSZ_2K * dvmaiosz) | IOMMU_CTRL_TBWSZ | IOMMU_CTRL_ENAB);
-
-	/* Use only 64k pages, things are layed out in the 32-bit SBUS
-	 * address space like this:
-	 *
-	 * 0x00000000	  ----------------------------------------
-	 *		  | Direct physical mappings for most    |
-	 *                | DVMA to paddr's within this range    |
-	 * dvmaiobase     ----------------------------------------
-	 * 		  | For mappings requested via           |
-	 *                | sparc_alloc_dvma()		         |
-	 * dvmaiobase+32M ----------------------------------------
-	 *
-	 * NOTE: we need to order 2 contiguous order 5, that's the largest
-	 *       chunk page_alloc will give us.   -JJ */
-	tsbbase = 0;
-	if (dvmaiosz == 6) {
-		memset (tsbbases, 0, sizeof(tsbbases));
-		for (i = 0; i < 32; i++) {
-			tsbbases[i] = __get_free_pages(GFP_DMA, 5);
-			for (j = 0; j < i; j++)
-				if (tsbbases[j] == tsbbases[i] + 32768*sizeof(iopte_t)) {
-					tsbbase = tsbbases[i];
-					break;
-				} else if (tsbbases[i] == tsbbases[j] + 32768*sizeof(iopte_t)) {
-					tsbbase = tsbbases[j];
-					break;
-				}
-			if (tsbbase) {
-				tsbbases[i] = 0;
-				tsbbases[j] = 0;
-				break;
-			}
-		}
-		for (i = 0; i < 32; i++)
-			if (tsbbases[i])
-				free_pages(tsbbases[i], 5);
-	} else
-		tsbbase = __get_free_pages(GFP_DMA, dvmaiosz);
-	if (!tsbbase) {
-		prom_printf("Strange. Could not allocate 512K of contiguous RAM.\n");
-		prom_halt();
-	}
-	iommu->page_table = (iopte_t *) tsbbase;
-	iopte = (unsigned long *) tsbbase;
-
-	/* Setup aliased mappings... */
-	for(i = 0; i < (dvmaiobase >> 16); i++) {
-		unsigned long val = ((((unsigned long)i) << 16UL) + phys_base);
-
-		val |= IOPTE_VALID | IOPTE_64K | IOPTE_WRITE;
-		if (iommu->strbuf_enabled)
-			val |= IOPTE_STBUF;
-		else
-			val |= IOPTE_CACHE;
-		*iopte = val;
-		iopte++;
-	}
-
-	/* Clear all sparc_alloc_dvma() maps. */
-	for( ; i < ((dvmaiobase + DVMAIO_SIZE) >> 16); i++)
-		*iopte++ = 0;
-
-	sregs->iommu_tsbbase = __pa(tsbbase);
-	sregs->iommu_control = control;
-
-	/* Get the streaming buffer going. */
-	control = sregs->sbuf_control;
-	impl = (control & SYSIO_SBUFCTRL_IMPL) >> 60;
-	vers = (control & SYSIO_SBUFCTRL_REV) >> 56;
-	printk("IOMMU: Streaming Buffer IMPL[%x] REV[%x] ... ",
-	       (unsigned int)impl, (unsigned int)vers);
-	iommu->flushflag = 0;
-
-	if (iommu->strbuf_enabled != 0) {
-		sregs->sbuf_control = (control | SYSIO_SBUFCTRL_SB_EN);
-		printk("ENABLED\n");
-	} else {
-		sregs->sbuf_control = (control & ~(SYSIO_SBUFCTRL_SB_EN));
-		printk("DISABLED\n");
-	}
-
-	/* Finally enable DVMA arbitration for all devices, just in case. */
-	sregs->sbus_control |= SYSIO_SBCNTRL_AEN;
-
-	/* If necessary, hook us up for starfire IRQ translations. */
-	sbus->upaid = prom_getintdefault(sbus->prom_node, "upa-portid", -1);
-	if(this_is_starfire)
-		sbus->starfire_cookie = starfire_hookup(sbus->upaid);
-	else
-		sbus->starfire_cookie = NULL;
-}
-
-void mmu_map_dma_area(unsigned long addr, int len, __u32 *dvma_addr,
-		      struct linux_sbus *sbus)
-{
-	pgd_t *pgdp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	/* Find out if we need to grab some pages. */
-	if(!dvma_map_pages[dvma_pages_current_index] ||
-	   ((dvma_pages_current_offset + len) > (1 << 16))) {
-		struct linux_sbus *sbus;
-		unsigned long *iopte;
-		unsigned long newpages = __get_free_pages(GFP_KERNEL, 3);
-		int i;
-
-		if(!newpages)
-			panic("AIEEE cannot get DVMA pages.");
-
-		memset((char *)newpages, 0, (1 << 16));
-
-		if(!dvma_map_pages[dvma_pages_current_index]) {
-			dvma_map_pages[dvma_pages_current_index] = newpages;
-			i = dvma_pages_current_index;
-		} else {
-			dvma_map_pages[dvma_pages_current_index + 1] = newpages;
-			i = dvma_pages_current_index + 1;
-		}
-
-		/* Stick it in the IOMMU. */
-		i = (dvmaiobase >> 16) + i;
-		for_each_sbus(sbus) {
-			struct iommu_struct *iommu = sbus->iommu;
-			unsigned long flags;
-
-			spin_lock_irqsave(&iommu->iommu_lock, flags);
-			iopte = (unsigned long *)(iommu->page_table + i);
-			*iopte  = (IOPTE_VALID | IOPTE_64K | IOPTE_CACHE | IOPTE_WRITE);
-			*iopte |= __pa(newpages);
-			spin_unlock_irqrestore(&iommu->iommu_lock, flags);
-		}
-	}
-
-	/* Get this out of the way. */
-	*dvma_addr = (__u32) ((dvmaiobase) +
-			      (dvma_pages_current_index << 16) +
-			      (dvma_pages_current_offset));
-
-	while(len > 0) {
-		while((len > 0) && (dvma_pages_current_offset < (1 << 16))) {
-			pte_t pte;
-			unsigned long the_page =
-				dvma_map_pages[dvma_pages_current_index] +
-				dvma_pages_current_offset;
-
-			/* Map the CPU's view. */
-			pgdp = pgd_offset(&init_mm, addr);
-			pmdp = pmd_alloc_kernel(pgdp, addr);
-			ptep = pte_alloc_kernel(pmdp, addr);
-			pte = mk_pte(the_page, PAGE_KERNEL);
-			set_pte(ptep, pte);
-
-			dvma_pages_current_offset += PAGE_SIZE;
-			addr += PAGE_SIZE;
-			len -= PAGE_SIZE;
-		}
-		dvma_pages_current_index++;
-		dvma_pages_current_offset = 0;
-	}
-}
-
-__u32 mmu_get_scsi_one(char *vaddr, unsigned long len, struct linux_sbus *sbus)
-{
-	struct iommu_struct *iommu = sbus->iommu;
-	struct sysio_regs *sregs = iommu->sysio_regs;
-	unsigned long start = (unsigned long) vaddr;
-	unsigned long end = PAGE_ALIGN(start + len);
-	unsigned long flags, tmp;
-	volatile u64 *sbctrl = (volatile u64 *) &sregs->sbus_control;
-
-	start &= PAGE_MASK;
-	if (end > MAX_DMA_ADDRESS) {
-		printk("mmu_get_scsi_one: Bogus DMA buffer address [%016lx:%d]\n",
-		       (unsigned long) vaddr, (int)len);
-		panic("DMA address too large, tell DaveM");
-	}
-
-	if (iommu->strbuf_enabled) {
-		volatile u64 *sbuf_pflush = (volatile u64 *) &sregs->sbuf_pflush;
-
-		spin_lock_irqsave(&iommu->iommu_lock, flags);
-		iommu->flushflag = 0;
-		while(start < end) {
-			*sbuf_pflush = start;
-			start += PAGE_SIZE;
-		}
-		sregs->sbuf_fsync = __pa(&(iommu->flushflag));
-		tmp = *sbctrl;
-		while(iommu->flushflag == 0)
-			membar("#LoadLoad");
-		spin_unlock_irqrestore(&iommu->iommu_lock, flags);
-	}
-
-	return sbus_dvma_addr(vaddr);
-}
-
-void mmu_release_scsi_one(u32 vaddr, unsigned long len, struct linux_sbus *sbus)
-{
-	struct iommu_struct *iommu = sbus->iommu;
-	struct sysio_regs *sregs = iommu->sysio_regs;
-	unsigned long start = (unsigned long) vaddr;
-	unsigned long end = PAGE_ALIGN(start + len);
-	unsigned long flags, tmp;
-	volatile u64 *sbctrl = (volatile u64 *) &sregs->sbus_control;
-
-	start &= PAGE_MASK;
-
-	if (iommu->strbuf_enabled) {
-		volatile u64 *sbuf_pflush = (volatile u64 *) &sregs->sbuf_pflush;
-
-		spin_lock_irqsave(&iommu->iommu_lock, flags);
-
-		/* 1) Clear the flush flag word */
-		iommu->flushflag = 0;
-
-		/* 2) Tell the streaming buffer which entries
-		 *    we want flushed.
-		 */
-		while(start < end) {
-			*sbuf_pflush = start;
-			start += PAGE_SIZE;
-		}
-
-		/* 3) Initiate flush sequence. */
-		sregs->sbuf_fsync = __pa(&(iommu->flushflag));
-
-		/* 4) Guarentee completion of all previous writes
-		 *    by reading SYSIO's SBUS control register.
-		 */
-		tmp = *sbctrl;
-
-		/* 5) Wait for flush flag to get set. */
-		while(iommu->flushflag == 0)
-			membar("#LoadLoad");
-
-		spin_unlock_irqrestore(&iommu->iommu_lock, flags);
-	}
-}
-
-void mmu_get_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_sbus *sbus)
-{
-	struct iommu_struct *iommu = sbus->iommu;
-	struct sysio_regs *sregs = iommu->sysio_regs;
-	unsigned long flags, tmp;
-	volatile u64 *sbctrl = (volatile u64 *) &sregs->sbus_control;
-
-	if (iommu->strbuf_enabled) {
-		volatile u64 *sbuf_pflush = (volatile u64 *) &sregs->sbuf_pflush;
-
-		spin_lock_irqsave(&iommu->iommu_lock, flags);
-		iommu->flushflag = 0;
-
-		while(sz >= 0) {
-			unsigned long start = (unsigned long)sg[sz].addr;
-			unsigned long end = PAGE_ALIGN(start + sg[sz].len);
-
-			if (end > MAX_DMA_ADDRESS) {
-				printk("mmu_get_scsi_sgl: Bogus DMA buffer address "
-				       "[%016lx:%d]\n", start, (int) sg[sz].len);
-				panic("DMA address too large, tell DaveM");
-			}
-
-			sg[sz--].dvma_addr = sbus_dvma_addr(start);
-			start &= PAGE_MASK;
-			while(start < end) {
-				*sbuf_pflush = start;
-				start += PAGE_SIZE;
-			}
-		}
-
-		sregs->sbuf_fsync = __pa(&(iommu->flushflag));
-		tmp = *sbctrl;
-		while(iommu->flushflag == 0)
-			membar("#LoadLoad");
-		spin_unlock_irqrestore(&iommu->iommu_lock, flags);
-	} else {
-		/* Just verify the addresses and fill in the
-		 * dvma_addr fields in this case.
-		 */
-		while(sz >= 0) {
-			unsigned long start = (unsigned long)sg[sz].addr;
-			unsigned long end = PAGE_ALIGN(start + sg[sz].len);
-			if (end > MAX_DMA_ADDRESS) {
-				printk("mmu_get_scsi_sgl: Bogus DMA buffer address "
-				       "[%016lx:%d]\n", start, (int) sg[sz].len);
-				panic("DMA address too large, tell DaveM");
-			}
-			sg[sz--].dvma_addr = sbus_dvma_addr(start);
-		}
-	}
-}
-
-void mmu_release_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_sbus *sbus)
-{
-	struct iommu_struct *iommu = sbus->iommu;
-	struct sysio_regs *sregs = iommu->sysio_regs;
-	volatile u64 *sbctrl = (volatile u64 *) &sregs->sbus_control;
-	unsigned long flags, tmp;
-
-	if (iommu->strbuf_enabled) {
-		volatile u64 *sbuf_pflush = (volatile u64 *) &sregs->sbuf_pflush;
-
-		spin_lock_irqsave(&iommu->iommu_lock, flags);
-
-		/* 1) Clear the flush flag word */
-		iommu->flushflag = 0;
-
-		/* 2) Tell the streaming buffer which entries
-		 *    we want flushed.
-		 */
-		while(sz >= 0) {
-			unsigned long start = sg[sz].dvma_addr;
-			unsigned long end = PAGE_ALIGN(start + sg[sz].len);
-
-			start &= PAGE_MASK;
-			while(start < end) {
-				*sbuf_pflush = start;
-				start += PAGE_SIZE;
-			}
-			sz--;
-		}
-
-		/* 3) Initiate flush sequence. */
-		sregs->sbuf_fsync = __pa(&(iommu->flushflag));
-
-		/* 4) Guarentee completion of previous writes
-		 *    by reading SYSIO's SBUS control register.
-		 */
-		tmp = *sbctrl;
-
-		/* 5) Wait for flush flag to get set. */
-		while(iommu->flushflag == 0)
-			membar("#LoadLoad");
-
-		spin_unlock_irqrestore(&iommu->iommu_lock, flags);
-	}
-}
-
-void mmu_set_sbus64(struct linux_sbus_device *sdev, int bursts)
-{
-	struct linux_sbus *sbus = sdev->my_bus;
-	struct sysio_regs *sregs = sbus->iommu->sysio_regs;
-	int slot = sdev->slot;
-	volatile u64 *cfg;
-	u64 tmp;
-
-	switch(slot) {
-	case 0:
-		cfg = &sregs->sbus_s0cfg;
-		break;
-	case 1:
-		cfg = &sregs->sbus_s1cfg;
-		break;
-	case 2:
-		cfg = &sregs->sbus_s2cfg;
-		break;
-	case 3:
-		cfg = &sregs->sbus_s3cfg;
-		break;
-
-	case 13:
-		cfg = &sregs->sbus_s4cfg;
-		break;
-	case 14:
-		cfg = &sregs->sbus_s5cfg;
-		break;
-	case 15:
-		cfg = &sregs->sbus_s6cfg;
-		break;
-
-	default:
-		return;
-	};
-
-	/* ETM already enabled?  If so, we're done. */
-	tmp = *cfg;
-	if ((tmp & SYSIO_SBSCFG_ETM) != 0)
-		return;
-
-	/* Set burst bits. */
-	if (bursts & DMA_BURST8)
-		tmp |= SYSIO_SBSCFG_BA8;
-	if (bursts & DMA_BURST16)
-		tmp |= SYSIO_SBSCFG_BA16;
-	if (bursts & DMA_BURST32)
-		tmp |= SYSIO_SBSCFG_BA32;
-	if (bursts & DMA_BURST64)
-		tmp |= SYSIO_SBSCFG_BA64;
-
-	/* Finally turn on ETM and set register. */
-	*cfg = (tmp | SYSIO_SBSCFG_ETM);
-}
-
 int mmu_info(char *buf)
 {
 	/* We'll do the rest later to make it nice... -DaveM */
+#if 0
+	if (this_is_cheetah)
+		sprintf(buf, "MMU Type\t: One bad ass cpu\n");
+	else
+#endif
 	return sprintf(buf, "MMU Type\t: Spitfire\n");
 }
 
-static unsigned long mempool;
-
 struct linux_prom_translation {
 	unsigned long virt;
 	unsigned long size;
 	unsigned long data;
 };
 
-static inline void inherit_prom_mappings(void)
+extern unsigned long prom_boot_page;
+extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
+extern int prom_get_mmu_ihandle(void);
+extern void register_prom_callbacks(void);
+
+/* Exported for SMP bootup purposes. */
+unsigned long kern_locked_tte_data;
+
+void __init early_pgtable_allocfail(char *type)
+{
+	prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
+	prom_halt();
+}
+
+static void inherit_prom_mappings(void)
 {
 	struct linux_prom_translation *trans;
+	unsigned long phys_page, tte_vaddr, tte_data;
+	void (*remap_func)(unsigned long, unsigned long, int);
 	pgd_t *pgdp;
 	pmd_t *pmdp;
 	pte_t *ptep;
-	int node, n, i;
+	int node, n, i, tsz;
 
 	node = prom_finddevice("/virtual-memory");
 	n = prom_getproplen(node, "translations");
@@ -665,11 +184,17 @@
 		prom_printf("Couldn't get translation property\n");
 		prom_halt();
 	}
+	n += 5 * sizeof(struct linux_prom_translation);
+	for (tsz = 1; tsz < n; tsz <<= 1)
+		/* empty */;
+	trans = __alloc_bootmem(tsz, SMP_CACHE_BYTES, 0UL);
+	if (trans == NULL) {
+		prom_printf("inherit_prom_mappings: Cannot alloc translations.\n");
+		prom_halt();
+	}
+	memset(trans, 0, tsz);
 
-	for (i = 1; i < n; i <<= 1) /* empty */;
-	trans = sparc_init_alloc(&mempool, i);
-
-	if (prom_getproperty(node, "translations", (char *)trans, i) == -1) {
+	if ((n = prom_getproperty(node, "translations", (char *)trans, tsz)) == -1) {
 		prom_printf("Couldn't get translation property\n");
 		prom_halt();
 	}
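
[Editor's note, illustrative only: the hunk above pads the property length
with room for five extra entries (the "translations" property is re-read
later in this function, after the kernel is remapped, and OBP may have
added entries in the meantime), then rounds the allocation up to the next
power of two.  The rounding idiom as a standalone sketch:

	#include <stdio.h>

	static unsigned long roundup_pow2(unsigned long n)
	{
		unsigned long sz;

		for (sz = 1; sz < n; sz <<= 1)
			/* empty */;
		return sz;
	}

	int main(void)
	{
		printf("%lu\n", roundup_pow2(1000));	/* prints 1024 */
		return 0;
	}
]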
@@ -684,15 +209,22 @@
 			     vaddr += PAGE_SIZE) {
 				pgdp = pgd_offset(&init_mm, vaddr);
 				if (pgd_none(*pgdp)) {
-					pmdp = sparc_init_alloc(&mempool,
-							 PMD_TABLE_SIZE);
-					memset(pmdp, 0, PAGE_SIZE);
+					pmdp = __alloc_bootmem(PMD_TABLE_SIZE,
+							       PMD_TABLE_SIZE,
+							       0UL);
+					if (pmdp == NULL)
+						early_pgtable_allocfail("pmd");
+					memset(pmdp, 0, PMD_TABLE_SIZE);
 					pgd_set(pgdp, pmdp);
 				}
 				pmdp = pmd_offset(pgdp, vaddr);
 				if (pmd_none(*pmdp)) {
-					ptep = sparc_init_alloc(&mempool,
-							 PTE_TABLE_SIZE);
+					ptep = __alloc_bootmem(PTE_TABLE_SIZE,
+							       PTE_TABLE_SIZE,
+							       0UL);
+					if (ptep == NULL)
+						early_pgtable_allocfail("pte");
+					memset(ptep, 0, PTE_TABLE_SIZE);
 					pmd_set(pmdp, ptep);
 				}
 				ptep = pte_offset(pmdp, vaddr);
@@ -701,6 +233,83 @@
 			}
 		}
 	}
+
+	/* Now fixup OBP's idea about where we really are mapped. */
+	prom_printf("Remapping the kernel... ");
+	phys_page = spitfire_get_dtlb_data(63) & _PAGE_PADDR;
+	phys_page += ((unsigned long)&prom_boot_page -
+		      (unsigned long)&empty_zero_page);
+
+	/* Lock this into i/d tlb entry 59 */
+	__asm__ __volatile__(
+		"stxa	%%g0, [%2] %3\n\t"
+		"stxa	%0, [%1] %4\n\t"
+		"membar	#Sync\n\t"
+		"flush	%%g6\n\t"
+		"stxa	%%g0, [%2] %5\n\t"
+		"stxa	%0, [%1] %6\n\t"
+		"membar	#Sync\n\t"
+		"flush	%%g6"
+		: : "r" (phys_page | _PAGE_VALID | _PAGE_SZ8K | _PAGE_CP |
+			 _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W),
+		    "r" (59 << 3), "r" (TLB_TAG_ACCESS),
+		    "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS),
+		    "i" (ASI_IMMU), "i" (ASI_ITLB_DATA_ACCESS)
+		: "memory");
+
+	tte_vaddr = (unsigned long) &empty_zero_page;
+	kern_locked_tte_data = tte_data = spitfire_get_dtlb_data(63);
+
+	remap_func = (void *)  ((unsigned long) &prom_remap -
+				(unsigned long) &prom_boot_page);
+
+	remap_func(spitfire_get_dtlb_data(63) & _PAGE_PADDR,
+		   (unsigned long) &empty_zero_page,
+		   prom_get_mmu_ihandle());
+
+	/* Flush out that temporary mapping. */
+	spitfire_flush_dtlb_nucleus_page(0x0);
+	spitfire_flush_itlb_nucleus_page(0x0);
+
+	/* Now lock us back into the TLBs via OBP. */
+	prom_dtlb_load(63, tte_data, tte_vaddr);
+	prom_itlb_load(63, tte_data, tte_vaddr);
+
+	/* Re-read translations property. */
+	if ((n = prom_getproperty(node, "translations", (char *)trans, tsz)) == -1) {
+		prom_printf("Couldn't get translation property\n");
+		prom_halt();
+	}
+	n = n / sizeof(*trans);
+
+	for (i = 0; i < n; i++) {
+		unsigned long vaddr = trans[i].virt;
+		unsigned long size = trans[i].size;
+
+		if (vaddr < 0xf0000000UL) {
+			unsigned long avoid_start = (unsigned long) &empty_zero_page;
+			unsigned long avoid_end = avoid_start + (4 * 1024 * 1024);
+
+			if (vaddr < avoid_start) {
+				unsigned long top = vaddr + size;
+
+				if (top > avoid_start)
+					top = avoid_start;
+				prom_unmap(top - vaddr, vaddr);
+			}
+			if ((vaddr + size) > avoid_end) {
+				unsigned long bottom = vaddr;
+
+				if (bottom < avoid_end)
+					bottom = avoid_end;
+				prom_unmap((vaddr + size) - bottom, bottom);
+			}
+		}
+	}
+
+	prom_printf("done.\n");
+
+	register_prom_callbacks();
 }
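
[Editor's note, illustrative only: the re-read loop above unmaps every OBP
translation below 0xf0000000 except the 4MB window that now holds the
kernel.  A standalone sketch of the trimming arithmetic, mirroring the two
prom_unmap() calls (printf stands in for prom_unmap):

	#include <stdio.h>

	static void trim(unsigned long vaddr, unsigned long size,
			 unsigned long avoid_start, unsigned long avoid_end)
	{
		if (vaddr < avoid_start) {
			unsigned long top = vaddr + size;

			if (top > avoid_start)
				top = avoid_start;
			printf("unmap [%#lx,%#lx)\n", vaddr, top);
		}
		if ((vaddr + size) > avoid_end) {
			unsigned long bottom = vaddr;

			if (bottom < avoid_end)
				bottom = avoid_end;
			printf("unmap [%#lx,%#lx)\n", bottom, vaddr + size);
		}
	}

	int main(void)
	{
		/* A translation straddling a hypothetical 4MB kernel window:
		 * only the pieces outside [0x400000,0x800000) get unmapped. */
		trim(0x300000, 0x600000, 0x400000, 0x800000);
		return 0;
	}
]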
 
 /* The OBP specifications for sun4u mark 0xfffffffc00000000 and
@@ -1020,6 +629,10 @@
 struct pgtable_cache_struct pgt_quicklists;
 #endif
 
+/* For PMDs we don't care about the color, writes are
+ * only done via Dcache which is write-thru, so non-Dcache
+ * reads will always see correct data.
+ */
 pmd_t *get_pmd_slow(pgd_t *pgd, unsigned long offset)
 {
 	pmd_t *pmd;
@@ -1033,79 +646,55 @@
 	return NULL;
 }
 
-pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
-{
-	pte_t *pte;
-
-	pte = (pte_t *) __get_free_page(GFP_KERNEL);
-	if(pte) {
-		memset(pte, 0, PAGE_SIZE);
-		pmd_set(pmd, pte);
-		return pte + offset;
-	}
-	return NULL;
-}
-
-static void __init
-allocate_ptable_skeleton(unsigned long start, unsigned long end)
-{
-	pgd_t *pgdp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	while (start < end) {
-		pgdp = pgd_offset(&init_mm, start);
-		if (pgd_none(*pgdp)) {
-			pmdp = sparc_init_alloc(&mempool, PAGE_SIZE);
-			memset(pmdp, 0, PAGE_SIZE);
-			pgd_set(pgdp, pmdp);
-		}
-		pmdp = pmd_offset(pgdp, start);
-		if (pmd_none(*pmdp)) {
-			ptep = sparc_init_alloc(&mempool, PAGE_SIZE);
-			memset(ptep, 0, PAGE_SIZE);
-			pmd_set(pmdp, ptep);
-		}
-		start = (start + PMD_SIZE) & PMD_MASK;
-	}
-}
-
-/*
- * Create a mapping for an I/O register.  Have to make sure the side-effect
- * bit is set.
+/* OK, we have to color these pages because during DTLB
+ * protection faults we set the dirty bit via a non-Dcache
+ * enabled mapping in the VPTE area.  The kernel can end
+ * up missing the dirty bit resulting in processes crashing
+ * _iff_ the VPTE mapping of the ptes have a virtual address
+ * bit 13 which is different from bit 13 of the physical address.
+ *
+ * The sequence is:
+ *	1) DTLB protection fault, write dirty bit into pte via VPTE
+ *	   mappings.
+ *	2) Swapper checks pte, does not see dirty bit, frees page.
+ *	3) Process faults back in the page, the old pre-dirtied copy
+ *	   is provided and here is the corruption.
  */
- 
-void sparc_ultra_mapioaddr(unsigned long physaddr, unsigned long virt_addr,
-			   int bus, int rdonly)
+pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset, unsigned long color)
 {
-	pgd_t *pgdp = pgd_offset(&init_mm, virt_addr);
-	pmd_t *pmdp = pmd_offset(pgdp, virt_addr);
-	pte_t *ptep = pte_offset(pmdp, virt_addr);
-	pte_t pte;
+	unsigned long paddr = __get_free_pages(GFP_KERNEL, 1);
 
-	physaddr &= PAGE_MASK;
+	if (paddr) {
+		struct page *page2 = mem_map + MAP_NR(paddr + PAGE_SIZE);
+		unsigned long *to_free;
+		pte_t *pte;
 
-	if(rdonly)
-		pte = mk_pte_phys(physaddr, __pgprot(pg_iobits | __PRIV_BITS));
-	else
-		pte = mk_pte_phys(physaddr, __pgprot(pg_iobits | __DIRTY_BITS | __PRIV_BITS));
+		/* Set count of second page, so we can free it
+		 * separately later on.
+		 */
+		atomic_set(&page2->count, 1);
 
-	set_pte(ptep, pte);
-}
+		/* Clear out both pages now. */
+		memset((char *)paddr, 0, (PAGE_SIZE << 1));
 
-/* XXX no longer used, remove me... -DaveM */
-void sparc_ultra_unmapioaddr(unsigned long virt_addr)
-{
-	pgd_t *pgdp;
-	pmd_t *pmdp;
-	pte_t *ptep;
+		/* Determine which page we give to this request. */
+		if (!color) {
+			pte = (pte_t *) paddr;
+			to_free = (unsigned long *) (paddr + PAGE_SIZE);
+		} else {
+			pte = (pte_t *) (paddr + PAGE_SIZE);
+			to_free = (unsigned long *) paddr;
+		}
 
-	pgdp = pgd_offset(&init_mm, virt_addr);
-	pmdp = pmd_offset(pgdp, virt_addr);
-	ptep = pte_offset(pmdp, virt_addr);
+		/* Now free the other one up, adjust cache size. */
+		*to_free = (unsigned long) pte_quicklist[color ^ 0x1];
+		pte_quicklist[color ^ 0x1] = to_free;
+		pgtable_cache_size++;
 
-	/* No need to flush uncacheable page. */
-	pte_clear(ptep);
+		pmd_set(pmd, pte);
+		return pte + offset;
+	}
+	return NULL;
 }
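
[Editor's note, a sketch under stated assumptions and not code from this
patch: with 8K pages (PAGE_SHIFT = 13) the "color" is simply bit 13 of an
address, i.e. the parity of the page frame.  A caller would presumably
pick the quicklist whose pages have physical bit 13 equal to bit 13 of the
virtual address the ptes will be mapped at, so the VPTE aliasing hazard
described in the comment above cannot arise:

	#include <stdio.h>

	#define PAGE_SHIFT 13	/* sparc64 uses 8K pages */

	static unsigned long pte_color(unsigned long vaddr)
	{
		return (vaddr >> PAGE_SHIFT) & 1UL;	/* bit 13 */
	}

	int main(void)
	{
		printf("%lu %lu\n",
		       pte_color(0x2000), pte_color(0x4000));	/* 1 0 */
		return 0;
	}
]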
 
 void sparc_ultra_dump_itlb(void)
@@ -1139,21 +728,114 @@
         }
 }
 
+#undef DEBUG_BOOTMEM
+
+extern unsigned long cmdline_memory_size;
+
+unsigned long __init bootmem_init(void)
+{
+	unsigned long bootmap_size, start_pfn, end_pfn;
+	unsigned long end_of_phys_memory = 0UL;
+	int i;
+
+	/* XXX It is a bit ambiguous here, whether we should
+	 * XXX treat the user specified mem=xxx as total wanted
+	 * XXX physical memory, or as a limit to the upper
+	 * XXX physical address we allow.  For now it is the
+	 * XXX latter. -DaveM
+	 */
+#ifdef DEBUG_BOOTMEM
+	prom_printf("bootmem_init: Scan sp_banks,  ");
+#endif
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+		end_of_phys_memory = sp_banks[i].base_addr +
+			sp_banks[i].num_bytes;
+		if (cmdline_memory_size) {
+			if (end_of_phys_memory > cmdline_memory_size) {
+				if (cmdline_memory_size > sp_banks[i].base_addr) {
+					end_of_phys_memory =
+						sp_banks[i-1].base_addr +
+						sp_banks[i-1].num_bytes;
+					sp_banks[i].base_addr = 0xdeadbeef;
+					sp_banks[i].num_bytes = 0;
+				} else {
+					sp_banks[i].num_bytes -=
+						(end_of_phys_memory -
+						 cmdline_memory_size);
+					end_of_phys_memory = cmdline_memory_size;
+					sp_banks[++i].base_addr = 0xdeadbeef;
+					sp_banks[i].num_bytes = 0;
+				}
+				break;
+			}
+		}
+	}
+
+	/* Start with page aligned address of last symbol in kernel
+	 * image.  The kernel is hard mapped below PAGE_OFFSET in a
+	 * 4MB locked TLB translation.
+	 */
+	start_pfn  = PAGE_ALIGN((unsigned long) &_end) -
+		((unsigned long) &empty_zero_page);
+
+	/* Adjust up to the physical address where the kernel begins. */
+	start_pfn += phys_base;
+
+	/* Now shift down to get the real physical page frame number. */
+	start_pfn >>= PAGE_SHIFT;
+
+	end_pfn = end_of_phys_memory >> PAGE_SHIFT;
+
+	/* Initialize the boot-time allocator. */
+#ifdef DEBUG_BOOTMEM
+	prom_printf("init_bootmem(spfn[%lx],epfn[%lx])\n",
+		    start_pfn, end_pfn);
+#endif
+	bootmap_size = init_bootmem(start_pfn, end_pfn);
+
+	/* Now register the available physical memory with the
+	 * allocator.
+	 */
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+#ifdef DEBUG_BOOTMEM
+		prom_printf("free_bootmem: base[%lx] size[%lx]\n",
+			    sp_banks[i].base_addr,
+			    sp_banks[i].num_bytes);
+#endif
+		free_bootmem(sp_banks[i].base_addr,
+			     sp_banks[i].num_bytes);
+	}
+
+	/* Reserve the kernel text/data/bss and the bootmem bitmap. */
+#ifdef DEBUG_BOOTMEM
+	prom_printf("reserve_bootmem: base[%lx] size[%lx]\n",
+		    phys_base,
+		    (((start_pfn << PAGE_SHIFT) +
+		      bootmap_size) - phys_base));
+#endif
+	reserve_bootmem(phys_base, (((start_pfn << PAGE_SHIFT) +
+				     bootmap_size) - phys_base));
+
+#ifdef DEBUG_BOOTMEM
+	prom_printf("init_bootmem: return end_pfn[%lx]\n", end_pfn);
+#endif
+	return end_pfn;
+}
+
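
[Editor's note, a worked example with hypothetical numbers: suppose the
kernel image spans 0x320000 bytes from empty_zero_page to the page-aligned
_end, and phys_base is 0x20000000.  Then the first pfn handed to the
bootmem allocator, per the arithmetic above, is:

	#include <stdio.h>

	#define PAGE_SHIFT 13	/* sparc64 uses 8K pages */

	int main(void)
	{
		unsigned long span = 0x320000UL; /* PAGE_ALIGN(&_end) - &empty_zero_page */
		unsigned long phys_base = 0x20000000UL;
		unsigned long start_pfn = (span + phys_base) >> PAGE_SHIFT;

		printf("start_pfn = %#lx\n", start_pfn);	/* 0x10190 */
		return 0;
	}
]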
 /* paging_init() sets up the page tables */
 
-extern unsigned long free_area_init(unsigned long, unsigned long);
-extern unsigned long sun_serial_setup(unsigned long);
+extern void sun_serial_setup(void);
+
+static unsigned long last_valid_pfn;
 
-unsigned long __init
-paging_init(unsigned long start_mem, unsigned long end_mem)
+void __init paging_init(void)
 {
 	extern pmd_t swapper_pmd_dir[1024];
 	extern unsigned int sparc64_vpte_patchme1[1];
 	extern unsigned int sparc64_vpte_patchme2[1];
 	unsigned long alias_base = phys_base + PAGE_OFFSET;
 	unsigned long second_alias_page = 0;
-	unsigned long pt;
-	unsigned long flags;
+	unsigned long pt, flags, end_pfn;
 	unsigned long shift = alias_base - ((unsigned long)&empty_zero_page);
 
 	set_bit(0, mmu_context_bmap);
@@ -1176,7 +858,7 @@
 	: "r" (TLB_TAG_ACCESS), "r" (alias_base), "r" (pt),
 	  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" (61 << 3)
 	: "memory");
-	if (start_mem >= KERNBASE + 0x340000) {
+	if (((unsigned long)&_end) >= KERNBASE + 0x340000) {
 		second_alias_page = alias_base + 0x400000;
 		__asm__ __volatile__("
 		stxa	%1, [%0] %3
@@ -1203,24 +885,22 @@
 	/* Now can init the kernel/bad page tables. */
 	pgd_set(&swapper_pg_dir[0], swapper_pmd_dir + (shift / sizeof(pgd_t)));
 	
-	sparc64_vpte_patchme1[0] |= (init_mm.pgd[0] >> 10);
-	sparc64_vpte_patchme2[0] |= (init_mm.pgd[0] & 0x3ff);
+	sparc64_vpte_patchme1[0] |= (pgd_val(init_mm.pgd[0]) >> 10);
+	sparc64_vpte_patchme2[0] |= (pgd_val(init_mm.pgd[0]) & 0x3ff);
 	flushi((long)&sparc64_vpte_patchme1[0]);
 	
-	/* We use mempool to create page tables, therefore adjust it up
-	 * such that __pa() macros etc. work.
-	 */
-	mempool = PAGE_ALIGN(start_mem) + shift;
-	
+	/* Setup bootmem... */
+	last_valid_pfn = end_pfn = bootmem_init();
+
 #ifdef CONFIG_SUN_SERIAL
-	/* This does not logically belong here, but is the first place
-	   we can initialize it at, so that we work in the PAGE_OFFSET+
-	   address space. */
-	mempool = sun_serial_setup(mempool);
+	/* This does not logically belong here, but we need to
+	 * call it at the moment we are able to use the bootmem
+	 * allocator.
+	 */
+	sun_serial_setup();
 #endif
 
-	/* Allocate 64M for dynamic DVMA mapping area. */
-	allocate_ptable_skeleton(DVMA_VADDR, DVMA_VADDR + 0x4000000);
+	/* Inherit non-locked OBP mappings. */
 	inherit_prom_mappings();
 	
 	/* Ok, we can use our TLB miss and window trap handlers safely.
@@ -1231,205 +911,314 @@
 	{
 		extern void setup_tba(int);
 		int is_starfire = prom_finddevice("/ssp-serial");
-		if(is_starfire != 0 && is_starfire != -1)
+		if (is_starfire != 0 && is_starfire != -1)
 			is_starfire = 1;
 		else
 			is_starfire = 0;
 		setup_tba(is_starfire);
 	}
 
-	/* Really paranoid. */
-	flushi((long)&empty_zero_page);
-	membar("#Sync");
-
-	/* Cleanup the extra locked TLB entry we created since we have the
-	 * nice TLB miss handlers of ours installed now.
-	 */
+	inherit_locked_prom_mappings(1);
+	
 	/* We only created DTLB mapping of this stuff. */
 	spitfire_flush_dtlb_nucleus_page(alias_base);
 	if (second_alias_page)
 		spitfire_flush_dtlb_nucleus_page(second_alias_page);
-	membar("#Sync");
 
-	/* Paranoid */
-	flushi((long)&empty_zero_page);
-	membar("#Sync");
+	flush_tlb_all();
 
-	inherit_locked_prom_mappings(1);
+	{
+		unsigned int zones_size[MAX_NR_ZONES] = { 0, 0, 0};
 
-	flush_tlb_all();
+		zones_size[ZONE_DMA] = end_pfn;
+		free_area_init(zones_size);
+	}
 
-	start_mem = free_area_init(PAGE_ALIGN(mempool), end_mem);
+	device_scan();
+}
 
-	return device_scan (PAGE_ALIGN (start_mem));
+/* Ok, it seems that the prom can allocate some more memory chunks
+ * as a side effect of some prom calls we perform during the
+ * boot sequence.  My most likely theory is that it is from the
+ * prom_set_traptable() call, and OBP is allocating a scratchpad
+ * for saving client program register state etc.
+ */
+void __init sort_memlist(struct linux_mlist_p1275 *thislist)
+{
+	int swapi = 0;
+	int i, mitr;
+	unsigned long tmpaddr, tmpsize;
+	unsigned long lowest;
+
+	for (i = 0; thislist[i].theres_more != 0; i++) {
+		lowest = thislist[i].start_adr;
+		for (mitr = i+1; thislist[mitr-1].theres_more != 0; mitr++)
+			if (thislist[mitr].start_adr < lowest) {
+				lowest = thislist[mitr].start_adr;
+				swapi = mitr;
+			}
+		if (lowest == thislist[i].start_adr)
+			continue;
+		tmpaddr = thislist[swapi].start_adr;
+		tmpsize = thislist[swapi].num_bytes;
+		for (mitr = swapi; mitr > i; mitr--) {
+			thislist[mitr].start_adr = thislist[mitr-1].start_adr;
+			thislist[mitr].num_bytes = thislist[mitr-1].num_bytes;
+		}
+		thislist[i].start_adr = tmpaddr;
+		thislist[i].num_bytes = tmpsize;
+	}
 }
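
[Editor's note, illustrative only: sort_memlist() is a selection sort
that, instead of swapping, rotates the block between the insertion point
and the displaced minimum, so the entries in between keep their relative
order.  The same algorithm on a plain array:

	#include <stdio.h>

	static void sort_rotate(unsigned long *adr, int n)
	{
		int i, mitr, swapi;
		unsigned long lowest, tmp;

		for (i = 0; i < n; i++) {
			lowest = adr[i];
			swapi = i;
			for (mitr = i + 1; mitr < n; mitr++)
				if (adr[mitr] < lowest) {
					lowest = adr[mitr];
					swapi = mitr;
				}
			if (swapi == i)
				continue;
			tmp = adr[swapi];
			for (mitr = swapi; mitr > i; mitr--)
				adr[mitr] = adr[mitr - 1];
			adr[i] = tmp;
		}
	}

	int main(void)
	{
		unsigned long a[] = { 0x4000, 0x1000, 0x3000, 0x2000 };
		int i;

		sort_rotate(a, 4);
		for (i = 0; i < 4; i++)
			printf("%#lx ", a[i]);	/* 0x1000 0x2000 0x3000 0x4000 */
		printf("\n");
		return 0;
	}
]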
 
-static void __init taint_real_pages(unsigned long start_mem, unsigned long end_mem)
+void __init rescan_sp_banks(void)
 {
-	unsigned long tmp = 0, paddr, endaddr;
-	unsigned long end = __pa(end_mem);
+	struct linux_prom64_registers memlist[64];
+	struct linux_mlist_p1275 avail[64], *mlist;
+	unsigned long bytes, base_paddr;
+	int num_regs, node = prom_finddevice("/memory");
+	int i;
 
-	dvmaio_init();
-	for (paddr = __pa(start_mem); paddr < end; ) {
-		for (; sp_banks[tmp].num_bytes != 0; tmp++)
-			if (sp_banks[tmp].base_addr + sp_banks[tmp].num_bytes > paddr)
-				break;
-		if (!sp_banks[tmp].num_bytes) {
-			mem_map[paddr>>PAGE_SHIFT].flags |= (1<<PG_skip);
-			mem_map[paddr>>PAGE_SHIFT].next_hash = mem_map + (phys_base >> PAGE_SHIFT);
-			mem_map[(paddr>>PAGE_SHIFT)+1UL].flags |= (1<<PG_skip);
-			mem_map[(paddr>>PAGE_SHIFT)+1UL].next_hash = mem_map + (phys_base >> PAGE_SHIFT);
-			return;
+	num_regs = prom_getproperty(node, "available",
+				    (char *) memlist, sizeof(memlist));
+	num_regs = (num_regs / sizeof(struct linux_prom64_registers));
+	for (i = 0; i < num_regs; i++) {
+		avail[i].start_adr = memlist[i].phys_addr;
+		avail[i].num_bytes = memlist[i].reg_size;
+		avail[i].theres_more = &avail[i + 1];
+	}
+	avail[i - 1].theres_more = NULL;
+	sort_memlist(avail);
+
+	mlist = &avail[0];
+	i = 0;
+	bytes = mlist->num_bytes;
+	base_paddr = mlist->start_adr;
+  
+	sp_banks[0].base_addr = base_paddr;
+	sp_banks[0].num_bytes = bytes;
+
+	while (mlist->theres_more != NULL){
+		i++;
+		mlist = mlist->theres_more;
+		bytes = mlist->num_bytes;
+		if (i >= SPARC_PHYS_BANKS-1) {
+			printk ("The machine has more banks than "
+				"this kernel can support\n"
+				"Increase the SPARC_PHYS_BANKS "
+				"setting (currently %d)\n",
+				SPARC_PHYS_BANKS);
+			i = SPARC_PHYS_BANKS-1;
+			break;
 		}
-		
-		if (sp_banks[tmp].base_addr > paddr) {
-			/* Making a one or two pages PG_skip holes
-			 * is not necessary.  We add one more because
-			 * we must set the PG_skip flag on the first
-			 * two mem_map[] entries for the hole.  Go and
-			 * see the mm/filemap.c:shrink_mmap() loop for
-			 * details. -DaveM
-			 */
-			if (sp_banks[tmp].base_addr - paddr > 3 * PAGE_SIZE) {
-				mem_map[paddr>>PAGE_SHIFT].flags |= (1<<PG_skip);
-				mem_map[paddr>>PAGE_SHIFT].next_hash = mem_map + (sp_banks[tmp].base_addr >> PAGE_SHIFT);
-				mem_map[(paddr>>PAGE_SHIFT)+1UL].flags |= (1<<PG_skip);
-				mem_map[(paddr>>PAGE_SHIFT)+1UL].next_hash = mem_map + (sp_banks[tmp].base_addr >> PAGE_SHIFT);
+    
+		sp_banks[i].base_addr = mlist->start_adr;
+		sp_banks[i].num_bytes = mlist->num_bytes;
+	}
+
+	i++;
+	sp_banks[i].base_addr = 0xdeadbeefbeefdeadUL;
+	sp_banks[i].num_bytes = 0;
+
+	for (i = 0; sp_banks[i].num_bytes != 0; i++)
+		sp_banks[i].num_bytes &= PAGE_MASK;
+}
+
+static void __init taint_real_pages(void)
+{
+	struct sparc_phys_banks saved_sp_banks[SPARC_PHYS_BANKS];
+	int i;
+
+#ifdef DEBUG_BOOTMEM
+	prom_printf("taint_real_pages: Rescan sp_banks[].\n");
+#endif
+	for (i = 0; i < SPARC_PHYS_BANKS; i++) {
+		saved_sp_banks[i].base_addr =
+			sp_banks[i].base_addr;
+		saved_sp_banks[i].num_bytes =
+			sp_banks[i].num_bytes;
+	}
+
+	rescan_sp_banks();
+
+	/* Find changes discovered in the sp_bank rescan and
+	 * reserve the lost portions in the bootmem maps.
+	 */
+	for (i = 0; saved_sp_banks[i].num_bytes; i++) {
+		unsigned long old_start, old_end;
+
+		old_start = saved_sp_banks[i].base_addr;
+		old_end = old_start +
+			saved_sp_banks[i].num_bytes;
+		while (old_start < old_end) {
+			int n;
+
+			for (n = 0; sp_banks[n].num_bytes; n++) {
+				unsigned long new_start, new_end;
+
+				new_start = sp_banks[n].base_addr;
+				new_end = new_start + sp_banks[n].num_bytes;
+
+				if (new_start <= old_start &&
+				    new_end >= (old_start + PAGE_SIZE)) {
+					set_bit (old_start >> 22,
+						 sparc64_valid_addr_bitmap);
+					goto do_next_page;
+				}
 			}
-			paddr = sp_banks[tmp].base_addr;
+#ifdef DEBUG_BOOTMEM
+			prom_printf("taint: Page went away, reserve page %lx.\n",
+				    old_start);
+#endif
+			reserve_bootmem(old_start, PAGE_SIZE);
+
+		do_next_page:
+			old_start += PAGE_SIZE;
 		}
-		
-		endaddr = sp_banks[tmp].base_addr + sp_banks[tmp].num_bytes;
-		while (paddr < endaddr) {
-			mem_map[paddr>>PAGE_SHIFT].flags &= ~(1<<PG_reserved);
-			set_bit(paddr >> 22, sparc64_valid_addr_bitmap);
-			if (paddr >= (MAX_DMA_ADDRESS - PAGE_OFFSET))
-				mem_map[paddr>>PAGE_SHIFT].flags &= ~(1<<PG_DMA);
-			paddr += PAGE_SIZE;
+	}
+}
+
+void __init free_mem_map_range(struct page *first, struct page *last)
+{
+	first = (struct page *) PAGE_ALIGN((unsigned long)first);
+	last  = (struct page *) ((unsigned long)last & PAGE_MASK);
+#ifdef DEBUG_BOOTMEM
+	prom_printf("[%p,%p] ", first, last);
+#endif
+	while (first < last) {
+		ClearPageReserved(mem_map + MAP_NR(first));
+		set_page_count(mem_map + MAP_NR(first), 1);
+		free_page((unsigned long)first);
+		totalram_pages++;
+		num_physpages++;
+
+		first = (struct page *)((unsigned long)first + PAGE_SIZE);
+	}
+}
+
+/* Walk through holes in sp_banks regions, if the mem_map array
+ * areas representing those holes consume a page or more, free
+ * up such pages.  This helps a lot on machines where physical
+ * ram is configured such that it begins at some huge value.
+ *
+ * The sp_banks array is sorted by base address.
+ */
+void __init free_unused_mem_map(void)
+{
+	int i;
+
+#ifdef DEBUG_BOOTMEM
+	prom_printf("free_unused_mem_map: ");
+#endif
+	for (i = 0; sp_banks[i].num_bytes; i++) {
+		if (i == 0) {
+			struct page *first, *last;
+
+			first = mem_map;
+			last = &mem_map[sp_banks[i].base_addr >> PAGE_SHIFT];
+			free_mem_map_range(first, last);
+		} else {
+			struct page *first, *last;
+			unsigned long prev_end;
+
+			prev_end = sp_banks[i-1].base_addr +
+				sp_banks[i-1].num_bytes;
+			prev_end = PAGE_ALIGN(prev_end);
+			first = &mem_map[prev_end >> PAGE_SHIFT];
+			last = &mem_map[sp_banks[i].base_addr >> PAGE_SHIFT];
+
+			free_mem_map_range(first, last);
+
+			if (!sp_banks[i+1].num_bytes) {
+				prev_end = sp_banks[i].base_addr +
+					sp_banks[i].num_bytes;
+				first = &mem_map[prev_end >> PAGE_SHIFT];
+				last = &mem_map[last_valid_pfn];
+				free_mem_map_range(first, last);
+			}
 		}
 	}
+#ifdef DEBUG_BOOTMEM
+	prom_printf("\n");
+#endif
 }
 
-void __init mem_init(unsigned long start_mem, unsigned long end_mem)
+void __init mem_init(void)
 {
-	int codepages = 0;
-	int datapages = 0;
-	int initpages = 0;
-	unsigned long addr;
-	unsigned long alias_base = phys_base + PAGE_OFFSET - (long)(&empty_zero_page);
-	struct page *page, *end;
+	unsigned long codepages, datapages, initpages;
+	unsigned long addr, last;
 	int i;
 
-	end_mem &= PAGE_MASK;
-	max_mapnr = MAP_NR(end_mem);
-	high_memory = (void *) end_mem;
-	
-	start_mem = ((start_mem + 7UL) & ~7UL);
-	sparc64_valid_addr_bitmap = (unsigned long *)start_mem;
-	i = max_mapnr >> ((22 - PAGE_SHIFT) + 6);
+	i = last_valid_pfn >> ((22 - PAGE_SHIFT) + 6);
 	i += 1;
-	memset(sparc64_valid_addr_bitmap, 0, i << 3);
-	start_mem += i << 3;
-
-	start_mem = PAGE_ALIGN(start_mem);
-	num_physpages = 0;
-	
-	if (phys_base) {
-		mem_map[0].flags |= (1<<PG_skip) | (1<<PG_reserved);
-		mem_map[0].next_hash = mem_map + (phys_base >> PAGE_SHIFT);
-		mem_map[1].flags |= (1<<PG_skip) | (1<<PG_reserved);
-		mem_map[1].next_hash = mem_map + (phys_base >> PAGE_SHIFT);
+	sparc64_valid_addr_bitmap = (unsigned long *)
+		__alloc_bootmem(i << 3, SMP_CACHE_BYTES, 0UL);
+	if (sparc64_valid_addr_bitmap == NULL) {
+		prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
+		prom_halt();
 	}
+	memset(sparc64_valid_addr_bitmap, 0, i << 3);
 
 	addr = PAGE_OFFSET + phys_base;
-	while(addr < start_mem) {
+	last = PAGE_ALIGN((unsigned long)&_end) -
+		((unsigned long) &empty_zero_page);
+	last += PAGE_OFFSET + phys_base;
+	while (addr < last) {
 #ifdef CONFIG_BLK_DEV_INITRD
+// FIXME to use bootmem scheme...
 		if (initrd_below_start_ok && addr >= initrd_start && addr < initrd_end)
 			mem_map[MAP_NR(addr)].flags &= ~(1<<PG_reserved);
-		else
 #endif	
-			mem_map[MAP_NR(addr)].flags |= (1<<PG_reserved);
 		set_bit(__pa(addr) >> 22, sparc64_valid_addr_bitmap);
 		addr += PAGE_SIZE;
 	}
 
-	taint_real_pages(start_mem, end_mem);
-	
-#ifdef FREE_UNUSED_MEM_MAP	
-	end = mem_map + max_mapnr;
-	for (page = mem_map; page < end; page++) {
-		if (PageSkip(page)) {
-			unsigned long low, high;
-			
-			/* See taint_real_pages() for why this is done.  -DaveM */
-			page++;
-
-			low = PAGE_ALIGN((unsigned long)(page+1));
-			if (page->next_hash < page)
-				high = ((unsigned long)end) & PAGE_MASK;
-			else
-				high = ((unsigned long)page->next_hash) & PAGE_MASK;
-			while (low < high) {
-				mem_map[MAP_NR(low)].flags &= ~(1<<PG_reserved);
-				low += PAGE_SIZE;
-			}
-		}
-	}
+	taint_real_pages();
+
+	max_mapnr = last_valid_pfn;
+	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
+
+#ifdef DEBUG_BOOTMEM
+	prom_printf("mem_init: Calling free_all_bootmem().\n");
 #endif
-	
-	for (addr = PAGE_OFFSET; addr < end_mem; addr += PAGE_SIZE) {
-		if (PageSkip(mem_map + MAP_NR(addr))) {
-			unsigned long next = mem_map[MAP_NR(addr)].next_hash - mem_map;
-			
-			next = (next << PAGE_SHIFT) + PAGE_OFFSET;
-			if (next < addr || next >= end_mem)
-				break;
-			addr = next;
-		}
-		num_physpages++;
-		if (PageReserved(mem_map + MAP_NR(addr))) {
-			if ((addr < ((unsigned long) &etext) + alias_base) && (addr >= alias_base))
-				codepages++;
-			else if((addr >= ((unsigned long)&__init_begin) + alias_base)
-				&& (addr < ((unsigned long)&__init_end) + alias_base))
-				initpages++;
-			else if((addr < start_mem) && (addr >= alias_base))
-				datapages++;
-			continue;
-		}
-		atomic_set(&mem_map[MAP_NR(addr)].count, 1);
-#ifdef CONFIG_BLK_DEV_INITRD
-		if (!initrd_start ||
-		    (addr < initrd_start || addr >= initrd_end))
+	num_physpages = totalram_pages = free_all_bootmem();
+#if 0
+	free_unused_mem_map();
 #endif
-			free_page(addr);
-	}
-	
+	codepages = (((unsigned long) &etext) - ((unsigned long)&_start));
+	codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT;
+	datapages = (((unsigned long) &edata) - ((unsigned long)&etext));
+	datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT;
+	initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin));
+	initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT;
+
 #ifndef __SMP__
 	{
 		/* Put empty_pg_dir on pgd_quicklist */
 		extern pgd_t empty_pg_dir[1024];
 		unsigned long addr = (unsigned long)empty_pg_dir;
+		unsigned long alias_base = phys_base + PAGE_OFFSET -
+			(long)(&empty_zero_page);
 		
 		memset(empty_pg_dir, 0, sizeof(empty_pg_dir));
 		addr += alias_base;
-		mem_map[MAP_NR(addr)].pprev_hash = 0;
 		free_pgd_fast((pgd_t *)addr);
+		totalram_pages++;
+		num_physpages++;
 	}
 #endif
 
-	printk("Memory: %uk available (%dk kernel code, %dk data, %dk init) [%016lx,%016lx]\n",
-	       nr_free_pages << (PAGE_SHIFT-10),
+	printk("Memory: %uk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n",
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       codepages << (PAGE_SHIFT-10),
 	       datapages << (PAGE_SHIFT-10), 
 	       initpages << (PAGE_SHIFT-10), 
-	       PAGE_OFFSET, end_mem);
+	       PAGE_OFFSET, (last_valid_pfn << PAGE_SHIFT));
 
 	/* NOTE NOTE NOTE NOTE
 	 * Please keep track of things and make sure this
 	 * always matches the code in mm/page_alloc.c -DaveM
 	 */
-	i = nr_free_pages >> 7;
+	i = nr_free_pages() >> 7;
 	if (i < 48)
 		i = 48;
 	if (i > 256)
@@ -1442,42 +1231,35 @@
 void free_initmem (void)
 {
 	unsigned long addr;
-	
+
 	addr = (unsigned long)(&__init_begin);
 	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
-		unsigned long page = addr + (long)__va(phys_base)
-					- (long)(&empty_zero_page);
+		unsigned long page;
+		struct page *p;
 
-		mem_map[MAP_NR(page)].flags &= ~(1 << PG_reserved);
-		atomic_set(&mem_map[MAP_NR(page)].count, 1);
-		free_page(page);
+		page = (addr +
+			((unsigned long) __va(phys_base)) -
+			((unsigned long) &empty_zero_page));
+		p = mem_map + MAP_NR(page);
+
+		ClearPageReserved(p);
+		set_page_count(p, 1);
+		__free_page(p);
+		totalram_pages++;
+		num_physpages++;
 	}
 }
 
 void si_meminfo(struct sysinfo *val)
 {
-	struct page *page, *end;
-
-	val->totalram = 0;
+	val->totalram = totalram_pages;
 	val->sharedram = 0;
-	val->freeram = ((unsigned long)nr_free_pages) << PAGE_SHIFT;
-	val->bufferram = atomic_read(&buffermem);
-	for (page = mem_map, end = mem_map + max_mapnr;
-	     page < end; page++) {
-		if (PageSkip(page)) {
-			if (page->next_hash < page)
-				break;
-			page = page->next_hash;
-		}
-		if (PageReserved(page))
-			continue;
-		val->totalram++;
-		if (!atomic_read(&page->count))
-			continue;
-		val->sharedram += atomic_read(&page->count) - 1;
-	}
-	val->totalram <<= PAGE_SHIFT;
-	val->sharedram <<= PAGE_SHIFT;
-	val->totalbig = 0;
-	val->freebig = 0;
+	val->freeram = nr_free_pages();
+	val->bufferram = atomic_read(&buffermem_pages);
+
+	/* These are always zero on Sparc64. */
+	val->totalhigh = 0;
+	val->freehigh = 0;
+
+	val->mem_unit = PAGE_SIZE;
 }
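
[Editor's note, illustrative only: si_meminfo() now fills in page counts
plus a mem_unit of PAGE_SIZE rather than byte totals, so a consumer is
expected to multiply, along the lines of:

	struct sysinfo val;

	si_meminfo(&val);
	bytes_free = (unsigned long long) val.freeram * val.mem_unit;
]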
