patch-2.4.2 linux/include/asm-s390/pgtable.h

diff -u --recursive --new-file v2.4.1/linux/include/asm-s390/pgtable.h linux/include/asm-s390/pgtable.h
@@ -3,7 +3,9 @@
  *
  *  S390 version
  *    Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Hartmut Penner
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Ulrich Weigand (weigand@de.ibm.com)
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
  *
  *  Derived from "include/asm-i386/pgtable.h"
  */
@@ -17,14 +19,19 @@
  * table, so that we physically have the same two-level page table as the
  * S390 mmu expects.
  *
+ * The "pgd_xxx()" functions are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ *
  * This file contains the functions and defines necessary to modify and use
  * the S390 page table tree.
  */
 #ifndef __ASSEMBLY__
 #include <asm/processor.h>
-#include <linux/tasks.h>
+#include <linux/threads.h>
 
 extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
+extern void paging_init(void);
 
 /* Caches aren't brain-dead on S390. */
 #define flush_cache_all()                       do { } while (0)
@@ -37,20 +44,30 @@
 #define flush_icache_page(vma,pg)               do { } while (0)
 
 /*
+ * The S390 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma, address, pte)     do { } while (0)
+
+/*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern unsigned long empty_zero_page[1024];
+extern char empty_zero_page[PAGE_SIZE];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 #endif /* !__ASSEMBLY__ */
 
-/* Certain architectures need to do special things when PTEs
+/*
+ * Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
  */
 #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
 
-/* PMD_SHIFT determines the size of the area a second-level page table can map */
+/*
+ * PMD_SHIFT determines the size of the area a second-level page
+ * table can map
+ */
 #define PMD_SHIFT       22
 #define PMD_SIZE        (1UL << PMD_SHIFT)
 #define PMD_MASK        (~(PMD_SIZE-1))
@@ -70,7 +87,6 @@
 #define PTRS_PER_PMD    1
 #define PTRS_PER_PGD    512
 
-
 /*
  * pgd entries used up by user/kernel:
  */
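
As an aside on the folded layout: with PMD_SHIFT at 22, a virtual address is
split only by a pgd index, a pte index and the byte offset within the page.
A stand-alone sketch of that decomposition (editorial, not part of the patch;
PGDIR_SHIFT, not visible in this hunk, equals PMD_SHIFT in the folded setup,
and PTRS_PER_PTE is taken to be 1024, i.e. 4MB per pgd entry over 4KB pages):

	#include <stdio.h>

	#define PMD_SHIFT    22
	#define PAGE_SHIFT   12		/* 4KB pages on S390 */
	#define PTRS_PER_PGD 512
	#define PTRS_PER_PTE 1024	/* assumed: 4MB / 4KB */

	int main(void)
	{
		unsigned long addr = 0x01234567;	/* arbitrary 31-bit address */
		unsigned long pgd_idx = (addr >> PMD_SHIFT) & (PTRS_PER_PGD - 1);
		unsigned long pte_idx = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
		unsigned long offset  = addr & ((1UL << PAGE_SHIFT) - 1);

		printf("pgd %lu, pte %lu, offset 0x%03lx\n",
		       pgd_idx, pte_idx, offset);	/* pgd 4, pte 564, offset 0x567 */
		return 0;
	}
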
@@ -87,7 +103,8 @@
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 #ifndef __ASSEMBLY__
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
  * any out-of-bounds memory accesses will hopefully be caught.
@@ -95,14 +112,14 @@
  * area for the same reason. ;)
  */
 #define VMALLOC_OFFSET  (8*1024*1024)
-#define VMALLOC_START   (((unsigned long) high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
+#define VMALLOC_START   (((unsigned long) high_memory + VMALLOC_OFFSET) \
+			 & ~(VMALLOC_OFFSET-1))
 #define VMALLOC_VMADDR(x) ((unsigned long)(x))
 #define VMALLOC_END     (0x7fffffffL)
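
A worked example of the rounding (the high_memory value is made up): with
high_memory at 0x08000000 (128 MB) the sum is 0x08800000, already on an 8 MB
boundary, so VMALLOC_START is 0x08800000 and the hole is exactly 8 MB; for an
unaligned high_memory the hole shrinks to whatever remains up to the next
boundary.

	/* Editorial sketch, not part of the patch. */
	#include <stdio.h>
	int main(void)
	{
		unsigned long high_memory = 0x08000000UL;	/* assumed */
		unsigned long start = (high_memory + 8*1024*1024)
				      & ~(8UL*1024*1024 - 1);
		printf("VMALLOC_START = 0x%08lx\n", start);	/* 0x08800000 */
		return 0;
	}
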
 
 
 /*
  * A pagetable entry of S390 has following format:
- *
  *  |   PFRA          |    |  OS  |
  * 0                   0IP0
  * 00000000001111111111222222222233
@@ -110,11 +127,8 @@
  *
  * I Page-Invalid Bit:    Page is not available for address-translation
  * P Page-Protection Bit: Store access not possible for page
- */
-
-/*
- * A segmenttable entry of S390 has following format:
  *
+ * A segmenttable entry of S390 has following format:
  *  |   P-table origin      |  |PTL
  * 0                         IC
  * 00000000001111111111222222222233
@@ -122,10 +136,8 @@
  *
  * I Segment-Invalid Bit:    Segment is not available for address-translation
  * C Common-Segment Bit:     Segment is not private (PoP 3-30)
- * PTL Page-Table-Length:    Length of Page-table (PTL+1*16 entries -> up to 256 entries)
- */
-
-/*
+ * PTL Page-Table-Length:    Page-table length ((PTL+1)*16 entries -> up to 256)
+ *
  * The segmenttable origin of S390 has following format:
  *
  *  |S-table origin   |     | STL |
@@ -137,27 +149,38 @@
  * G Segment-Invalid Bit:     *
  * P Private-Space Bit:       Segment is not private (PoP 3-30)
  * S Storage-Alteration:
- * STL Segment-Table-Length:  Length of Page-table (STL+1*16 entries -> up to 2048 entries)
+ * STL Segment-Table-Length:  Segment-table length ((STL+1)*16 entries -> up to 2048)
+ *
+ * A storage key has the following format:
+ * | ACC |F|R|C|0|
+ *  0   3 4 5 6 7
+ * ACC: access key
+ * F  : fetch protection bit
+ * R  : referenced bit
+ * C  : changed bit
  */
 
+/* Bits in the page table entry */
 #define _PAGE_PRESENT   0x001          /* Software                         */
-#define _PAGE_ACCESSED  0x002          /* Software accessed                */
-#define _PAGE_DIRTY     0x004          /* Software dirty                   */
 #define _PAGE_RO        0x200          /* HW read-only                     */
 #define _PAGE_INVALID   0x400          /* HW invalid                       */
 
+/* Bits in the segment table entry */
 #define _PAGE_TABLE_LEN 0xf            /* only full page-tables            */
 #define _PAGE_TABLE_COM 0x10           /* common page-table                */
 #define _PAGE_TABLE_INV 0x20           /* invalid page-table               */
 #define _SEG_PRESENT    0x001          /* Software (overlap with PTL)      */
 
+/* Bits in the storage key */
+#define _PAGE_CHANGED    0x02          /* HW changed bit                   */
+#define _PAGE_REFERENCED 0x04          /* HW referenced bit                */
+
 #define _USER_SEG_TABLE_LEN    0x7f    /* user-segment-table up to 2 GB    */
 #define _KERNEL_SEG_TABLE_LEN  0x7f    /* kernel-segment-table up to 2 GB  */
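
Since the referenced and changed state lives in the hardware storage key
rather than in the pte itself, these two bits are tested on a key byte
fetched with the iske instruction. A hypothetical helper pair (editorial,
not part of the patch) decoding the key layout from the comment above:

	/* 'skey' is assumed to hold the byte returned by iske. */
	static inline int skey_referenced(int skey)
	{
		return (skey & _PAGE_REFERENCED) != 0;	/* R bit, 0x04 */
	}

	static inline int skey_changed(int skey)
	{
		return (skey & _PAGE_CHANGED) != 0;	/* C bit, 0x02 */
	}
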
 
 /*
  * User and Kernel pagetables are identical
  */
-
 #define _PAGE_TABLE     (_PAGE_TABLE_LEN )
 #define _KERNPG_TABLE   (_PAGE_TABLE_LEN )
 
@@ -165,22 +188,25 @@
  * The Kernel segment-tables includes the User segment-table
  */
 
-#define _SEGMENT_TABLE  (_USER_SEG_TABLE_LEN|0x80000000)
+#define _SEGMENT_TABLE  (_USER_SEG_TABLE_LEN|0x80000000|0x100)
 #define _KERNSEG_TABLE  (_KERNEL_SEG_TABLE_LEN)
+
 /*
  * No mapping available
  */
-#define PAGE_NONE       __pgprot(_PAGE_INVALID )
-
-#define PAGE_SHARED     __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)
-#define PAGE_COPY       __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_RO)
-#define PAGE_READONLY   __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_RO)
-#define PAGE_KERNEL     __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define PAGE_INVALID  __pgprot(_PAGE_INVALID)
+#define PAGE_NONE     __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_COPY     __pgprot(_PAGE_PRESENT | _PAGE_RO)
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_RO)
+#define PAGE_SHARED   __pgprot(_PAGE_PRESENT)
+#define PAGE_KERNEL   __pgprot(_PAGE_PRESENT)
 
 /*
- * The S390 can't do page protection for execute, and considers that the same are read.
- * Also, write permissions imply read permissions. This is the closest we can get..
+ * The S390 can't do page protection for execute, and considers execute
+ * the same as read. Also, write permissions imply read permissions. This is
+ * the closest we can get..
  */
+         /*xwr*/
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_COPY
@@ -200,212 +226,245 @@
 #define __S111  PAGE_SHARED
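
In 2.4 these macros fill the generic protection_map[] array, indexed by the
VM_READ/VM_WRITE/VM_EXEC/VM_SHARED flags of a vma. An editorial fragment
(not part of the patch) showing two lookups:

	pgprot_t copy  = protection_map[VM_WRITE];
		/* __P010 -> PAGE_COPY: read-only pte, so the first store
		 * faults and triggers copy-on-write */
	pgprot_t share = protection_map[VM_SHARED|VM_READ|VM_WRITE|VM_EXEC];
		/* __S111 -> PAGE_SHARED: writable pte */
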
 
 /*
- * Define this if things work differently on an i386 and an i486:
- * it will (on an i486) warn about kernel memory accesses that are
- * done without a 'verify_area(VERIFY_WRITE,..)'
- *
- * Kernel and User memory-access are done equal, so we don't need verify
+ * Permanent address of a page.
  */
-#undef TEST_VERIFY_AREA
-
-/* page table for 0-4MB for everybody */
-extern unsigned long pg0[1024];
-
-/* number of bits that fit into a memory pointer */
-#define BITS_PER_PTR                    (8*sizeof(unsigned long))
-
-/* to align the pointer to a pointer address */
-#define PTR_MASK                        (~(sizeof(void*)-1))
-
-/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
-/* 64-bit machines, beware!  SRB. */
-#define SIZEOF_PTR_LOG2                 2
-
-/* to find an entry in a page-table */
-#define PAGE_PTR(address) \
-((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)
-
-
-
-/* 
- * CR 7 (SPST) and cr 13 (HPST) are set to the user pgdir. 
- * Kernel is running in its own, disjunct address space,
- * running in primary address space.
- * Copy to/from user is done via access register mode with
- * access registers set to 0 or 1. For that purpose we need 
- * set up CR 7 with the user pgd.  
- * 
- */
-
-#define SET_PAGE_DIR(tsk,pgdir)                                              \
-do {                                                                         \
-        unsigned long __pgdir = (__pa(pgdir) & PAGE_MASK ) | _SEGMENT_TABLE; \
-        (tsk)->thread.user_seg = __pgdir;                                    \
-        if ((tsk) == current) {                                              \
-                __asm__ __volatile__("lctl  7,7,%0": :"m" (__pgdir));        \
-                __asm__ __volatile__("lctl  13,13,%0": :"m" (__pgdir));      \
-        }                                                                    \
-} while (0)
-
-
-extern inline int pte_none(pte_t pte)           { return ((pte_val(pte) & (_PAGE_INVALID | _PAGE_RO)) == _PAGE_INVALID); } 
-extern inline int pte_present(pte_t pte)        { return pte_val(pte) & _PAGE_PRESENT; }
-extern inline void pte_clear(pte_t *ptep)       { pte_val(*ptep) = _PAGE_INVALID; }
-#define PTE_INIT(x) pte_clear(x)
-
-extern inline int pmd_none(pmd_t pmd)           { return pmd_val(pmd) & _PAGE_TABLE_INV; }
-extern inline int pmd_bad(pmd_t pmd)            { return (pmd_val(pmd) == 0); }
-extern inline int pmd_present(pmd_t pmd)        { return pmd_val(pmd) & _SEG_PRESENT; }
-extern inline void pmd_clear(pmd_t * pmdp)      {
-                                                        pmd_val(pmdp[0]) = _PAGE_TABLE_INV;
-                                                        pmd_val(pmdp[1]) = _PAGE_TABLE_INV;
-                                                        pmd_val(pmdp[2]) = _PAGE_TABLE_INV;
-                                                        pmd_val(pmdp[3]) = _PAGE_TABLE_INV;
-                                                }
+#define page_address(page) ((page)->virtual)
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 
 /*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
+ * pgd/pmd/pte query functions
  */
-extern inline int pgd_none(pgd_t pgd)           { return 0; }
-extern inline int pgd_bad(pgd_t pgd)            { return 0; }
-extern inline int pgd_present(pgd_t pgd)        { return 1; }
-extern inline void pgd_clear(pgd_t * pgdp)      { }
+extern inline int pgd_present(pgd_t pgd) { return 1; }
+extern inline int pgd_none(pgd_t pgd)    { return 0; }
+extern inline int pgd_bad(pgd_t pgd)     { return 0; }
 
+extern inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _SEG_PRESENT; }
+extern inline int pmd_none(pmd_t pmd)    { return pmd_val(pmd) & _PAGE_TABLE_INV; }
+extern inline int pmd_bad(pmd_t pmd)
+{
+	return (pmd_val(pmd) & (~PAGE_MASK & ~_PAGE_TABLE_INV)) != _PAGE_TABLE;
+}
+
+extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; }
+extern inline int pte_none(pte_t pte)
+{
+	return ((pte_val(pte) & 
+                (_PAGE_INVALID | _PAGE_RO | _PAGE_PRESENT)) == _PAGE_INVALID);
+}
+
+#define pte_same(a,b)	(pte_val(a) == pte_val(b))
 
 /*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
+ * query functions pte_write/pte_dirty/pte_young only work if
+ * pte_present() is true. Undefined behaviour if not..
  */
-extern inline int pte_write(pte_t pte)          { return !(pte_val(pte) & _PAGE_RO); }
-extern inline int pte_dirty(pte_t pte)          { return pte_val(pte) & _PAGE_DIRTY; }
-extern inline int pte_young(pte_t pte)          { return pte_val(pte) & _PAGE_ACCESSED; }
-
-/* who needs that
-extern inline int pte_read(pte_t pte)           { return !(pte_val(pte) & _PAGE_INVALID); }
-extern inline int pte_exec(pte_t pte)           { return !(pte_val(pte) & _PAGE_INVALID); }
-extern inline pte_t pte_rdprotect(pte_t pte)    { pte_val(pte) |= _PAGE_INVALID; return pte; }
-extern inline pte_t pte_exprotect(pte_t pte)    { pte_val(pte) |= _PAGE_INVALID; return pte; }
-extern inline pte_t pte_mkread(pte_t pte)       { pte_val(pte) &= _PAGE_INVALID; return pte; }
-extern inline pte_t pte_mkexec(pte_t pte)       { pte_val(pte) &= _PAGE_INVALID; return pte; }
-*/
+extern inline int pte_write(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_RO) == 0;
+}
 
-extern inline pte_t pte_wrprotect(pte_t pte)    { pte_val(pte) |= _PAGE_RO; return pte; }
-extern inline pte_t pte_mkwrite(pte_t pte)      { pte_val(pte) &= ~_PAGE_RO ; return pte; }
+extern inline int pte_dirty(pte_t pte)
+{
+	int skey;
 
-extern inline pte_t pte_mkclean(pte_t pte)      { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
-extern inline pte_t pte_mkdirty(pte_t pte)      { pte_val(pte) |= _PAGE_DIRTY; return pte; }
+	asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte)));
+	return skey & _PAGE_CHANGED;
+}
 
-extern inline pte_t pte_mkold(pte_t pte)        { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-extern inline pte_t pte_mkyoung(pte_t pte)      { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+extern inline int pte_young(pte_t pte)
+{
+	int skey;
 
+	asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte)));
+	return skey & _PAGE_REFERENCED;
+}
 
 /*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
+ * pgd/pmd/pte modification functions
  */
-#define mk_pte(page, pgprot) \
-({ pte_t __pte; pte_val(__pte) = __pa(((page)-mem_map)<<PAGE_SHIFT) + pgprot_val(pgprot); __pte; })
+extern inline void pgd_clear(pgd_t * pgdp)      { }
 
-/* This takes a physical page address that is used by the remapping functions */
-#define mk_pte_phys(physpage, pgprot) \
-({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+extern inline void pmd_clear(pmd_t * pmdp)
+{
+	pmd_val(pmdp[0]) = _PAGE_TABLE_INV;
+	pmd_val(pmdp[1]) = _PAGE_TABLE_INV;
+	pmd_val(pmdp[2]) = _PAGE_TABLE_INV;
+	pmd_val(pmdp[3]) = _PAGE_TABLE_INV;
+}
 
-extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{ pte_val(pte) = (pte_val(pte) & PAGE_MASK) | pgprot_val(newprot); return pte; }
+extern inline void pte_clear(pte_t *ptep)
+{
+	pte_val(*ptep) = _PAGE_INVALID; 
+}
 
-#define page_address(page)  ((page)->virtual)
-#define pte_page(x) (mem_map+(unsigned long)((pte_val(pte) >> PAGE_SHIFT)))
+#define PTE_INIT(x) pte_clear(x)
 
-#define pmd_page(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+/*
+ * The following pte modification functions only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & PAGE_MASK) | pgprot_val(newprot);
+	return pte;
+}
 
-/* to find an entry in a page-table-directory */
-#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+extern inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_RO;
+	return pte;
+}
 
-#define __pgd_offset(address) pgd_index(address)
+extern inline pte_t pte_mkwrite(pte_t pte) 
+{
+	pte_val(pte) &= ~_PAGE_RO;
+	return pte;
+}
 
-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+extern inline pte_t pte_mkclean(pte_t pte)
+{
+	/* We can't clear the changed bit atomically. The iske/and/sske
+         * sequence has a race condition with the page referenced bit.
+         * At the moment pte_mkclean is always followed by a pte_mkold.
+         * So it's safe to ignore the problem for now. Hope this will
+         * never change ... */
+	asm volatile ("sske %0,%1" 
+	              : : "d" (0), "a" (pte_val(pte)));
+	return pte;
+}
 
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+extern inline pte_t pte_mkdirty(pte_t pte)
+{
+	/* We can't set the changed bit atomically either. For now we
+         * set (!) the page referenced bit. */
+	asm volatile ("sske %0,%1" 
+	              : : "d" (_PAGE_CHANGED|_PAGE_REFERENCED),
+		          "a" (pte_val(pte)));
+	return pte;
+}
 
-/* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+extern inline pte_t pte_mkold(pte_t pte)
 {
-        return (pmd_t *) dir;
+	asm volatile ("rrbe 0,%0" : : "a" (pte_val(pte)));
+	return pte;
 }
 
-/* Find an entry in the third-level page table.. */
-#define pte_offset(pmd, address) \
-((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2))))
+extern inline pte_t pte_mkyoung(pte_t pte)
+{
+	/* To set the referenced bit we read the first word from the real
+	 * page with a special instruction: load using real address (lura).
+	 * Isn't S/390 a nice architecture ?! */
+	asm volatile ("lura 0,%0" : : "a" (pte_val(pte) & PAGE_MASK) : "0" );
+	return pte;
+}
 
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+	int ccode;
 
-/* We don't use pmd cache, so these are dummy routines */
-extern __inline__ pmd_t *get_pmd_fast(void)
+	asm volatile ("rrbe 0,%1\n\t"
+		      "ipm  %0\n\t"
+		      "srl  %0,28\n\t" : "=d" (ccode) : "a" (pte_val(*ptep)));
+	return ccode & 2;
+}
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
 {
-        return (pmd_t *)0;
+	int skey;
+
+	asm volatile ("iske %0,%1" : "=d" (skey) : "a" (*ptep));
+	if ((skey & _PAGE_CHANGED) == 0)
+		return 0;
+	/* We can't clear the changed bit atomically. For now we
+         * clear (!) the page referenced bit. */
+	asm volatile ("sske %0,%1" 
+	              : : "d" (0), "a" (*ptep));
+	return 1;
 }
 
-extern __inline__ void free_pmd_fast(pmd_t *pmd)
+static inline pte_t ptep_get_and_clear(pte_t *ptep)
 {
+	pte_t pte = *ptep;
+	pte_clear(ptep);
+	return pte;
 }
 
-extern __inline__ void free_pmd_slow(pmd_t *pmd)
+static inline void ptep_set_wrprotect(pte_t *ptep)
 {
+	pte_t old_pte = *ptep;
+	set_pte(ptep, pte_wrprotect(old_pte));
 }
 
-extern void __handle_bad_pmd(pmd_t *pmd);
-extern void __handle_bad_pmd_kernel(pmd_t *pmd);
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+	pte_mkdirty(*ptep);
+}
 
 /*
- * The S390 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
  */
-extern inline void update_mmu_cache(struct vm_area_struct * vma,
-        unsigned long address, pte_t pte)
+extern inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 {
+	pte_t __pte;
+	pte_val(__pte) = physpage + pgprot_val(pgprot);
+	return __pte;
 }
+#define mk_pte(page,pgprot) mk_pte_phys(__pa(((page)-mem_map)<<PAGE_SHIFT),pgprot)
+
+#define pte_page(x) (mem_map+(unsigned long)((pte_val(x) >> PAGE_SHIFT)))
+
+#define pmd_page(pmd) \
+        ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the second-level page table.. */
+extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+{
+        return (pmd_t *) dir;
+}
+
+/* Find an entry in the third-level page table.. */
+#define pte_offset(pmd, address) \
+        ((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2))))
 
 /*
- * a page-table entry has only 19 bit for offset and 7 bit for type
- * if bits 0, 20 or 23 are set, a translation specification exceptions occures, and it's
- * hard to find out the failing address
- * therefor, we zero out this bits
+ * A page-table entry has some bits we have to treat in a special way.
+ * Bits 0, 20 and 23 have to be zero, otherwise a specification
+ * exception will occur instead of a page translation exception. The
+ * specification exception has the bad habit of not storing the necessary
+ * information in the lowcore.
+ * Bit 21 and bit 22 are the page invalid bit and the page protection
+ * bit. We set both to indicate a swapped page.
+ * Bit 31 is used as the software page present bit. If a page is
+ * swapped this obviously has to be zero.
+ * This leaves the bits 1-19 and bits 24-30 to store type and offset.
+ * We use the 7 bits from 24-30 for the type and the 19 bits from 1-19
+ * for the offset.
+ * 0|     offset      |0110|type |0
+ * 00000000001111111111222222222233
+ * 01234567890123456789012345678901
  */
+extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+{
+	pte_t pte;
+	pte_val(pte) = (type << 1) | (offset << 12) | _PAGE_INVALID | _PAGE_RO;
+	pte_val(pte) &= 0x7ffff6fe;  /* better to be paranoid */
+	return pte;
+}
 
-#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f)
-#define SWP_OFFSET(entry) (((entry).val >> 12) & 0x7FFFF )
-#define SWP_ENTRY(type,offset) ((swp_entry_t) { (((type) << 1) | \
-                                                 ((offset) << 12) | \
-                                                 _PAGE_INVALID | _PAGE_RO) \
-                                                 & 0x7ffff6fe })
-
-#define pte_to_swp_entry(pte)           ((swp_entry_t) { pte_val(pte) })
-#define swp_entry_to_pte(x)             ((pte_t) { (x).val })
+#define SWP_TYPE(entry)		(((entry).val >> 1) & 0x3f)
+#define SWP_OFFSET(entry)	(((entry).val >> 12) & 0x7FFFF )
+#define SWP_ENTRY(type,offset)	((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
 
-#include <asm-generic/pgtable.h>
+#define pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define swp_entry_to_pte(x)	((pte_t) { (x).val })
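
A quick editorial round-trip check of the encoding (values arbitrary, not
part of the patch):

	swp_entry_t entry = SWP_ENTRY(5, 0x1234);
	/* pte value 0x0123460a: SWP_TYPE(entry) == 5 and
	 * SWP_OFFSET(entry) == 0x1234. _PAGE_INVALID and _PAGE_RO are set,
	 * so the hardware never translates the entry, and bit 31
	 * (_PAGE_PRESENT) is zero, so pte_present() is false. */
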
 
 #endif /* !__ASSEMBLY__ */
 
