Plan 9 from Bell Labs’s /usr/web/sources/contrib/quanstro/root/sys/src/fs/pc/mmu.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include	"all.h"
#include	"mem.h"
#include	"io.h"
#include	"ureg.h"

/*
 *  task state segment.  Plan 9 ignores all the task switching goo and just
 *  uses the tss for esp0 and ss0 on gates into the kernel, interrupts,
 *  and exceptions.  The rest is completely ignored.
 *
 *  This means that we only need one tss in the whole system.
 *
 *  In this file only ss0/sp0, ss1/sp1, ss2/sp2 and cr3 are ever
 *  written (by taskswitch); the whole structure is cleared by
 *  mmuinit before first use.  Do not reorder or resize the fields:
 *  sizeof(Tss) and the address of tss are placed in the gdt.
 */
typedef struct Tss	Tss;
struct Tss
{
	ulong	backlink;	/* unused */
	ulong	sp0;		/* pl0 stack pointer */
	ulong	ss0;		/* pl0 stack selector */
	ulong	sp1;		/* pl1 stack pointer */
	ulong	ss1;		/* pl1 stack selector */
	ulong	sp2;		/* pl2 stack pointer */
	ulong	ss2;		/* pl2 stack selector */
	ulong	cr3;		/* page table descriptor */
	ulong	eip;		/* instruction pointer */
	ulong	eflags;		/* processor flags */
	ulong	eax;		/* general (hah?) registers */
	ulong 	ecx;
	ulong	edx;
	ulong	ebx;
	ulong	esp;
	ulong	ebp;
	ulong	esi;
	ulong	edi;
	ulong	es;		/* segment selectors */
	ulong	cs;
	ulong	ss;
	ulong	ds;
	ulong	fs;
	ulong	gs;
	ulong	ldt;		/* local descriptor table */
	ulong	iomap;		/* io map base */
};
Tss tss;			/* the single system-wide tss */

/*
 *  segment descriptor initializers
 *
 *  Each macro expands to a brace initializer for the two words of
 *  a Segdesc (d0, then d1).  The SEG* flag bits and SEGPL() are
 *  defined elsewhere; p is the privilege level passed to SEGPL().
 */

/*
 *  data segment.  The earlier definition (limit 0xFFFF with 0xF in
 *  the high limit bits) is kept below for reference; the active one
 *  uses 1 in the first word, 0 in the high limit bits, and adds SEGE.
 *  NOTE(review): presumably SEGE selects an expand-down segment --
 *  confirm against the header that defines it.
 */
//#define	DATASEGM(p) 	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	DATASEGM(p) 	{ 1     , SEGG|SEGB|(0<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW|SEGE }

/* code segment: 0xFFFF in the first word, 0xF in bits 16-19 of the second */
#define	EXECSEGM(p) 	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
/*
 *  call gate to offset o in the segment selected by s:
 *  d0 = low 16 bits of o with s in the upper half,
 *  d1 = high 16 bits of o plus flags (& binds tighter than |,
 *  so only o is masked by 0xFFFF0000).
 */
#define CALLGATE(s,o,p)	{ ((o)&0xFFFF)|((s)<<16), (o)&0xFFFF0000|SEGP|SEGPL(p)|SEGCG }
/* data and code variants without SEGG/SEGB/SEGD and with 0 in the high limit bits */
#define	D16SEGM(p) 	{ 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	E16SEGM(p) 	{ 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
/*
 *  tss descriptor for base address b:
 *  d0 = low 16 bits of b in the upper half, sizeof(Tss) in the lower half,
 *  d1 = bits 24-31 of b in place, bits 16-23 of b in the low byte, plus flags.
 */
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

/*
 *  global descriptor table describing all segments
 *
 *  The TSSSEG entry is built here with a base of 0 only as a
 *  placeholder; mmuinit overwrites its d0/d1 words with the real
 *  address of tss before loading the table with putgdt.
 */
Segdesc gdt[] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
};

/*
 *  ktoppg is filled in by mmuinit: va is the address returned by
 *  ialloc for the top level table and pa is PADDR(va).  vmapalloc
 *  and vmap use ktoppg.va as the page directory they edit.
 */
static struct {
	ulong	va;
	ulong	pa;
} ktoppg;			/* prototype top level page table
				 * containing kernel mappings  */
static ulong	*kpt;		/* 2nd level page tables for kernel mem;
				 * allocated by mmuinit only when 4K pages
				 * are used, otherwise left 0 */

/*
 *  round s up to the next multiple of v; v must be a power of two.
 *  v is parenthesized at each use so that an expression such as
 *  1<<n can be passed without being re-associated with the -1.
 */
#define ROUNDUP(s,v)	(((s)+((v)-1))&~((v)-1))
/*
 *  offset of virtual address into
 *  top level page table:
 *  v is converted to ulong and shifted right by 2*PGSHIFT-2 bits
 *  (22 bits if PGSHIFT is 12; PGSHIFT is defined elsewhere).
 *  The result is not masked.
 */
#define TOPOFF(v)	(((ulong)(v))>>(2*PGSHIFT-2))

/*
 *  offset of virtual address into
 *  bottom level page table:
 *  the page number of v (v>>PGSHIFT) reduced modulo WD2PG, the
 *  number of words per page, which is defined in an enum later
 *  in this file.
 */
#define BTMOFF(v)	((((ulong)(v))>>(PGSHIFT))&(WD2PG-1))

/*
 *  Install pagetbl as the current page table and make stack the
 *  stack used for exceptions (traps & interrupts), which is taken
 *  from the tss.  The same kernel data selector and stack pointer
 *  are stored for privilege levels 0, 1 and 2.  There is only one
 *  tss, so (we hope) the task register never needs reloading here
 *  with puttr().
 */
static void
taskswitch(ulong pagetbl, ulong stack)
{
	Tss *t;

	t = &tss;

	/* stack selector/pointer pairs, one per privilege level */
	t->ss0 = KDSEL;
	t->sp0 = stack;
	t->ss1 = KDSEL;
	t->sp1 = stack;
	t->ss2 = KDSEL;
	t->sp2 = stack;

	/* record the page table in the tss, then load it */
	t->cr3 = pagetbl;
	putcr3(pagetbl);
}

/*
 *  Create a prototype page map that maps all of memory into
 *  kernel (KZERO) space.  This is the default map.  It is used
 *  whenever the processor is not running a process or whenever running
 *  a process which does not yet have its own map.
 *
 *  In order: fill in the tss descriptor and load the gdt; allocate
 *  the top level table; choose 4K or 4MB pages; map physical memory
 *  from 0 up to mconf.topofmem starting at KZERO; clear the tss,
 *  switch to the new page table and load the task register.
 */

void
mmuinit(void)
{
	ulong i, nkpt, npage, x, y, *top, nbytes, pgsz, flag;

	/*
	 *  set up the global descriptor table. we make the tss entry here
	 *  since it requires arithmetic on an address and hence cannot
	 *  be a compile or link time constant.
	 *  (same word layout as the TSSSEGM macro, with b = &tss and p = 0)
	 */
	x = (ulong)&tss;
	gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
	putgdt(gdt, sizeof gdt);

	/*
	 *  set up system page tables.
	 *  map all of physical memory to start at KZERO.
	 *  leave a map entry for a user area.
	 */

	/*
	 *  allocate top level table
	 */
	top = ialloc(BY2PG, BY2PG);

	ktoppg.va = (ulong)top;
	ktoppg.pa = PADDR(ktoppg.va);

	/*
	 *  default to 4K pages.  if bit 0x08 of the cpuid dx word is
	 *  set, turn on bit 0x10 in cr4 and map with 4MB pages instead
	 *  (pdbmap tests the same two bits and calls the result pse).
	 */
	flag = PTEVALID|PTEKERNEL|PTEWRITE;
	pgsz = BY2PG;
	if(m->cpuiddx & 0x08){
		putcr4(getcr4()|0x10);
		pgsz = 4*MB;
		flag |= PTESIZE;
	}

	/*  map all memory to KZERO */
	/*
	 *  NOTE(review): the division truncates, so if mconf.topofmem is
	 *  not a multiple of pgsz the final partial page is not mapped.
	 */
	npage = mconf.topofmem/pgsz;

	if(pgsz == BY2PG){
		/*
		 *  4K pages: build the bottom level tables in kpt, one
		 *  word per page of physical memory starting at address 0.
		 *  only the first npage words are written here; the rest of
		 *  the rounded-up allocation keeps whatever ialloc returned.
		 *  NOTE(review): confirm ialloc returns zeroed memory.
		 */
		nbytes = PGROUND(npage*BY2WD);		/* words of page map */
		nkpt = nbytes/BY2PG;			/* pages of page map */
		kpt = ialloc(nbytes, BY2PG);
		for(i = 0; i < npage; i++)
			kpt[i] = (0+i*pgsz)|flag;
		y = PADDR((ulong)kpt);
	} else{
		/*
		 *  4MB pages: no bottom level tables; each top level
		 *  entry maps physical memory directly, starting at 0.
		 */
		nkpt = npage;
		y = 0;
	}
	/*
	 *  fill top level entries from the slot for KZERO upward.
	 *  with 4K pages entry i points at the i'th page of kpt;
	 *  with 4MB pages entry i maps physical address i*4MB.
	 */
	x = TOPOFF(KZERO);
	for(i = 0; i < nkpt; i++)
		top[x+i] = (y+i*pgsz)|flag;

	/*
	 *  set up the task segment
	 *
	 *  NOTE(review): the stack passed here is pgsz + (ulong)m, so with
	 *  4MB pages it is m+4MB rather than m+BY2PG -- confirm that using
	 *  pgsz (and not BY2PG) is intended.
	 */
	memset(&tss, 0, sizeof(tss));
	taskswitch(ktoppg.pa, pgsz + (ulong)m);
	puttr(TSSSEL);/**/
}

/*
 *  used to map a page into 4 meg - BY2PG for confinit(). tpt is the temporary
 *  page table set up by l.s.
 *
 *  Pteoff is the byte offset of that page (the last page below 4MB);
 *  mapaddr returns addresses inside KZERO+Pteoff.
 */
enum{
	Pteoff	= 4*MB-BY2PG,
};

/*
 *  Make the page containing physical address paddr visible at
 *  KZERO+Pteoff and return a pointer to paddr within that window.
 *  Each call replaces the previous mapping, so a returned pointer
 *  is only good until the next call.
 *
 *  On the first call the current cr3 is remembered and the page
 *  table entry for the window is located in the page found at
 *  cr3-BY2PG, i.e. the page table is taken to sit in the page just
 *  below the top level table.
 */
ulong*
mapaddr(ulong paddr)
{
	static ulong *pte, top;
	ulong pgoff, pgbase;

	if(pte == 0){
		top = getcr3();
		pte = (ulong*)KADDR(top-BY2PG);
		pte += Pteoff>>PGSHIFT;
	}

	/* split paddr into its page and the offset inside it */
	pgoff = paddr&(BY2PG-1);
	pgbase = paddr-pgoff;

	/* point the window at the page, then reload cr3 */
	*pte = pgbase|PTEVALID|PTEKERNEL|PTEWRITE;
	putcr3(top);

	return (ulong*)(KZERO+Pteoff+pgoff);
}

/*
 *  PDX: bits 22-31 of va, used in this file to index the top
 *       level table (page directory).
 *  PTX: bits 12-21 of va, used to index a bottom level page table.
 *  PPN: x with the low bits below BY2PG cleared; applied to a table
 *       entry it strips the flag bits and leaves the address.
 */
#define PDX(va)		((((ulong)(va))>>22) & 0x03FF)
#define PTX(va)		((((ulong)(va))>>12) & 0x03FF)
#define PPN(x)		((x)&~(BY2PG-1))

/*
 *  Walk the page table rooted at pdb and return a pointer to the
 *  entry for virtual address va at the requested level: 1 yields
 *  the top level entry, 2 the bottom level entry.  Any other level
 *  returns 0.
 *
 *  If the top level entry is not valid and create is 0, a message
 *  is printed and 0 is returned whatever the level.  With create
 *  set, a level 2 walk through an invalid top level entry first
 *  allocates a page-table page with ialloc and registers it in the
 *  top level.  A level 2 walk through an entry marked PTESIZE panics.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *pde, *pt, ptpa;

	pde = &pdb[PDX(va)];
	if(!(*pde & PTEVALID) && create == 0){
		print("pte not valid\n");
		return 0;
	}

	if(level == 1)
		return pde;
	if(level != 2)
		return 0;

	/* a big page has no second level to walk into */
	if(*pde & PTESIZE)
		panic("mmuwalk2: va 0x%ux entry 0x%ux\n", va, *pde);

	/* create was requested and there is no page table yet */
	if(!(*pde & PTEVALID)){
		ptpa = PADDR(ialloc(BY2PG, BY2PG));
		*pde = ptpa|PTEWRITE|PTEVALID;
	}

	pt = KADDR(PPN(*pde));
	return &pt[PTX(va)];
}

/* round s up to the next multiple of sz; sz must be a power of two */
#define ROUND(s, sz)	(((s) + ((sz)-1)) & ~((sz)-1))

/*
 *  Sizes and addresses of the regions placed immediately below
 *  KZERO.  From the top down: VPT (VPTSIZE bytes), KMAP (KMAPSIZE
 *  bytes), then VMAP, which takes what is left of 0x10000000 bytes.
 *  Only VMAP and VMAPSIZE are used in this file (by vmapalloc).
 */
enum{
	WD2PG		= 4096/4,	/* 4-byte words per 4096-byte page */
	BY2XPG	= 4096*1024,	/* bytes per big page */
	VPTSIZE	= BY2XPG,
	KMAPSIZE	= BY2XPG,
	VMAPSIZE	= 0x10000000-VPTSIZE-KMAPSIZE,
	VPT		= KZERO-VPTSIZE,
	KMAP		= VPT-KMAPSIZE,
	VMAP		= KMAP-VMAPSIZE,
};


/* held by vmap around vmapalloc and pdbmap */
static Lock vmaplock;

/*
 *  Scan the n words at a for the first run of count consecutive
 *  zero words.  Returns the index of the start of the run, or -1
 *  if there is none.
 */
static int
findhole(ulong *a, int n, int count)
{
	int idx, run;

	run = 0;
	idx = 0;
	while(idx < n){
		/* extend the current run of zeros or start over */
		run = (a[idx] == 0) ? run+1 : 0;
		idx++;
		if(run >= count)
			return idx - run;
	}
	return -1;
}

/*
 *  Find size bytes of unused virtual address space in the vmap
 *  region of the kernel page directory.  Returns the virtual
 *  address of the space, or 0 if none was found.  Nothing is
 *  marked as taken here; the caller maps the range.
 *
 *  Requests of 4MB or more get a run of whole, empty page
 *  directory entries.  Smaller requests are first fitted into a
 *  hole in an existing page table and otherwise get a fresh
 *  directory entry.
 */
static ulong
vmapalloc(ulong size)
{
	int slot, hole, need, npde;
	ulong *kpdb, *window, pde;

	kpdb = (ulong*)ktoppg.va;
	window = &kpdb[PDX(VMAP)];
	npde = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		need = (size+4*MB-1) / (4*MB);
		hole = findhole(window, npde, need);
		if(hole == -1)
			return 0;
		return VMAP + hole*4*MB;
	}

	need = (size+BY2PG-1) / BY2PG;
	for(slot = 0; slot < npde; slot++){
		pde = window[slot];
		/* only entries that point at a page table can be searched */
		if(!(pde&PTEVALID) || (pde&PTESIZE))
			continue;
		hole = findhole(KADDR(PPN(pde)), WD2PG, need);
		if(hole != -1)
			return VMAP + slot*4*MB + hole*BY2PG;
	}

	hole = findhole(window, npde, 1);
	if(hole != -1)
		return VMAP + hole*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 *  Enter mappings into the page table rooted at pdb so that size
 *  bytes at virtual address va refer to physical address pa.
 *  The low 12 bits of pa are not part of the address: they are
 *  taken as flag bits and or'ed into every entry written.
 *
 *  4MB entries are written directly into pdb where possible;
 *  everything else goes through mmuwalk, which creates page
 *  tables as needed.  Panics if an address in the range is already
 *  covered by a 4MB entry or by a valid page table entry.
 *  Always returns 0.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong attr, done, step, *pde, *pte;

	attr = pa&0xFFF;
	pa &= ~0xFFF;

	/* 4MB pages need both the cpu feature bit and the cr4 enable bit */
	pse = (m->cpuiddx & 0x08) && (getcr4() & 0x10);

	done = 0;
	while(done < size){
		pde = &pdb[PDX(va+done)];
		if((*pde&PTEVALID) && (*pde&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+done, pa+done, *pde);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+done)%(4*MB) == 0 && (va+done)%(4*MB) == 0 && (size-done) >= 4*MB){
			*pde = (pa+done)|attr|PTESIZE|PTEVALID;
			step = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+done, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+done, pa+done, *pte);
			*pte = (pa+done)|attr|PTEVALID;
			step = BY2PG;
		}
		done += step;
	}
	return 0;
}

/*
 *  Map size bytes of physical memory starting at pa into the vmap
 *  region of the kernel page table, with PTEUNCACHED and PTEWRITE
 *  set, and return the virtual address that corresponds to pa.
 *  pa need not be page aligned; the range is widened to whole pages.
 *
 *  Returns nil, after printing the caller's pc, if pa lies in the
 *  page at physical address 0, and 0 if no virtual space is found
 *  or pdbmap reports failure.
 */
void*
vmap(ulong pa, int size)
{
	int reqsize;
	ulong pgoff, base, *kpdb;

	/*
	 * might be asking for less than a page.
	 */
	reqsize = size;
	pgoff = pa & (BY2PG-1);
	pa -= pgoff;
	size += pgoff;
	size = ROUND(size, BY2PG);

	if(pa == 0){
		print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa));
		return nil;
	}

	/* find space and map it under the lock */
	ilock(&vmaplock);
	kpdb = (ulong*)ktoppg.va;
	base = vmapalloc(size);
	if(base != 0 && pdbmap(kpdb, pa|PTEUNCACHED|PTEWRITE, base, size) < 0)
		base = 0;
	iunlock(&vmaplock);

	if(base == 0)
		return 0;
	USED(reqsize);
	return (void*)(base + pgoff);
}

/*
 *  Same as vmap(pa, size) with the result returned as a ulong.
 *  The third argument is ignored.
 */
ulong
upamalloc(ulong pa, int size, int)
{
	void *va;

	va = vmap(pa, size);
	return (ulong)va;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.