
/******************************************************************************
 * xen/mm/hypervisor.c
 * 
 * Update page tables via the hypervisor.
 * 
 * Copyright (c) 2002, K A Fraser
 */
#ifdef linux
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/hypervisor.h>
#include <asm/hypervisor-ifs/dom_mem_ops.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/multicall.h>
#endif
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"
#include "../xen/xen.h"


#define LOG(a)
/* wmb is a write memory barrier. The definition below is deliberately empty;
 * I will leave it in here for now just as a marker if we have troubles that
 * might be cured by such a thing.
 */
#define wmb()

#define physpfn_to_mfn(a) (xen_mm_mfn((void *) (a)))
#define xen_pa_to_ma(a)    (xen_va_to_ma((void *)(a)))
/*
 * This suffices to protect us if we ever move to SMP domains.
 * Further, it protects us against interrupts. At the very least, this is
 * required for the network driver which flushes the update queue before
 * pushing new receive buffers.
 */
static Lock update_lock;
//static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;

#define QUEUE_SIZE 2048
static mmu_update_t update_queue[QUEUE_SIZE];
unsigned int mmu_update_queue_idx = 0;
#define idx mmu_update_queue_idx

#undef MMU_UPDATE_DEBUG
#undef MMU_UPDATE_DEBUG_RECORD
#ifdef MMU_UPDATE_DEBUG_RECORD
/* the type page_update_debug_t isn't defined anywhere, so this stays commented out:
page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
 */
#undef queue_l1_entry_update
#undef queue_l2_entry_update
/* plan 9 doesn't go overboard with this pte_t stuff. We're in the arch
 * directory here. PTEs are unsigned longs and that's that. 
 */
static void DEBUG_allow_pt_reads(void)
{
    unsigned long *pte;
    mmu_update_t update;
    int i;
    for ( i = idx-1; i >= 0; i-- )
    {
        pte = update_debug_queue[i].ptep;
        if ( pte == NULL ) continue;
        update_debug_queue[i].ptep = NULL;
        update.ptr = xen_va_to_ma(pte);
        update.val = update_debug_queue[i].pteval;
        HYPERVISOR_mmu_update(&update, 1, NULL);
    }
}
static void DEBUG_disallow_pt_read(unsigned long va)
{
    ulong *pte;
    unsigned long pteval;
    Mach *mach0 = (Mach *) MACHADDR;
    /*
     * We may fault because of an already outstanding update.
     * That's okay -- it'll get fixed up in the fault handler.
     */
    mmu_update_t update;
    pte = mmuwalk(mach0->pdb, va, 2, 0);
    update.ptr = xen_va_to_ma(pte);
    pteval = *(unsigned long *)pte;
    update.val = pteval & ~_PAGE_PRESENT;
    HYPERVISOR_mmu_update(&update, 1, NULL);
    update_debug_queue[idx].ptep = pte;
    update_debug_queue[idx].pteval = pteval;
}
#endif

#ifdef MMU_UPDATE_DEBUG_RECORD
#undef queue_pt_switch
#undef queue_tlb_flush
#undef queue_invlpg
#undef queue_pgd_pin
#undef queue_pgd_unpin
#undef queue_pte_pin
#undef queue_pte_unpin
#endif

#ifdef NOTYET
/*
 * MULTICALL_flush_page_update_queue:
 *   This is a version of the flush which queues as part of a multicall.
 */
void MULTICALL_flush_page_update_queue(void)
{
    unsigned int _idx;
    ilock(&update_lock);
    if ( (_idx = idx) != 0 ) 
    {
#ifdef MMU_UPDATE_DEBUG
        dp("Flushing %d entries from pt update queue\n", idx);
#endif
#ifdef MMU_UPDATE_DEBUG_RECORD
        DEBUG_allow_pt_reads();
#endif
        idx = 0;
        wmb(); /* Make sure index is cleared first to avoid double updates. */
        queue_multicall3(__HYPERVISOR_mmu_update, 
                         (unsigned long)update_queue, 
                         (unsigned long)_idx, 
                         (unsigned long)0);
    }
    iunlock(&update_lock);
}
#endif
static void __flush_page_update_queue(void)
{
    unsigned int _idx = idx;
#ifdef MMU_UPDATE_DEBUG
    dp("Flushing %d entries from pt update queue\n", idx);
#endif
#ifdef MMU_UPDATE_DEBUG_RECORD
    DEBUG_allow_pt_reads();
#endif
    idx = 0;
    wmb(); /* Make sure index is cleared first to avoid double updates. */
    if ( HYPERVISOR_mmu_update(update_queue, _idx, 0) < 0 )
        panic("Failed to execute MMU updates");
}

void _flush_page_update_queue(void)
{
    ilock(&update_lock);
    if ( idx != 0 ) __flush_page_update_queue();
    iunlock(&update_lock);
}

static void increment_index(void)
{
    idx++;
    if ( idx == QUEUE_SIZE ) __flush_page_update_queue();
}

/* the 'val' here is a physical address (pfn<<PGSHIFT) plus permission bits.
 * We need to turn it into a machine address before handing it to Xen. */
void queue_l1_entry_update(unsigned long *pteptr, unsigned long pval)
{
	unsigned long mval;
    ilock(&update_lock);
#ifdef MMU_UPDATE_DEBUG_RECORD
    DEBUG_disallow_pt_read((unsigned long)pteptr);
#endif
	mval = xen_pa_to_ma(pval);
//	dp("ql1ue: P 0x%ulx xmfn 0x%ulx mval 0x%ulx\n", 
//				PADDR(pteptr), xen_va_to_ma(pteptr), mval);
    update_queue[idx].ptr = xen_va_to_ma(pteptr);
    update_queue[idx].val = mval;
    increment_index(); 
    iunlock(&update_lock);
}
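
/* A minimal usage sketch (illustrative only, not part of this port): callers
 * queue a batch of pte updates and then flush once, so many MMU updates reach
 * Xen in a single hypercall.  example_map_pages and its arguments are
 * hypothetical; mmuwalk, PTEVALID and PTEWRITE are used as elsewhere in this
 * file.
 *
 *	void
 *	example_map_pages(unsigned long *pa, unsigned long *va, int n)
 *	{
 *		int i;
 *		unsigned long *pte;
 *		Mach *mach0 = (Mach *) MACHADDR;
 *
 *		for(i = 0; i < n; i++){
 *			pte = mmuwalk(mach0->pdb, va[i], 2, 0);
 *			if(pte == nil)
 *				continue;
 *			queue_l1_entry_update(pte, pa[i]|PTEVALID|PTEWRITE);
 *		}
 *		_flush_page_update_queue();	// one hypercall covers all n updates
 *	}
 */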

int set_va_mfn(void *va, unsigned long mfn, unsigned long perm)
{
	unsigned long *pte;
	Mach *mach0 = (Mach *) MACHADDR;

//	dp("set_va_mfn: mach0 is %p\n", mach0);
//	dp("Try to mmuwalk ... probably will fail\n");
	pte = mmuwalk(mach0->pdb, (unsigned long) va, 2, 0);
//	dp("pte for %p is %p\n", va, pte);
	if (! pte)
		return -1;
//	dp("queue request for va %p to be 0x%ulx\n", 
//			(va), mfn<<PGSHIFT|perm);
	HYPERVISOR_update_va_mapping(((unsigned long) va)>>PGSHIFT, 
		(mfn<<PGSHIFT)|perm, UVMF_INVLPG);
//	queue_l1_entry_update(pte, pfn|perm);
//	dp("Flush update queue\n");
//	__flush_page_update_queue();
//	dp("set_va_mfn: done\n");
	return 0;
}

void queue_l2_entry_update(unsigned long *ptr, unsigned long val)
{
    ilock(&update_lock);
    update_queue[idx].ptr = xen_va_to_ma(ptr);
    update_queue[idx].val = val;
    increment_index();
    iunlock(&update_lock);
}

void queue_pt_switch(unsigned long ptr)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = xen_pa_to_ma(ptr);
    update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
    update_queue[idx].val  = MMUEXT_NEW_BASEPTR;
    increment_index();
    iunlock(&update_lock);
}

void queue_tlb_flush(void)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = MMU_EXTENDED_COMMAND;
    update_queue[idx].val  = MMUEXT_TLB_FLUSH;
    increment_index();
    iunlock(&update_lock);
}

void queue_invlpg(unsigned long ptr)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = MMU_EXTENDED_COMMAND;
    update_queue[idx].ptr |= PPN(ptr);
    update_queue[idx].val  = MMUEXT_INVLPG;
    increment_index();
    iunlock(&update_lock);
}

void queue_pgd_pin(unsigned long *ptr)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = xen_va_to_ma(ptr);
    update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
    update_queue[idx].val  = MMUEXT_PIN_L2_TABLE;
    increment_index();
    iunlock(&update_lock);
}

void queue_pgd_unpin(unsigned long *ptr)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = xen_va_to_ma(ptr);
    update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
    update_queue[idx].val  = MMUEXT_UNPIN_TABLE;
    increment_index();
    iunlock(&update_lock);
}

/* these two should probably take a VA, not a PA, but they're not even used! */
void queue_pte_pin(unsigned long ptr)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = xen_pa_to_ma(ptr);
    update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
    update_queue[idx].val  = MMUEXT_PIN_L1_TABLE;
    increment_index();
    iunlock(&update_lock);
}

void queue_pte_unpin(unsigned long ptr)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = xen_pa_to_ma(ptr);
    update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
    update_queue[idx].val  = MMUEXT_UNPIN_TABLE;
    increment_index();
    iunlock(&update_lock);
}

void queue_set_ldt(unsigned long ptr, unsigned long len)
{
    ilock(&update_lock);
    update_queue[idx].ptr  = MMU_EXTENDED_COMMAND | ptr;
    update_queue[idx].val  = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
    increment_index();
    iunlock(&update_lock);
}

void queue_machphys_update(unsigned long mfn, unsigned long pfn)
{
    ilock(&update_lock);
    update_queue[idx].ptr = (mfn << PGSHIFT) | MMU_MACHPHYS_UPDATE;
    update_queue[idx].val = pfn;
    increment_index();
    iunlock(&update_lock);
}
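
/* Sketch of how queue_machphys_update() pairs with queue_l1_entry_update()
 * when a machine frame is (re)assigned a pseudo-physical frame, as in
 * deallocate_lowmem_region() below: queue the new pte, queue the
 * machine-to-physical entry, and both reach Xen on the next flush.
 * newpte, mfn and pfn here are illustrative names.
 *
 *	queue_l1_entry_update(pteptr, newpte);	// install the new pte
 *	queue_machphys_update(mfn, pfn);	// record the frame's new pfn
 *	_flush_page_update_queue();
 */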

#ifdef CONFIG_XEN_PHYSDEV_ACCESS

unsigned long allocate_empty_lowmem_region(unsigned long pages)
{
    pgd_t         *pgd; 
    pmd_t         *pmd;
    pte_t         *pte;
    unsigned long *pfn_array;
    unsigned long  vstart;
    unsigned long  i;
    int            ret;
    unsigned int   order = get_order(pages*PAGE_SIZE);
    dom_mem_op_t   dom_mem_op;

    vstart = __get_free_pages(GFP_KERNEL, order);
    if ( vstart == 0 )
        return 0UL;

    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
    if ( pfn_array == NULL )
        BUG();

    for ( i = 0; i < (1<<order); i++ )
    {
        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
        pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
        pte = pte_offset(pmd, (vstart + (i*PAGE_SIZE)));
        pfn_array[i] = pte->pte_low >> PAGE_SHIFT;
        queue_l1_entry_update(pte, 0);
        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = 0xdeadbeef;
    }

    flush_page_update_queue();

    dom_mem_op.op = MEMOP_RESERVATION_DECREASE;
    dom_mem_op.u.decrease.size  = 1<<order;
    dom_mem_op.u.decrease.pages = pfn_array;
    if ( (ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != (1<<order) )
    {
        printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
        BUG();
    }

    vfree(pfn_array);

    return vstart;
}

void deallocate_lowmem_region(unsigned long vstart, unsigned long pages)
{
    pgd_t         *pgd; 
    pmd_t         *pmd;
    pte_t         *pte;
    unsigned long *pfn_array;
    unsigned long  i;
    int            ret;
    unsigned int   order = get_order(pages*PAGE_SIZE);
    dom_mem_op_t   dom_mem_op;

    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
    if ( pfn_array == NULL )
        BUG();

    dom_mem_op.op = MEMOP_RESERVATION_INCREASE;
    dom_mem_op.u.increase.size  = 1<<order;
    dom_mem_op.u.increase.pages = pfn_array;
    if ( (ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != (1<<order) )
    {
        printk(KERN_WARNING "Unable to increase memory reservation (%d)\n", ret);
        BUG();
    }

    for ( i = 0; i < (1<<order); i++ )
    {
        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
        pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
        pte = pte_offset(pmd, (vstart + (i*PAGE_SIZE)));
        queue_l1_entry_update(pte, (pfn_array[i]<<PAGE_SHIFT)|__PAGE_KERNEL);
        queue_machphys_update(pfn_array[i], __pa(vstart)>>PAGE_SHIFT);
        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i];
    }

    flush_page_update_queue();

    vfree(pfn_array);

    free_pages(vstart, order);
}

#endif /* CONFIG_XEN_PHYSDEV_ACCESS */

/* startup stuff, it is here because we don't want to reference the mfn outside of this file */
extended_start_info_t xen_start_info;
volatile shared_info_t *HYPERVISOR_shared_info = 0;
unsigned long *mfn;


void
xen_mm_startup(void) {
	/* start_info is at first page. */
	xen_start_info = *((extended_start_info_t *) KZERO);
	mfn = (unsigned long *) xen_start_info.mfn_list;
}

void xen_mm_shared_info(void) {
#ifdef NOT
	int i, j;
	volatile unsigned char *cp;
#endif
	HYPERVISOR_shared_info = (shared_info_t *)0x80002000;
	/* set by xc_plan9_build */
#ifdef NOT
/**/
	set_va_mfn(HYPERVISOR_shared_info, xen_start_info.shared_info, 
			PTEWRITE|PTEVALID);
/**/
#endif
#ifdef NOT
	cp = (unsigned char *)HYPERVISOR_shared_info;

	for(i = 0; i < 4096; i += 16) {
		dp("0x%x: ", i);
		for(j = 0; j < 16; j++) {
			volatile unsigned char dpv;
//			if  (((i+j)<8) && ((i+j)>0))
//				cp[i+j] = 0;
			if (cp[i+j])
			dp("%02x ", cp[i+j]);
			dpv = cp[i+j];
			cp[i+j] = dpv;

		}
		dp("\n");
	}
#endif
}

static unsigned long ma_to_pa_map[1<<20];

extern void xen_meminit(unsigned long, unsigned long, unsigned long, unsigned long);

void xen_mm_meminit(void) {
	int i;
	xen_meminit(
	xen_start_info.pt_base, xen_start_info.nr_pt_frames, 
	xen_start_info.mfn_list, xen_start_info.nr_pages);
	for(i = 0; i < xen_start_info.nr_pages; i++)
		ma_to_pa_map[mfn[i]] = i;
}


void
xen_mm_info(void){
	extended_start_info_t *x = &xen_start_info;
	dp("xen_start_info\n");
	dp("  nr_pages %uld\n", x->nr_pages);
         dp("  shared_info 0x%ulx\n", x->shared_info);
	dp("  flags 0x%ux\n", x->flags);
	dp("  pt_base 0x%ulx\n", x->pt_base);
	dp("  nr_pt_frames %uld\n", x->nr_pt_frames);
	dp("  mfn_list 0x%ulx\n", x->mfn_list);
	dp("  shared info %p\n", HYPERVISOR_shared_info);
	dp("  mfn %p\n", mfn);
	dp("  mfn[0] 0x%ulx\n", mfn[0]);
	
}

/* note that because of the Plan 9 KADDR/PADDR scheme, this function actually
  * works fine for BOTH kernel virtual address and physical addresses
  */

/* this one should get the frame, but you need a VA to MA function, idiot! */
extern unsigned long *mfn;
unsigned long xen_mm_mfn(void *va) {
	unsigned long pmfn;

	pmfn = mfn[PADDR(va)>>PGSHIFT];
//	LOG(dp("PMFN: 0x%ulx\n", pmfn));
	pmfn <<= PGSHIFT;
//	LOG(dp("PMFN: return 0x%lx\n", pmfn));
	return pmfn;
}
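
/* Illustrative example of the comment above: the kernel maps physical memory
 * linearly at KZERO, so PADDR() strips KZERO whether it is handed a kernel
 * virtual address or a physical address, and both calls below index the same
 * mfn_list slot.  buf is a hypothetical kernel address.
 *
 *	unsigned long m1 = xen_mm_mfn(buf);			// from the VA
 *	unsigned long m2 = xen_mm_mfn((void *) PADDR(buf));	// from the PA
 *	// m1 == m2: the machine address of the frame backing buf
 */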

/* well, this sucks, but you can't really build a table as things can change
  * out from under you
  */

unsigned long
xen_ma_to_pa(unsigned long ma) {
	unsigned long offset = ((unsigned long)ma) & (BY2PG-1);
	unsigned long pfn, pa;

	ma >>= PGSHIFT;
	pfn = ma_to_pa_map[ma];
	pa = pfn << PGSHIFT;
	pa |= offset;
//	dp("xen_ma_to_pa: ma 0x%ulx, pa 0x%ulx\n", ma, pa);
	return pa;

}
unsigned long
xen_va_to_ma(void *va) {
	unsigned long frame = xen_mm_mfn(va);
	unsigned long offset = ((unsigned long)va) & (BY2PG -1);
	unsigned long retval = frame | offset;
	return retval;
}
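
/* xen_va_to_ma() and xen_ma_to_pa() are inverses up to the KZERO mapping.
 * For some kernel virtual address va (illustrative only):
 *
 *	unsigned long ma = xen_va_to_ma(va);	// machine address
 *	unsigned long pa = xen_ma_to_pa(ma);	// back to pseudo-physical
 *	// pa == PADDR(va): the frame comes from ma_to_pa_map, the page
 *	// offset is carried through unchanged
 */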

void
xen_mm_readonly(void *vva) {
	unsigned long va = (unsigned long) vva;
	/*
	dp("xen_readonly: 0x%ulx set to 0x%ulx flags 0x%x\n", ((unsigned long)va)>>PGSHIFT,
		(xen_mm_mfn(vva))|PTEVALID|PTERONLY, UVMF_INVLPG);
	*/
	HYPERVISOR_update_va_mapping(((unsigned long)va)>>PGSHIFT,
		(xen_mm_mfn(vva))|PTEVALID|PTERONLY, UVMF_INVLPG);
}



void
xen_mm_readwrite(void *vva) {
	unsigned long va = (unsigned long) vva;
/*
	dp("xen_readwrite: 0x%ulx set to 0x%ulx flags 0x%x\n", ((unsigned long)va)>>PGSHIFT,
		(xen_mm_mfn(vva))|PTEVALID|PTEWRITE, UVMF_INVLPG);
 */
	HYPERVISOR_update_va_mapping(((unsigned long)va)>>PGSHIFT,
		(xen_mm_mfn(vva))|PTEVALID|PTEWRITE, UVMF_INVLPG);
}

void
xen_mm_setl2(void *l2, unsigned long *pteptr) {
		LOG(dp("   quee l2 entry update for 0x%ulx\n", pteptr));
		LOG(dp("0x%ulx set to 0x%ulx flags 0x%x\n", ((unsigned long)l2)>>PGSHIFT,
			xen_mm_mfn(l2)|PTEVALID|PTEWRITE, UVMF_INVLPG));
		HYPERVISOR_update_va_mapping(((unsigned long)l2)>>PGSHIFT,
			xen_mm_mfn(l2)|PTEVALID|PTERONLY, UVMF_INVLPG);
		queue_l2_entry_update(pteptr,
			xen_mm_mfn(l2)|PTEUSER|PTEWRITE|PTEVALID);
		/* have to do this here! */
		/* could be fancy and do tricks but won't. */
		_flush_page_update_queue();
}


int
xen_mm_decrease_reservation(unsigned long *pfn_array, int npfn) {
    int ret;
    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
			pfn_array, npfn, 0 );
    if (ret < 0) 
    {
        dp( "Unable to reduce memory reservation (%d)\n", ret);
    }

    return ret;
}
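
/* A minimal usage sketch (assumed, not from this file): hand a few frames
 * back to Xen.  The frame numbers here are taken via xen_mm_mfn(), on the
 * assumption that the hypercall wants machine frames, as the pte-derived
 * array in allocate_empty_lowmem_region() above suggests; freepage and the
 * count are hypothetical.
 *
 *	unsigned long frames[4];
 *	int i, n;
 *
 *	for(i = 0; i < 4; i++)
 *		frames[i] = xen_mm_mfn(freepage[i]) >> PGSHIFT;
 *	n = xen_mm_decrease_reservation(frames, 4);
 *	// on success n is the number of pages accepted; n < 0 means failure
 */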
