Plan 9 from Bell Labs’s /usr/web/sources/contrib/quanstro/root/sys/src/fs/pc/etherm10g.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*
 *	myricom 10 gbit ethernet
 *	© 2007—9 erik quanstrom, coraid, inc.
 */

#include "all.h"
#include "io.h"
#include "../ip/ip.h"
#include "etherif.h"
#include "mem.h"

#undef		MB
/*
 * postfix size multipliers: "4 K" expands to "4 * 1024" and
 * "1 MB" to "1 * 1024 * 1024".  the space before K/MB is required.
 */
#define		K	* 1024
#define		MB	* 1024 K

/* print only when the file-level debug flag is non-zero; the empty else keeps a following else from pairing with this if */
#define	dprint(...)	if(debug) print(__VA_ARGS__); else {}
/* pci capability tracing, compiled out */
#define	pcicapdbg(...)
/* ialloc with a second argument of 4 K -- presumably the alignment; confirm against ialloc */
#define malign(n)	ialloc(n, 4 K)
/* evaluates its argument only when uintptr is 8 bytes wide; otherwise 0 */
#define if64(...)		(sizeof(uintptr) == 8? (__VA_ARGS__): 0)
/* big-endian encoding of the upper 32 bits of x (0 when uintptr is not 64 bits) */
#define pbit32h(x)	if64(pbit32((uvlong)x >> 32))

#include "etherm10g2k.i"
#include "etherm10g4k.i"

/* sizes and offsets into the card's mapped memory (c->ram) */
enum {
	Epromsz	= 256,			/* bytes of eprom strings copied from the end of card ram */
	Maxslots	= 1024,		/* entries in the completion (Done) ring */
	Noconf	= 0xffffffff,		/* sentinel stored in the response buffer while no answer has arrived */
	Fwoffset	= 1 MB,		/* where loadfw copies the firmware image */
	Hdroff	= 0x00003c,		/* location of the pointer to the running firmware's header */
	Cmdoff	= 0xf80000,		/* offset of command port */
	Fwsubmt	= 0xfc0000,		/* offset of firmware submission command port */
	Rdmaoff	= 0xfc01c0,		/* offset of rdma command port */
};

/*
 * firmware command numbers, passed as the type argument of cmd()/maccmd().
 * the values are positional, so the order of this list must not change.
 * naming as used in this file: CS* commands carry a value to the card,
 * CG* commands fetch one from it.
 */
enum {
	CZero,
	Creset,
	Cversion,

	CSintrqdma,		/* issue these before Cetherup */
	CSbigsz,			/* in bytes bigsize = 2^n */
	CSsmallsz,

	CGsendoff,
	CGsmallrxoff,
	CGbigrxoff,
	CGirqackoff,
	CGirqdeassoff,
	CGsendrgsz,
	CGrxrgsz,

	CSintrqsz,		/* 2^n */
	Cetherup,		/* above parameters + mtu/mac addr must be set first */
	Cetherdn,

	CSmtu,			/* below may be issued live */
	CGcoaloff,		/* in µs */
	CSstatsrate,		/* in µs */
	CSstatsdma,

	Cpromisc,
	Cnopromisc,
	CSmac,

	Cenablefc,
	Cdisablefc,

	Cdmatest,

	Cenableallmc,
	Cdisableallmc,

	CSjoinmc,
	CSleavemc,
	Cleaveallmc,

	CSstatsdma2,
	Cdmatestu,
	Custatus,		/* unaligned status */
};

/*
 * command response buffer written by the card.  cmd() returns the
 * big-endian value of the first word and treats a non-zero second
 * word as an error indication.
 */
typedef union {
	uint	i[2];
	uchar	c[8];
} Cmd;

/* one entry of the receive completion ring (overlaid on Done.entry) */
typedef struct {
	ushort	cksum;
	ushort	len;		/* big-endian frame length; 0 means the slot is empty */
} Slot;

/* bits for Send.flags */
enum {
	SFsmall	= 1,		/* set by m10gtransmit for frames shorter than 1520 bytes */
	SFfirst	= 2,		/* first segment of a frame */
	SFalign	= 4,
	SFnotso	= 16,		/* set on every segment m10gtransmit queues */
};

/*
 * transmit descriptor, one per segment.  high/low and len are stored
 * big-endian (see m10gtransmit).
 */
typedef struct {
	uint	high;		/* upper 32 bits of the segment's bus address */
	uint	low;		/* lower 32 bits */
	ushort	hdroff;
	ushort	len;
	union{
		struct {
			uchar	pad;
			uchar	nrdma;
			uchar	chkoff;
			uchar	flags;
		};
		uint	fword;	/* ha! */	/* the four bytes above as one word; submittx writes it last */
	};
} Send;

/* transmit ring state */
typedef struct {
	QLock;
	Send	*lanai;		/* tx ring (cksum + len in lanai memory) */
	Send	*host;		/* tx ring (data in our memory). */
	Msgbuf	**bring;	/* message owning each slot; only a frame's last segment is recorded */
	int	size;		/* how big are the buffers in the z8's memory */
	uint	segsz;		/* segments never cross a segsz boundary (2048 or 4096, per firmware) */
	uint	n;		/* txslots */
	uint	m;		/* mask */
	uint	i;		/* number of segments (not frames) queued */
	uint	cnt;		/* number of segments sent by the card */
	uint	starve;		/* non-zero while the transmitter is waiting for free slots */
	uint	starvei;		/* starve pt */
	uint	submit;		/* frames handed to submittx */

	uint	npkt;		/* frames reclaimed by txcleanup */
	vlong	nbytes;		/* bytes in reclaimed frames */
} Tx;

/* bits for Bpool.flags */
enum {
	Pstarve	= 1<<0,		/* replenish could not fill the ring from this pool */
};

/* free list of receive buffers, shared by all controllers */
typedef struct {
	Lock;
	Msgbuf	*head;		/* singly linked through Msgbuf.next */
	uint	size;		/* buffer size of each block */
	uint	n;		/* n free buffers. */
	uint	cnt;		/* number of frees (lifetime) */
	uint	flags;		/* Pstarve */
} Bpool;

/* one receive ring; each controller has a small (sm) and a big (bg) ring */
typedef struct {
	Bpool	*pool;		/* free buffers */
	uint	*lanai;		/* rx ring; we have no permanent host shadow. */
	Msgbuf	**host;		/* called "info" in myricom driver */
	uint	m;		/* index mask (n - 1) */
	uint	n;		/* rxslots */
	uint	i;		/* buffers consumed by nextbuf */
	uint	cnt;		/* number of buffers allocated (lifetime). */
} Rx;

/*
 * dma mapped.  unix network byte order.
 * the card writes this block into host memory; the 4-byte fields are
 * read with gbit32.  the interrupt handler uses valid as its "work
 * pending" flag and clears it.
 */
typedef struct {
	uchar	unused[4];
	uchar	dpause[4];
	uchar	dufilt[4];
	uchar	dcrc32[4];
	uchar	dphy[4];
	uchar	dmcast[4];
	uchar	txcnt[4];
	uchar	linkstat[4];
	uchar	dlinkef[4];
	uchar	derror[4];
	uchar	drunt[4];
	uchar	doverrun[4];
	uchar	dnosm[4];
	uchar	dnobg[4];
	uchar	nrdma[4];
	uchar	txstopped;
	uchar	down;
	uchar	updated;
	uchar	valid;
} Stats;

/* values of Ctlr.state */
enum {
	Detached,
	Attached,		/* set by m10gpnp once the card is configured */
	Runed,			/* set by m10gattach once the receive process exists */
};

/* receive completion ring written by the card; entries are read as Slots */
typedef struct {
	uint	*entry;		/* ring memory */
	uintptr	busaddr;	/* bus address of entry, given to the card with CSintrqdma */
	uint	m;		/* index mask (n - 1) */
	uint	n;		/* number of entries */
	uint	i;		/* entries consumed so far */
} Done;

/* per-card state */
typedef struct Ctlr Ctlr;
typedef struct Ctlr {
	QLock;
	int	state;			/* Detached, Attached or Runed */
	int	kprocs;			/* non-zero once the receive process has been started */
	uintptr	port;			/* physical address of bar 0 */
	Pcidev*	pcidev;
	Ctlr*	next;
	int	active;			/* claimed by an Ether in m10gpnp */

	uchar	ra[Easize];		/* mac address parsed from the eprom */

	int	ramsz;
	uchar	*ram;			/* mapped card memory */

	uint	*irqack;		/* locations in card memory, offsets supplied by the firmware */
	uint	*irqdeass;
	uint	*coal;

	char	eprom[Epromsz];
	uint	serial;			/* unit serial number */

	QLock	cmdl;			/* serializes cmd() and maccmd() */
	Cmd	*cmd;			/* address of command return */
	uintptr	cprt;			/* bus address of command */

	uintptr	boot;			/* boot address */

	Done	done;
	Tx	tx;
	Rx	sm;			/* ring for frames that fit in smpool buffers */
	Rx	bg;			/* ring for everything else */
	Stats	*stats;
	uintptr	statsprt;		/* bus address of stats */
	uint	speed[2];		/* link transition counts: [0] down, [1] up */

	Rendez	rxrendez;
	Rendez	txrendez;

	int	msi;
	uint	linkstat;
	uint	nrdma;

	char	rname[12];
	char	tname[12];
} Ctlr;

/* pci capability ids, as matched by pcicap */
enum {
	PciCapPMG	= 0x01,		/* power management */
	PciCapAGP	= 0x02,
	PciCapVPD	= 0x03,		/* vital product data */
	PciCapSID	= 0x04,		/* slot id */
	PciCapMSI	= 0x05,
	PciCapCHS	= 0x06,		/* compact pci hot swap */
	PciCapPCIX	= 0x07,
	PciCapHTC	= 0x08,		/* hypertransport irq conf */
	PciCapVND	= 0x09,		/* vendor specific information */
	PciCapPCIe	= 0x10,
	PciCapMSIX	= 0x11,
	PciCapSATA	= 0x12,
	PciCapHSW	= 0x0C,		/* hot swap */
};

/* pcie extended capability ids, as matched by pciecap */
enum {
	PcieAERC	= 1,
	PcieVC,
	PcieSNC,
	PciePBC,
};

/* offsets within the PcieAERC capability */
enum {
	AercCCR	= 0x18,		/* control register */
};

/* offsets within the PciCapPCIe capability, and a field mask */
enum {
	PcieCTL		= 8,
	PcieLCR		= 12,
	PcieMRD	= 0x7000,	/* maximum read size */
};

/* file-wide state; the two buffer pools are shared by every controller */
static	int 	debug		= 0;	/* gates dprint */
static	char	Etimeout[]	= "timeout";
static	char	Enomem[]	= "no memory";
static	char	Enonexist[]	= "controler lost";
static	char	Ebadarg[]	= "bad argument";
static	Bpool	smpool 	= {.size	= 128, };
static	Bpool	bgpool	= {.size = 9000,};	/* size is overwritten in open0 */
static	Ctlr 	ctlrs[3];
static	int	nctlr;			/* number of entries of ctlrs in use */

/*
 * walk the pci capability list of p looking for capability id cap.
 * returns the configuration-space offset of the matching capability,
 * or 0 if the list ends, a pointer falls below 0x40, an id reads as
 * 0xff, or 48 entries have been examined without a match.
 */
static int
pcicap(Pcidev *p, int cap)
{
	int tries, id, ptr;

	pcicapdbg("pcicap: %x:%d\n", p->vid, p->did);
	ptr = 0x34;	/* capabilities pointer; 0x14 for cardbus. */
	for(tries = 0; tries < 48; tries++){
		pcicapdbg("\t" "loop %x\n", ptr);
		ptr = pcicfgr8(p, ptr);
		pcicapdbg("\t" "pcicfgr8 %x\n", ptr);
		if(ptr < 0x40)
			return 0;
		ptr &= ~3;
		id = pcicfgr8(p, ptr);
		pcicapdbg("\t" "pcicfgr8 %x\n", id);
		if(id == 0xff)
			return 0;
		if(id == cap)
			return ptr;
		/* the byte after the id holds the pointer to the next entry */
		ptr++;
	}
	return 0;
}

/*
 * search the pcie extended capability list, which starts at offset
 * 0x100, for capability id cap.  returns its offset, or 0 when the
 * chain leaves the range [0x100, 4 K - 1).
 *
 * this function doesn't work because pcicfgr32 doesn't have access
 * to the pcie extended configuration space.
 */
static int
pciecap(Pcidev *p, int cap)
{
	uint off, hdr;

	off = 0x100;
	for(;;){
		hdr = pcicfgr32(p, off);
		/* low 16 bits of the header are the capability id */
		if((hdr & 0xffff) == cap)
			break;
		/* top 12 bits point at the next capability */
		off = hdr>>20;
		print("pciecap offset = %ud\n",  off);
		if(off < 0x100 || off >= 4 K - 1)
			return 0;
	}
	print("pciecap found = %ud\n",  off);
	return off;
}

/*
 * rewrite the PcieMRD ("maximum read size") field of the pcie control
 * register at PcieCTL with the value 5 (the 4k setting).
 * returns 0 on success, -1 if p has no pcie capability.
 */
static int
setpcie(Pcidev *p)
{
	int cap, ctl, v;

	cap = pcicap(p, PciCapPCIe);
	if(cap < 64)
		return -1;
	ctl = cap + PcieCTL;
	v = pcicfgr16(p, ctl);
	v &= ~PcieMRD;
	v |= 5<<12;
	pcicfgw16(p, ctl, v);
	return 0;
}

/*
 * give q a printf-style name: format into q->namebuf and point
 * q->name at the result.
 */
static void
namelock(QLock *q, char *fmt, ...)
{
	char *end;
	va_list arg;

	end = q->namebuf + sizeof q->namebuf;
	va_start(arg, fmt);
	vseprint(q->namebuf, end, fmt, arg);
	va_end(arg);
	q->name = q->namebuf;
}

/*
 * choose the firmware image for the card behind p.
 * returns the segment size of the chosen firmware (2 K or 4 K), or
 * -1 if the device has no pcie capability.
 *
 * the configuration variable "myriforce" overrides the choice; any
 * value other than 2048 or 4096 is treated as 2048.  without an
 * override every path currently selects the 4k firmware; the
 * different messages only record why.
 */
static int
whichfw(Pcidev *p)
{
	char *s;
	int i, off, lanes, ecrc;
	uint cap;

	/* check the number of configured lanes */
	off = pcicap(p, PciCapPCIe);
	if(off < 64)
		return -1;
	off += PcieLCR;
	cap = pcicfgr16(p, off);
	lanes = cap>>4 & 0x3f;

	/* check AERC register.  we need it on */
	off = pciecap(p, PcieAERC);
	cap = 0;
	if(off != 0){
		off += AercCCR;
		cap = pcicfgr32(p, off);
		print("%ud cap\n", cap);
	}
	ecrc = cap>>4 & 0xf;
	/* if we don't like the aerc, kick it here */

	print("m10g %d lanes; ecrc=%d; ", lanes, ecrc);
	if(s = getconf("myriforce")){
		i = strtoul(s, 0, 0);
		/* only the two sizes we carry firmware for are acceptable */
		if(i != 4 K && i != 2 K)
			i = 2 K;
		print("fw=%d [forced]\n", i);
		return i;
	}
	if(lanes <= 4){
		print("fw = 4096 [lanes]\n");
		return 4 K;
	}
	/* NOTE(review): the mask is decimal 10, not 0x10 -- confirm which bits were meant */
	if(ecrc & 10){
		print("fw = 4096 [ecrc set]\n");
		return 4 K;
	}
	print("fw = 4096 [default]\n");
	return 4 K;
}

/*
 * pull the mac address and serial number out of the eprom image in
 * c->eprom, which is a sequence of NUL-terminated "KEY=value" strings
 * ended by an empty string.
 *
 * "MAC=xx:xx:xx:xx:xx:xx" (exactly 21 characters) fills c->ra and
 * "SN=..." fills c->serial.  returns 0 if both were found, -1
 * otherwise.
 */
static int
parseeprom(Ctlr *c)
{
	int i, j, k, l, bits;
	char *s;

	dprint("m10g eprom:\n");
	s = c->eprom;
	bits = 3;	/* bit 0: mac still missing; bit 1: serial still missing */
	/* test the bound before touching s[i] so we never read past the buffer */
	for(i = 0; i < Epromsz && s[i]; i++){
		l = strlen(s + i);
		dprint("\t%s\n", s + i);
		if(strncmp(s + i, "MAC=", 4) == 0 && l == 21){
			/* clear rather than toggle, so a repeated key can't undo it */
			bits &= ~1;
			j = i + 4;
			/* each octet is two hex digits plus a separator */
			for(k = 0; k < 6; k++)
				c->ra[k] = strtoul(s + j + 3*k, 0, 16);
		}else if(strncmp(s + i, "SN=", 3) == 0){
			bits &= ~2;
			c->serial = strtoul(s + i + 3, 0, 0);
		}
		/* skip the string; the loop increment steps over its NUL */
		i += l;
	}
	if(bits)
		return -1;
	return 0;
}

/*
 * return a ushort whose in-memory byte order is big-endian i:
 * the first byte holds the high 8 bits, the second the low 8 bits.
 */
static ushort
pbit16(ushort i)
{
	union {
		ushort	s;
		uchar	b[2];
	} v;

	v.b[0] = i>>8;
	v.b[1] = i;
	return v.s;
}

/* decode the two bytes at i as a big-endian 16-bit value */
static ushort
gbit16(uchar i[2])
{
	return (ushort)(i[0]<<8 | i[1]);
}

/*
 * return a uint whose in-memory byte order is big-endian i:
 * the first byte holds bits 31-24, the last byte bits 7-0.
 */
static uint
pbit32(uint i)
{
	union {
		uint	w;
		uchar	b[4];
	} v;

	v.b[0] = i>>24;
	v.b[1] = i>>16;
	v.b[2] = i>>8;
	v.b[3] = i;
	return v.w;
}

/*
 * decode the four bytes at i as a big-endian 32-bit value.
 * each byte is widened to uint before shifting so that a top byte of
 * 0x80 or more is never shifted into the sign bit of a promoted int.
 */
static uint
gbit32(uchar i[4])
{
	uint j;

	j = (uint)i[3];
	j |= (uint)i[2]<<8;
	j |= (uint)i[1]<<16;
	j |= (uint)i[0]<<24;
	return j;
}

/* convert the first i words of cmd, in place, to big-endian byte order */
static void
prepcmd(uint *cmd, int i)
{
	int k;

	for(k = 0; k < i; k++)
		cmd[k] = pbit32(cmd[k]);
}

/*
 * the command looks like this (in 32-bit integers)
 * cmd type
 * data0 (or, addr low; endian backwards)
 * data1 (addr high)
 * data2
 * response (high)
 * response (low)
 * 40 byte = 10 int pad.
 */

/*
 * issue firmware command type with a 64-bit argument data and a
 * third argument sz, then poll (15 tries, 1ms apart) for the card to
 * overwrite the response buffer.
 *
 * returns the first response word, decoded from big-endian.  a
 * non-zero second word is only reported through dprint.  panics if
 * the card does not answer.
 *
 * everything read from the shared response buffer is copied while
 * c->cmdl is still held; once the lock is dropped another command
 * may overwrite it.
 */
static uint
cmd(Ctlr *c, int type, int sz, uvlong data)
{
	uint buf[16], i, r, w0, w1;
	Cmd *rsp;

	qlock(&c->cmdl);
	rsp = c->cmd;
	rsp->i[1] = Noconf;	/* marks "no answer yet" */
	memset(buf, 0, sizeof buf);
	buf[0] = type;
	buf[1] = data;
	buf[2] = data>>32;
	buf[3] = sz;
	buf[4] = (uvlong)c->cprt>>32;
	buf[5] = c->cprt;
	prepcmd(buf, 6);
	coherence();
	memmove(c->ram + Cmdoff, buf, sizeof buf);

	for(i = 0; i < 15; i++){
		if(rsp->i[1] != Noconf){
			r = gbit32(rsp->c);
			w1 = rsp->i[1];
			qunlock(&c->cmdl);
			if(w1 != 0)
				dprint("[%ux]", r);
			return r;
		}
		delay(1);
	}
	w0 = rsp->i[0];
	w1 = rsp->i[1];
	qunlock(&c->cmdl);
	panic("m10g: cmd timeout [%ux %ux] cmd=%d\n", w0, w1, type);
	return ~0;
}

/*
 * like cmd(), but the argument is the 6-byte ethernet address m:
 * its first four bytes go in the first data word and the last two
 * in the second.
 *
 * returns the first response word, decoded from big-endian; panics
 * if the card does not answer within 15 polls 1ms apart.
 *
 * everything read from the shared response buffer is copied while
 * c->cmdl is still held; once the lock is dropped another command
 * may overwrite it.
 */
static uint
maccmd(Ctlr *c, int type, uchar *m)
{
	uint buf[16], i, r, w0, w1;
	Cmd *rsp;

	qlock(&c->cmdl);
	rsp = c->cmd;
	rsp->i[1] = Noconf;	/* marks "no answer yet" */
	memset(buf, 0, sizeof buf);
	buf[0] = type;
	buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3];
	buf[2] = m[4]<<8 | m[5];
	buf[4] = (uvlong)c->cprt>>32;
	buf[5] = c->cprt;
	prepcmd(buf, 6);
	coherence();
	memmove(c->ram + Cmdoff, buf, sizeof buf);

	for(i = 0; i < 15; i++){
		if(rsp->i[1] != Noconf){
			r = gbit32(rsp->c);
			w1 = rsp->i[1];
			qunlock(&c->cmdl);
			if(w1 != 0)
				dprint("[%ux]", r);
			return r;
		}
		delay(1);
	}
	w0 = rsp->i[0];
	w1 = rsp->i[1];
	qunlock(&c->cmdl);
	print("m10g: maccmd timeout [%ux %ux] cmd=%d\n", w0, w1, type);
	panic(Etimeout);
	return ~0;
}

/* remove this garbage after testing */
/*
 * dmatestcmd multiplies the transfer length by one of these, which
 * places the length in the upper half, the lower half, or both
 * halves of the command word.
 */
enum{
	DMAread	= 0x10000,
	DMAwrite	= 0x1
};

/*
 * run the firmware's dma test (Cdmatest) against host bus address
 * addr.  type is DMAread, DMAwrite or both or'd together; it is
 * multiplied by len to form the third command word.
 *
 * returns the first response word, decoded from big-endian; panics
 * if the card has not answered after 15 polls 5ms apart.
 * unlike cmd(), c->cmdl is not taken.
 */
static uint
dmatestcmd(Ctlr *c, int type, uvlong addr, int len)
{
	uint req[16], tries;
	Cmd *rsp;

	rsp = c->cmd;
	memset(req, 0, sizeof req);
	/* memset stores the low byte of Noconf, filling the response with 0xff */
	memset(rsp, Noconf, sizeof *rsp);
	req[0] = Cdmatest;
	req[1] = addr;
	req[2] = addr>>32;
	req[3] = len*type;
	req[4] = (uvlong)c->cprt>>32;
	req[5] = c->cprt;
	prepcmd(req, 6);
	coherence();
	memmove(c->ram + Cmdoff, req, sizeof req);

	for(tries = 0; tries < 15; tries++){
		if(rsp->i[1] != Noconf)
			return gbit32(rsp->c);
		delay(5);
	}
	panic(Etimeout);
	return ~0;
}

/*
 * write a request with final word `on' to the rdma command port and
 * wait (20 polls, 1ms apart) for the card to write Noconf back into
 * the first word of the response buffer.
 *
 * returns the first response word decoded from big-endian; panics on
 * timeout.  c->cmdl is not taken.
 */
static uint
rdmacmd(Ctlr *c, int on)
{
	uint req[16], tries;
	Cmd *rsp;

	rsp = c->cmd;
	memset(req, 0, sizeof req);
	rsp->i[0] = 0;
	coherence();
	req[0] = (uvlong)c->cprt>>32;
	req[1] = c->cprt;
	req[2] = Noconf;	/* value the card writes back */
	req[3] = (uvlong)c->cprt>>32;
	req[4] = c->cprt;
	req[5] = on;
	prepcmd(req, 6);
	memmove(c->ram + Rdmaoff, req, sizeof req);

	for(tries = 0; tries < 20; tries++){
		if(rsp->i[0] == Noconf)
			return gbit32(rsp->c);
		delay(1);
	}
	panic(Etimeout);
	return ~0;
}

/*
 * copy the firmware image chosen by whichfw() into card memory at
 * Fwoffset, one 32-bit word at a time.
 *
 * *align receives whichfw()'s result.  the 4k image is used when
 * that is 4 K, the 2k image in every other case.
 * returns the image size rounded down to a multiple of 4.
 */
static int
loadfw(Ctlr *c, int *align)
{
	uint *src, *dst, nbytes;
	int w;

	*align = whichfw(c->pcidev);
	if(*align == 4 K){
		src = (uint*)fw4k;
		nbytes = sizeof fw4k;
	}else{
		src = (uint*)fw2k;
		nbytes = sizeof fw2k;
	}

	dst = (uint*)(c->ram + Fwoffset);
	for(w = 0; w < nbytes/4; w++)
		dst[w] = src[w];
	return nbytes&~3;
}

/*
 * load the firmware into card memory and ask the card to start it
 * through the firmware submission port, then wait (20 polls, 1ms
 * apart) for the card to write Noconf into the response buffer.
 *
 * on success sets c->tx.segsz to the size reported by loadfw and
 * returns 0.  returns -1 if the card never acknowledges.  also
 * returns 0, without touching the card, when loadfw reports an empty
 * image.
 */
static int
bootfw(Ctlr *c)
{
	int i, sz, align;
	uint buf[16];
	Cmd *cmd;

	if((sz = loadfw(c, &align)) == 0)
		return 0;
	dprint("m10g: bootfw %d bytes ... ", sz);
	cmd = c->cmd;

	memset(buf, 0, sizeof buf);
	cmd->i[0] = 0;
	coherence();
	buf[0] = (uvlong)c->cprt>>32;	/* upper 32 bits of dma target address */
	buf[1] = c->cprt;			/* lower */
	buf[2] = Noconf;			/* writeback */
	buf[3] = Fwoffset + 8;		/* the first 8 bytes of the image are skipped */
	buf[4] = sz - 8;
	buf[5] = 8;
	buf[6] = 0;
	prepcmd(buf, 7);
	coherence();
	memmove(c->ram + Fwsubmt, buf, sizeof buf);

	for(i = 0; i < 20; i++){
		if(cmd->i[0] == Noconf)
			break;
		delay(1);
	}
	dprint("[%ux %ux]", gbit32(cmd->c), gbit32(cmd->c + 4));
	if(i == 20){
		print("m10g: cannot load fw\n");
		return -1;
	}
	dprint("\n");
	c->tx.segsz = align;
	return 0;
}

/*
 * poke the vendor-specific capability at c->boot: write 0x3 at +0x10
 * and 0xfffffff0 at +0x18, then read the word at +0x14.
 * returns 0 if that word reads back as 0xfffffff0, -1 otherwise.
 * (the only call, in m10gpci, is commented out.)
 */
static int
kickthebaby(Pcidev *p, Ctlr *c)
{
	/* don't kick the baby! */
	uint status;

	pcicfgw8(p, 0x10 + c->boot, 0x3);
	pcicfgw32(p, 0x18 + c->boot, 0xfffffff0);
	status = pcicfgr32(p, 0x14 + c->boot);

	dprint("m10g: reboot status = %ux\n", status);
	return status == 0xfffffff0? 0: -1;
}

/*
 * firmware header as found in card memory at the offset stored at
 * Hdroff.  multi-byte fields are read with gbit32, i.e. big-endian.
 * chkfw looks only at type, version and ramsz.
 */
typedef struct{
	uchar	len[4];
	uchar	type[4];		/* one of the T* tags */
	char	version[128];
	uchar	globals[4];
	uchar	ramsz[4];
	uchar	specs[4];
	uchar	specssz[4];
	uchar	idx;
	uchar	norabbit;
	uchar	unaligntlp;
	uchar	pcilinkalg;
	uchar	cntaddr[4];
	uchar	cbinfo[4];
	uchar	handoid[2];
	uchar	handocap[2];
	uchar	msixtab[4];
	uchar	bss[4];
	uchar	features[4];
	uchar	eehdr[4];
} Fwhdr;

enum{
	Tmx	= 0x4d582020,
	Tpcie	= 0x70636965,
	Teth	= 0x45544820,
	Tmcp0	= 0x4d435030,
};

/* printable name for a firmware type tag; "*GOK*" for anything unrecognized */
static char*
fwtype(uint type)
{
	if(type == Tmx)
		return "mx";
	if(type == Tpcie)
		return "PCIe";
	if(type == Teth)
		return "eth";
	if(type == Tmcp0)
		return "mcp0";
	return "*GOK*";
}

/*
 * validate the header of the firmware now on the card and, if it is
 * ethernet firmware, load and start ours.
 *
 * returns -1 if the header offset is zero, unaligned or outside card
 * memory, or if the type is not Teth.  otherwise returns 1 if bootfw
 * fails or rdmacmd(c, 0) returns non-zero, else 0.
 */
static int
chkfw(Ctlr *c)
{
	uint hdroff, type;
	Fwhdr *hdr;

	hdroff = gbit32(c->ram + Hdroff);
	dprint("m10g: firmware %ux\n", hdroff);
	if(hdroff == 0 || hdroff&3 || hdroff + sizeof *hdr >= c->ramsz){
		print("m10g: bad firmware %#ux\n", hdroff);
		return -1;
	}
	hdr = (Fwhdr*)(c->ram + hdroff);
	type = gbit32(hdr->type);
	dprint("\t" "type	%s\n", fwtype(type));
	dprint("\t" "vers	%s\n", hdr->version);
	dprint("\t" "ramsz	%ux\n", gbit32(hdr->ramsz));
	if(type != Teth){
		print("m10g: bad card type %s\n", fwtype(type));
		return -1;
	}

	if(bootfw(c) != 0)
		return 1;
	return rdmacmd(c, 0) != 0;
}

static int
reset(Ether *e, Ctlr *c)
{
	uint i, sz;

	chkfw(c);
	cmd(c, Creset, 0, 0);

	cmd(c, CSintrqsz, 0, c->done.n*sizeof *c->done.entry);
	cmd(c, CSintrqdma, 0, c->done.busaddr);
	c->irqack = (uint*)(c->ram + cmd(c, CGirqackoff, 0, 0));
	c->irqdeass = (uint*)(c->ram + cmd(c, CGirqdeassoff, 0, 0));
	c->coal = (uint*)(c->ram + cmd(c, CGcoaloff, 0, 0));
	*c->coal = pbit32(25);

	dprint("dma stats:\n");
	rdmacmd(c, 1);
	sz = c->tx.segsz;
	i = dmatestcmd(c, DMAread, c->done.busaddr, sz);
	print("\t" "read: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff));
	i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz);
	print("\t" "write: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff));
	i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz);
	print("\t" "r/w: %ud MB/s\n", ((i>>16)*sz*2*2)/(i&0xffff));
	memset(c->done.entry, 0, c->done.n*sizeof *c->done.entry);

	maccmd(c, CSmac, c->ra);
	cmd(c, Cenablefc, 0, 0);
	if(e->ifc.maxmtu > 9000)
		e->ifc.maxmtu = 9000;
	cmd(c, CSmtu, 0, e->ifc.maxmtu);

	return 0;
}

/*
 * map the card's memory (bar 0), allocate the host-side command
 * response buffer, completion ring and statistics block, copy the
 * eprom strings out of card memory, and finish with setpcie() and
 * parseeprom().
 *
 * returns 0 on success and -1 on any failure.  the caller compares
 * the result against -1, so failures of the last two steps must not
 * be reported as 1.
 */
static int
setmem(Pcidev *p, Ctlr *c)
{
	uint i, raddr;
	Done *d;
	void *mem;

	c->tx.segsz = 2048;
	c->ramsz = 2 MB - (2*48 K + 32 K) - 0x100;
	if(c->ramsz > p->mem[0].size)
		return -1;

	raddr = p->mem[0].bar & ~0x0F;	/* strip the bar's flag bits */
	mem = (void*)vmap(raddr, p->mem[0].size);
	if(mem == nil){
		print("m10g: can't map %p\n", p->mem[0].bar);
		return -1;
	}
	c->port = raddr;
	c->ram = mem;
	c->cmd = malign(sizeof *c->cmd);
	c->cprt = PCIWADDR(c->cmd);

	d = &c->done;
	d->n = Maxslots;
	d->m = d->n - 1;
	i = d->n*sizeof *d->entry;
	d->entry = malign(i);
	memset(d->entry, 0, i);
	d->busaddr = PCIWADDR(d->entry);

	c->stats = malign(sizeof *c->stats);
	memset(c->stats, 0, sizeof *c->stats);
	c->statsprt = PCIWADDR(c->stats);

	/* the eprom strings occupy the last Epromsz bytes of card memory */
	memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz - 2);
	if(setpcie(p) != 0 || parseeprom(c) != 0)
		return -1;
	return 0;
}

/* pick the receive ring a frame of sz bytes arrived on: small if it fits an smpool buffer, big otherwise */
static Rx*
whichrx(Ctlr *c, int sz)
{
	return sz <= smpool.size? &c->sm: &c->bg;
}

/* wake the receive process of every configured controller; the pool argument is unused */
static void
pkick(Bpool*)
{
	Ctlr *c;

	for(c = ctlrs; c < ctlrs + nctlr; c++)
		wakeup(&c->rxrendez);
}

/*
 * take one buffer off the head of rx's pool and clear its FREE flag.
 * returns nil if the pool is empty.  does no locking itself;
 * replenish calls it with the pool lock held.
 */
static Msgbuf*
balloc(Rx* rx)
{
	Bpool *pool;
	Msgbuf *m;

	pool = rx->pool;
	m = pool->head;
	if(m == nil)
		return nil;
	pool->head = m->next;
	pool->n--;
	m->next = nil;
	m->flags &= ~FREE;
	return m;
}

/*
 * free routine installed on small receive buffers: reset the buffer
 * and push it back on smpool.  if the pool was marked starved and
 * now holds more than 16 buffers, wake the receive processes.
 */
static void
smbfree(Msgbuf *m)
{
	Bpool *pool;

	pool = &smpool;
	m->count = 0;
	m->flags = FREE;
	m->data = (uchar*)PGROUND((uintptr)m->xdata);

	ilock(pool);
	m->next = pool->head;
	pool->head = m;
	pool->n++;
	pool->cnt++;
	if((pool->flags & Pstarve) != 0 && pool->n > 16)
		pkick(pool);
	iunlock(pool);
}

/*
 * free routine installed on big receive buffers: reset the buffer
 * and push it back on bgpool.  if the pool was marked starved and
 * now holds more than 16 buffers, wake the receive processes.
 */
static void
bgbfree(Msgbuf *m)
{
	Bpool *pool;

	pool = &bgpool;
	m->count = 0;
	m->flags = FREE;
	m->data = (uchar*)PGROUND((uintptr)m->xdata);

	ilock(pool);
	m->next = pool->head;
	pool->head = m;
	pool->n++;
	pool->cnt++;
	if((pool->flags & Pstarve) != 0 && pool->n > 16)
		pkick(pool);
	iunlock(pool);
}

/* defined outside this file; called between and after writes to card memory in replenish and submittx */
extern void sfence(void);

/*
 * this is highly optimized to reduce bus cycles with
 * w/c memory while respecting the lanai z model a's
 * limit of 32-byte writes.  writes > 32 bytes must be handled
 * by card f/w.  partial writes are also handled by f/w.
 *
 * refill receive ring rx from its pool, eight buffers at a time.
 * each buffer becomes a pair of big-endian words (address high,
 * address low) in the card's ring and is remembered in rx->host.
 * if the ring cannot be filled, the pool may be marked Pstarve so
 * that the buffer free routines wake the receive process later.
 */

static void
replenish(Rx *rx)
{
	uint buf[16], i, idx, e, f;
	Bpool *p;
	Msgbuf *m;

	p = rx->pool;
	/* e: how many slots to fill, computed in multiples of 8; nothing is done below 16 */
	e = (rx->i - rx->cnt) & ~7;
	e += rx->n;
	if(e < 16)
		return;
	ilock(rx->pool);
	while(p->n >= 8 && e){
		idx = rx->cnt & rx->m;
		for(i = 0; i < 8; i++){
			m = balloc(rx);
			buf[i*2 + 0] = pbit32h(PCIWADDR(m->data));
			buf[i*2 + 1] = pbit32(PCIWADDR(m->data));
			rx->host[idx + i] = m;
		}
		/*
		 * the low address word of the first entry is held back
		 * (written as ~0) until both 32-byte halves are in card
		 * memory, then stored last -- presumably so the card
		 * does not act on a half-written batch.
		 */
		f = buf[1];
		buf[1] = ~0;
		memmove(rx->lanai + 2*idx, buf, sizeof buf / 2);
		sfence();
		memmove(rx->lanai + 2*(idx + 4), buf + 8, sizeof buf / 2);
		rx->lanai[2*idx + 1] = f;
		sfence();
		rx->cnt += 8;
		e -= 8;
		p->flags &= ~Pstarve;
	}
	if(e){
		/* the loop stopped with slots still empty; it should only do so when the pool is short */
		if(p->n > 7 + 1)
			print("m10g: should panic? pool->n = %d\n", p->n);
		if(e > rx->n/2)
			p->flags |= Pstarve;
	}
	iunlock(rx->pool);
}

/* smallest power of two that is >= j; 1 for any j <= 1 */
static int
nextpow(int j)
{
	int p;

	p = 1;
	while(p < j)
		p <<= 1;
	return p;
}

/* malign() that panics with Enomem on failure and returns zeroed memory */
static void*
emalign(int sz)
{
	void *v;

	if((v = malign(sz)) == 0)
		panic(Enomem);
	memset(v, 0, sz);
	return v;
}

/*
 * size and allocate the transmit ring and both receive rings from
 * what the firmware reports, stock the two buffer pools, point the
 * card at the statistics block and bring the interface up.
 */
static void
open0(Ctlr *c)
{
	int i, sz, entries;
	Msgbuf *m;

	/* transmit ring: the firmware reports its size in bytes and its offset in card memory */
	entries = cmd(c, CGsendrgsz, 0, 0)/sizeof *c->tx.lanai;
	c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0, 0));
	c->tx.host = emalign(entries*sizeof *c->tx.host);
	c->tx.bring = emalign(entries*sizeof *c->tx.bring);
	c->tx.n = entries;
	c->tx.m = entries - 1;

	/* receive rings: 8 bytes (two address words) per entry; both rings get the same entry count */
	entries = cmd(c, CGrxrgsz, 0, 0)/8;
	c->sm.pool = &smpool;
	cmd(c, CSsmallsz, 0, c->sm.pool->size);
	c->sm.lanai = (uint*)(c->ram + cmd(c, CGsmallrxoff, 0, 0));
	c->sm.n = entries;
	c->sm.m = entries - 1;
	c->sm.host = emalign(entries*sizeof *c->sm.host);

	c->bg.pool = &bgpool;
	c->bg.pool->size =  nextpow( /*e->maxmtu*/9000 + 2);	/* 2 byte alignment pad */
	cmd(c, CSbigsz, 0, c->bg.pool->size);
	c->bg.lanai = (uint*)(c->ram + cmd(c, CGbigrxoff, 0, 0));
	c->bg.n = entries;
	c->bg.m = entries - 1;
	c->bg.host = emalign(entries*sizeof *c->bg.host);

	/* stock the small pool: allocate each buffer, install our free routine, and free it into the pool */
	sz = c->sm.pool->size + BY2PG;
	for(i = 0; i < c->sm.n; i++){
		m = mballoc(sz, 0, Mbeth10gbesm);
		m->free = smbfree;
		mbfree(m);
	}
	mballocpool(c->bg.n, c->bg.pool->size, BY2PG, Mbeth10gbebg, bgbfree);

	cmd(c, CSstatsdma2, sizeof *c->stats, c->statsprt);
	/* initial values; checkstats reports any change from them */
	c->linkstat = ~0;
	c->nrdma = 15;

	cmd(c, Cetherup, 0, 0);
}

/*
 * take the next received frame off the completion ring.
 * returns the Msgbuf holding it, or 0 if the ring is empty, the
 * matching receive ring has no buffers posted, or the ring slot has
 * no buffer recorded.  in the last two cases the completion entry
 * has already been consumed.
 */
static Msgbuf*
nextbuf(Ctlr *c)
{
	uint i;
	ushort l;
	Slot *s;
	Done *d;
	Msgbuf *m;
	Rx *rx;

	d = &c->done;
	i = d->i&d->m;
	s = (Slot*)(d->entry + i);
	l = s->len;
	if(l == 0)
		return 0;
//	k = s->cksum;
	/* clear the length so the slot reads as empty the next time round */
	s->len = 0;
	d->i++;
	/* the card stored the length big-endian */
	l = gbit16((uchar*)&l);
	rx = whichrx(c, l);
	/*
	 * unsigned arithmetic: while buffers are outstanding cnt is
	 * ahead of i and the difference wraps to a huge value, so this
	 * is true only when none are posted.
	 */
	if(rx->i - rx->cnt <= rx->n){
		print("m10g: overrun\n");
		return 0;
	}
	i = rx->i&rx->m;
	m = rx->host[i];
	rx->host[i] = 0;
	if(m == 0){
		print("m10g: rx to no block\n");
		return 0;
	}
	rx->i++;
	m->flags |= Bipck|Btcpck|Budpck;
	/* skip the 2 byte alignment pad in front of the frame */
	m->data += 2;
	m->count = l;
	return m;
}

static int
rxcansleep(void *v)
{
	uint *e;
	Ctlr *c;
	Done *d;
	Slot *s;

	c = v;
	d = &c->done;
	e = d->entry + (d->i&d->m);
	s = (Slot*)e;
	if(s->len != 0)
		return -1;
	c->irqack[0] = pbit32(3);
	if((c->sm.pool->flags | c->bg.pool->flags) & Pstarve)
		return -1;
	return 0;
}

/*
 * receive process, one per controller (started by m10gattach).
 * forever: refill both receive rings, sleep until rxcansleep says
 * there is work, then pass received frames to etheriq.
 *
 * at most l frames are delivered between refills, where l is two
 * thirds of the smaller of the two ring masks.
 */
static void
m10rx(void)
{
	int i, l;
	Msgbuf *m;
	Ctlr *c;
	Ether *e;

	e = u->arg;
	c = e->ctlr;

	/* l = min(sm.m, bg.m) * 2/3 */
	l = c->sm.m;
	if(c->bg.m < l)
		l = c->bg.m;
	l *= 2;
	l /= 3;

	for(;;){
		replenish(&c->sm);
		replenish(&c->bg);
		sleep(&c->rxrendez, rxcansleep, c);
		for(i = 0; i < l && (m = nextbuf(c)); i++)
			etheriq(e, m);
	}
}

/* non-zero when the transmit ring has u or fewer free slots */
static uint
txstarving(Tx *tx, uint u)
{
	return tx->n - (tx->i - tx->cnt) <= u;
}

/*
 * reclaim transmit slots until tx->npkt reaches n, the ring is empty,
 * or tx->m slots have been visited.  each recorded Msgbuf is counted
 * and freed.
 *
 * returns 1 if the transmitter was starved and more than half the
 * ring is now free (the starve flag is cleared; the caller should
 * wake the transmitter), otherwise 0.
 */
static int
txcleanup(Tx *tx, uint n)
{
	uint slot, visited;
	Msgbuf *m;

	visited = 0;
	while(visited < tx->m && tx->npkt != n){
		if(tx->cnt == tx->i){
			dprint("m10g: txcleanup cnt == i %ud\n", tx->i);
			break;
		}
		slot = tx->cnt & tx->m;
		m = tx->bring[slot];
		if(m != nil){
			tx->bring[slot] = 0;
			tx->nbytes += m->count;
			mbfree(m);
			tx->npkt++;
		}
		tx->cnt++;
		visited++;
	}
	if(visited == 0 && !tx->starve)
		dprint("m10g: spurious cleanup\n");
	if(visited >= tx->m)
		print("m10g: tx ovrun: %ud %ud\n", n, tx->npkt);
	if(!tx->starve || txstarving(tx, tx->n/2))
		return 0;
	tx->starve = 0;
	return 1;
}

/* sleep condition for the transmitter: -1 once the starve flag has been cleared, 0 while it is set */
static int
txcansleep(void *v)
{
	Ctlr *c;

	c = v;
	return c->tx.starve == 0? -1: 0;
}

/*
 * copy the n descriptors starting at tx->i from the host ring to the
 * card's ring and advance tx->i.
 *
 * the first descriptor is copied with its flags byte zeroed; its real
 * flag word is stored only after every descriptor has been written
 * and fenced -- presumably so the card does not start on the frame
 * early.  the copy runs from the last descriptor to the first.
 */
static void
submittx(Tx *tx, int n)
{
	int i0, i, m;
	uint v;
	Send *l, *h;

	m = tx->m;
	i0 = tx->i&m;
	l = tx->lanai;
	h = tx->host;
	/* save the first descriptor's flag word, then blank its flags for the bulk copy */
	v = h[i0].fword;
	h[i0].flags = 0;
	for(i = n - 1; i >= 0; i--)
		memmove(l+(i+i0&m), h+(i+i0&m), sizeof *h);
	sfence();
	l[i0].fword = v;
	tx->i += n;
	sfence();
}

/*
 * transmit loop; never returns.  takes frames from etheroq, cuts
 * each into segments that do not cross a segsz boundary, builds one
 * Send descriptor per segment in the host ring and hands the group
 * to submittx.
 *
 * sleeps on c->txrendez while 16 or fewer ring slots are free, and
 * waits in recv on e->ifc.reply when the output queue is empty.
 */
static void
m10gtransmit(Ether *e)
{
	uchar flags;
	ushort slen;
	uint nseg, end, bus, len, segsz;
	Ctlr *c;
	Msgbuf *m;
	Tx *tx;
	Send *s0, *s, *se;

	c = e->ctlr;
	tx = &c->tx;
	segsz = tx->segsz;
	s = tx->host + (tx->i&tx->m);
	se = tx->host + tx->n;
	for(;;){
		if(txstarving(tx, 16)){
			tx->starvei = tx->i;
			tx->starve = 1;
			sleep(&c->txrendez, txcansleep, c);
			continue;
		}
		if((m = etheroq(e)) == nil){
			recv(e->ifc.reply, 0);
			continue;
		}
		flags = SFfirst|SFnotso;
		if((len = m->count) < 1520)
			flags |= SFsmall;
		/* NOTE(review): bus is a 32-bit uint, so pbit32h(bus) below is always 0 -- confirm for 64-bit builds */
		bus = PCIWADDR(m->data);
		s0 = s;
		nseg = 0;
		for(; len; len -= slen){
			/* this segment ends at the next segsz boundary or the end of the frame, whichever is first */
			end = bus+segsz & ~(segsz-1);
			slen = end - bus;
			if(slen > len)
				slen = len;
			s->low = pbit32(bus);
			s->high = pbit32h(bus);
			s->len = pbit16(slen);
			s->flags = flags;
			s->nrdma = 1;

			bus += slen;
			/* wrap to the start of the host ring */
			if(++s == se)
				s = tx->host;
			flags &= ~SFfirst;
			nseg++;
		}
		/* the first descriptor carries the segment count */
		s0->nrdma = nseg;
		/* record the message against its last segment; txcleanup frees it from there */
		tx->bring[tx->i+nseg-1 & tx->m] = m;
		submittx(tx, nseg);
		tx->submit++;
	}
}

/*
 * called from the interrupt handler: if the card has flagged the
 * statistics block as updated, note link state changes (counting the
 * transition in c->speed and, on link up, clearing the transmit
 * starve flag and waking the transmitter) and changes in the rdma
 * counter.  the Ether argument is unused.
 */
static void
checkstats(Ether *, Ctlr *c, Stats *s)
{
	uint link, nrdma;

	if(s->updated == 0)
		return;

	link = gbit32(s->linkstat);
	if(link != c->linkstat){
		c->speed[link>0]++;
		c->linkstat = link;
		if(link != 0){
			dprint("m10g: link up\n");
			c->tx.starve = 0;
			wakeup(&c->txrendez);
		}else
			dprint("m10g: link down\n");
	}
	nrdma = gbit32(s->nrdma);
	if(nrdma != c->nrdma){
		dprint("m10g: rdma timeout %d\n", nrdma);
		c->nrdma = nrdma;
	}
}

/*
 * interrupt-time transmit reclaim.  repeatedly read the card's
 * transmit count and run txcleanup when it differs from tx.npkt or
 * the transmitter is starved, waking the transmitter when txcleanup
 * asks for it.  stops once the stats valid byte reads 0, or after
 * 1048576 passes.
 */
static void
waitintx(Ctlr *c)
{
	int pass, sent;

	for(pass = 1048576; pass > 0; pass--){
		coherence();
		sent = gbit32(c->stats->txcnt);
		if((sent != c->tx.npkt || c->tx.starve) && txcleanup(&c->tx, sent))
			wakeup(&c->txrendez);
		if(c->stats->valid == 0)
			break;
	}
}

/*
 * interrupt handler; v is the Ether.  does nothing unless the stats
 * valid byte is set.  otherwise: deassert the interrupt line when
 * not using msi, clear valid, reclaim transmit slots, check the
 * statistics block, write 3 to the second irqack word, and wake the
 * receive process if bit 0 of the saved valid byte was set.
 */
static void
m10ginterrupt(Ureg *, void *v)
{
	int valid;
	Ctlr *c;
	Ether *e;
	Stats *s;

	e = v;
	c = e->ctlr;
	s = c->stats;

	valid = s->valid;
	if(valid == 0)
		return;
	if(c->msi == 0){
		*c->irqdeass = 0;
		mfence();
	}
	/* cleared whether or not msi is in use */
	s->valid = 0;
	waitintx(c);
	checkstats(e, c, c->stats);
	c->irqack[1] = pbit32(3);
	if(valid&1)
		wakeup(&c->rxrendez);
}

/*
 * attach routine: under the controller lock, if the controller is in
 * state Attached, start the receive process (first time only) and
 * move to state Runed.  in any other state nothing is done.
 */
static void
m10gattach(Ether *e)
{
	Ctlr *c;

	c = e->ctlr;
	qlock(c);
	if(c->state == Attached){
		if(c->kprocs == 0){
			c->kprocs++;
			snprint(c->rname, sizeof c->rname, "#l%drx", e->ctlrno);
			userinit(m10rx, e, c->rname);
		}
		c->state = Runed;
	}
	qunlock(c);
}

/* number of Msgbufs on the list starting at m, following next links */
static int
lstcount(Msgbuf *m)
{
	int n;

	n = 0;
	while(m != nil){
		n++;
		m = m->next;
	}
	return n;
}

/* scratch buffer for cifstat; shared by all controllers and not locked */
static char ifstatbuf[2 K];

/*
 * "ifstat" console command: format a snapshot of the card's
 * statistics block and the driver's ring and pool counters for
 * controller c, and print it.  the argument count and vector are
 * unused.
 */
static void
cifstat(Ctlr *c, int, char **)
{
	Stats s;
	int i, n;

	/* no point in locking this because this is done via dma */
	memmove(&s, c->stats, sizeof s);
	snprint(ifstatbuf, sizeof ifstatbuf,
		"txcnt = %ud\n" 		"linkstat = %ud\n" 	"dlink = %ud\n"
		"derror = %ud\n" 	"drunt = %ud\n" 		"doverrun = %ud\n"
		"dnosm = %ud\n" 	"dnobg = %ud\n" 	"nrdma = %ud\n"
		"dpause = %ud\n"	"dufilt = %ud\n"		"dcrc32 = %ud\n"
		"dphy = %ud\n"		"dmcast = %ud\n"
		"txstopped = %ud\n" 	"down = %ud\n" 		"updated = %ud\n"
		"valid = %ud\n\n"
		"tx starve = %ud\n"	"tx starvei = %ud\n"
		"tx pkt = %ud\n"		"tx submit = %ud\n"	"tx bytes = %llud\n"
		"tx n = %ud\t"	"cnt = %ud\t"	"i = %ud\n"
		"sm n = %ud\t"	"cnt = %ud\t"	"i = %ud\t"	"lst = %ud\n"
		"bg n = %ud\t"	"cnt = %ud\t"	"i = %ud\t"	"lst = %ud\n"
		"segsz = %ud\n"		"coal = %ud\n\n"
		"speeds 0:%ud 10000:%ud\n",
		gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlinkef),
		gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun),
		gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma),
		gbit32(s.dpause), gbit32(s.dufilt), gbit32(s.dcrc32),
		gbit32(s.dphy), gbit32(s.dmcast),
		s.txstopped,  s.down, s.updated, s.valid,
		c->tx.starve, c->tx.starvei,
		c->tx.npkt, c->tx.submit, c->tx.nbytes,
		c->tx.n, c->tx.cnt, c->tx.i,
		c->sm.pool->n, c->sm.cnt,  c->sm.i, lstcount(c->sm.pool->head),
		c->bg.pool->n, c->bg.cnt,  c->bg.i, lstcount(c->bg.pool->head),
		c->tx.segsz, gbit32((uchar*)c->coal),
		c->speed[0], c->speed[1]);

	/* HACK */
	/* print in PRINTSIZE steps -- presumably print() truncates longer output; confirm */
	n = strlen(ifstatbuf);
	for(i = 0; n-i > 0; i += PRINTSIZE)
		print("%s", ifstatbuf+i);
}

/*
 * "debug" console command: flip the file-wide debug flag that gates
 * dprint, and print its new value.  all arguments are unused; the
 * flag applies to every controller.
 */
static void
cdebug(Ctlr *, int, char**)
{
	/* x ^= x is always 0, which could never switch debugging on */
	debug = !debug;
	print("debug %d\n", debug);
}

/*
 * "coal" console command: with one argument, store it (big-endian)
 * in the card's interrupt coalescing register; in every case print
 * the register's current value.
 */
static void
ccoal(Ctlr *c, int n, char **v)
{
	if(n == 1){
		*c->coal = pbit32(strtoul(*v, 0, 0));
		coherence();
	}
	print("%d\n", gbit32((uchar*)c->coal));
}

/*
 * "help" console command, also used by docmd for unrecognized
 * commands: list the available commands.  all arguments are unused.
 */
static void
chelp(Ctlr*, int, char **)
{
	print("coal ctlr n	-- get/set interrupt coalescing delay\n");
	print("debug	-- set debug (all ctlrs)\n");
	print("ifstat ctlr	-- print statistics\n");
}

/* one console command understood by docmd */
typedef struct{
	void	(*f)(Ctlr *, int, char**);	/* handler: controller, remaining argument count, remaining arguments */
	char*	name;
	int	minarg;		/* fewest arguments accepted after the name; if > 0 the first is a controller number */
	int	maxarg;		/* most arguments accepted after the name */
}Cmdtab;

/*
 * look up v[0] among the n entries of t and run it.
 * c counts the strings in v.  with no arguments, or a name that is
 * not in the table, the last table entry is run with no arguments.
 * when the chosen entry has minarg > 0 its first argument is taken
 * as a controller number; otherwise controller 0 is passed.
 */
static void
docmd(Cmdtab *t, int n, int c, char **v)
{
	int i, ctlrno;
	Cmdtab *e;

	i = n;
	if(c > 0){
		for(i = 0; i < n; i++)
			if(strcmp(*v, t[i].name) == 0)
				break;
		c--;
		v++;
	}
	if(i >= n){
		/* fall back to the final entry */
		i = n-1;
		c = 0;
	}
	e = t + i;
	if(c < e->minarg){
		print("too few args, need %d\n", e->minarg);
		return;
	}
	if(c > e->maxarg){
		print("too many args, max %d\n", e->maxarg);
		return;
	}
	ctlrno = 0;
	if(e->minarg > 0){
		ctlrno = strtoul(*v++, 0, 0);
		c--;
	}
	if(ctlrno < 0 || ctlrno >= nctlr){
		print("bad controller %d\n", ctlrno);
		return;
	}
	e->f(ctlrs+ctlrno, c, v);
}

/*
 * console command table: handler, name, minarg, maxarg.
 * docmd falls back to the last entry for unknown commands, so
 * "help" must stay last.
 */
static Cmdtab ctab[] = {
	cdebug,	"debug",		0,	0,
	ccoal,	"coal",		1,	2,
	cifstat,	"ifstat",		1,	1,
	chelp,	"help",		0,	100,
};

/*
 * handler installed by cmd_install for "myrictl".  drops the first
 * argument (presumably the command word itself) and dispatches the
 * rest through ctab.
 */
static void
m10gctl(int c, char **v)
{
	docmd(ctab, nelem(ctab), c-1, v+1);
}

/*
 * find every myricom card (vendor 0x14c1, device 0x0008) and set up
 * a ctlrs[] slot for each, up to nelem(ctlrs).  a card whose setmem()
 * fails is reported and skipped; its slot is reused for the next
 * card, which is why the whole slot is cleared first.
 */
static void
m10gpci(void)
{
	Ctlr *c;
	Pcidev *p;

	for(p = 0; p = pcimatch(p, 0x14c1, 0x0008); ){
		c = ctlrs+nctlr;
		/* clear the structure itself; sizeof c would be the size of the pointer */
		memset(c, 0, sizeof *c);
		c->pcidev = p;
		c->boot = pcicap(p, PciCapVND);
//		kickthebaby(p, c);
		pcisetbme(p);
		if(setmem(p, c) == -1){
			print("m10g: init failed\n");
			continue;
		}
		namelock(c, "my%d", nctlr);
		namelock(&c->cmdl, "my%d.cmd", nctlr);
		namelock(&c->tx, "my%d.tx", nctlr);
		if(++nctlr == nelem(ctlrs))
			break;
	}
}

/*
 * driver entry point: bind e to the first unclaimed controller whose
 * port matches e->port (any controller if e->port is 0).  the pci bus
 * is scanned on the first call only.
 *
 * fills in e's port, irq, tbdf, speed, mtu and mac address, resets
 * and opens the card, installs the attach/transmit/interrupt
 * routines and, once, the "myrictl" console command.
 * returns 0 on success, -1 if no controller is available.
 */
int
m10gpnp(Ether *e)
{
	Ctlr *c, *end;
	static int once, cmd;

	if(once++ == 0)
		m10gpci();
	end = ctlrs+nctlr;
	for(c = ctlrs; c < end; c++){
		if(c->active)
			continue;
		if(e->port == 0 || e->port == c->port)
			break;
	}
	if(c == end)
		return -1;
	c->active = 1;

	e->ctlr = c;
	e->port = c->port;
	e->irq = c->pcidev->intl;
	e->tbdf = c->pcidev->tbdf;
	e->mbps = 10000;
	e->ifc.maxmtu = 9000;
	memmove(e->ea, c->ra, Easize);

	reset(e, c);
	open0(c);
	c->state = Attached;

	e->attach = m10gattach;
	e->transmit = m10gtransmit;
	e->interrupt = m10ginterrupt;
	if(cmd++ == 0)
		cmd_install("myrictl", "tweak myri parameters", m10gctl);

	return 0;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.