Plan 9 from Bell Labs’s /usr/web/sources/plan9/sys/src/cmd/upas/filterkit/list.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include <u.h>
#include <libc.h>
#include <regexp.h>
#include <libsec.h>
#include <String.h>
#include <bio.h>
#include "dat.h"

int debug;	/* bumped once per -d flag in main; not otherwise read in the code shown */

/*
 *  pattern types, selected by the first character of a pattern
 *  (after an optional leading '!')
 */
enum
{
	Tregexp=	(1<<0),		/* ~ : regular expression, anchored with ^...$ by newpattern */
	Texact=		(1<<1),		/* = : exact string comparison */
};

typedef struct Pattern Pattern;
struct Pattern
{
	Pattern	*next;
	int	type;
	char	*arg;
	int	bang;
};

String	*patternpath;	/* not referenced in the code shown; main uses a local of the same name */
Pattern	*patterns;	/* head of the pattern list built by newpattern */
String	*mbox;		/* not referenced in the code shown */

/*
 *  print the command synopsis on file descriptor 2 and
 *  exit with status "usage"
 */
static void
usage(void)
{
	fprint(2, "usage: %s add|check patternfile [addressfile ...]\n", argv0);
	exits("usage");
}

/*
 *  fold the ASCII letters A-Z of a NUL-terminated string to a-z,
 *  in place; every other byte is left untouched
 */
static void
mklower(char *p)
{
	char *q;

	for(q = p; *q != '\0'; q++)
		if(*q >= 'A' && *q <= 'Z')
			*q += 'a' - 'A';
}

/*
 *  simplify an address, reduce to a domain
 *
 *  addr is lower-cased in place and loses a trailing '.', if any.
 *  The result is a new String holding a pattern-file entry:
 *	=addr			when addr contains no '@'
 *	~local@(.*\.)?domain	otherwise
 *  where regular expression metacharacters in local and domain are
 *  escaped with '\' and domain is cut down to its last two elements
 *  (three when the address ends in ".uk").
 *  The caller releases the result with s_free.
 */
static String*
simplify(char *addr)
{
	int dots, dotlim;
	char *p, *at;
	String *s;

	mklower(addr);
	at = strchr(addr, '@');
	if(at == nil){
		/* local address, make it an exact match */
		s = s_copy("=");
		s_append(s, addr);
		return s;
	}

	/* copy up to, and including, the '@' sign */
	at++;			/* at now points at the first domain character */
	s = s_copy("~");
	for(p = addr; p < at; p++){
		if(strchr(".*+?(|)\\[]^$", *p))
			s_putc(s, '\\');
		s_putc(s, *p);
	}

	/*
	 * just any address matching the two most significant domain elements,
	 * except for .uk, which needs three.
	 */
	s_append(s, "(.*\\.)?");
	p = addr+strlen(addr);			/* point at NUL */
	if (p[-1] == '.')
		*--p = '\0';			/* drop a trailing dot */
	if (p - addr > 3 && strcmp(".uk", p - 3) == 0)
		dotlim = 3;
	else
		dotlim = 2;

	/*
	 * scan backwards over the domain counting dots; on the dotlim'th
	 * dot, stop just past it.  with fewer dots the scan ends at the
	 * start of the domain and the whole domain is kept.
	 */
	dots = 0;
	while(--p > at)
		if(*p == '.' && ++dots >= dotlim){
			p++;
			break;
		}

	/*
	 * an empty domain ("user@" or "user@.") leaves p on the '@' itself;
	 * move it back to the (empty) domain so that the '@' is not
	 * emitted a second time.
	 */
	if(p < at)
		p = at;

	for(; *p; p++){
		if(strchr(".*+?(|)\\[]^$", *p) != nil)
			s_putc(s, '\\');
		s_putc(s, *p);
	}
	s_terminate(s);

	return s;
}

/*
 *  append a pattern to the end of the global pattern list
 *
 *  arg is lower-cased in place and then copied; a Tregexp pattern is
 *  stored as "^arg$", any other type is stored as given.
 *  returns 0 on success, -1 if memory could not be allocated
 *  (the list is then left unchanged).
 */
static int
newpattern(int type, char *arg, int bang)
{
	static Pattern *last;	/* current tail of the list */
	Pattern *np;
	char *text;

	mklower(arg);

	np = mallocz(sizeof *np, 1);
	if(np == nil)
		return -1;

	if(type != Tregexp)
		text = strdup(arg);
	else {
		/* room for '^', the expression, '$' and the NUL */
		text = malloc(strlen(arg)+3);
		if(text != nil){
			strcpy(text, "^");
			strcat(text, arg);
			strcat(text, "$");
		}
	}
	if(text == nil){
		free(np);
		return -1;
	}

	np->arg = text;
	np->type = type;
	np->bang = bang;

	/* link patterns in order */
	if(last != nil)
		last->next = np;
	else
		patterns = np;
	last = np;

	return 0;
}

/*
 *  read a pattern file and append its patterns to the pattern list
 *
 *  patterns are either
 *	~ regular expression
 *	= exact match string
 *  optionally preceded by '!'.  only the first field of each line is
 *  examined, except that a line of the form
 *	#include file
 *  reads patterns from file as well; a failure inside an included
 *  file is ignored.  lines of any other form are skipped.
 *
 *  all comparisons are case insensitive
 *
 *  returns 0 on success, -1 if path can't be opened or a pattern
 *  can't be allocated.
 */
static int
readpatterns(char *path)
{
	Biobuf *b;
	char *p;
	char *token[2];
	int n;
	int bang;
	int rv;

	b = Bopen(path, OREAD);
	if(b == nil)
		return -1;
	rv = 0;
	while((p = Brdline(b, '\n')) != nil){
		p[Blinelen(b)-1] = 0;		/* replace the newline */
		n = tokenize(p, token, 2);
		if(n == 0)
			continue;

		mklower(token[0]);
		p = token[0];
		if(*p == '!'){
			p++;
			bang = 1;
		} else
			bang = 0;

		/*
		 * on allocation failure leave the loop rather than return,
		 * so that the Biobuf is still closed below
		 */
		if(*p == '='){
			if(newpattern(Texact, p+1, bang) < 0){
				rv = -1;
				break;
			}
		} else if(*p == '~'){
			if(newpattern(Tregexp, p+1, bang) < 0){
				rv = -1;
				break;
			}
		} else if(strcmp(token[0], "#include") == 0 && n == 2)
			readpatterns(token[1]);
	}
	Bterm(b);
	return rv;
}

/*
 *  deliberately empty: regular expression error reports are discarded.
 *  NOTE(review): presumably this stands in for the regexp library's
 *  default regerror so that a malformed pattern is not fatal; checkaddr
 *  skips any pattern that regcomp rejects.  confirm against regexp(2).
 */
void regerror(char*)
{
}

/*
 *  check lower case version of address against patterns
 *
 *  arg itself is not modified; a lower-cased copy is compared with each
 *  pattern in list order.  Texact patterns must equal the address,
 *  Tregexp patterns are compiled and executed against it; a pattern
 *  that fails to compile is skipped.
 *  returns the first matching pattern, or nil if none matches.
 */
static Pattern*
checkaddr(char *arg)
{
	Pattern *p;
	Reprog *rp;
	String *s;
	char *addr;
	int matched;

	s = s_copy(arg);
	addr = s_to_c(s);
	mklower(addr);

	for(p = patterns; p != nil; p = p->next){
		if(p->type == Texact){
			if(strcmp(p->arg, addr) == 0)
				break;
		} else if(p->type == Tregexp){
			rp = regcomp(p->arg);
			if(rp == nil)
				continue;
			matched = regexec(rp, addr, nil, 0);
			free(rp);
			if(matched)
				break;
		}
	}

	/*
	 * the copy is a String, so it must go back through s_free on
	 * every path; plain free would release only the header.
	 */
	s_free(s);
	return p;		/* nil when the list was exhausted */
}
/*
 *  check every address read from the argc files named in argv
 *  against the pattern list
 *
 *  returns the exit status for main:
 *	nil		some address matched a pattern without '!'
 *	"!match"	no such match, but some address matched a '!' pattern
 *	"no match"	no address matched any pattern
 */
static char*
check(int argc, char **argv)
{
	Addr *a;
	Pattern *hit;
	int i, sawbang;

	sawbang = 0;
	for(i = 0; i < argc; i++)
		for(a = readaddrs(argv[i], nil); a != nil; a = a->next){
			hit = checkaddr(a->val);
			if(hit == nil)
				continue;
			/* the first plain match decides the result */
			if(!hit->bang)
				return nil;
			sawbang = 1;
		}

	return sawbang ? "!match" : "no match";
}

/*
 *  add anything that isn't already matched, all matches are lower case
 *
 *  the addresses are read from the argc files named in argv.  each
 *  address not matched by an existing pattern is simplified, appended
 *  to the pattern file pp as
 *	pattern <tab> address
 *  (both quoted with %q) and added to the in-memory list, so that
 *  later addresses covered by the new pattern are not written again.
 *
 *  returns the exit status for main: nil on success, or a message
 *  if the pattern file can't be opened for writing.
 */
static char*
add(char *pp, int argc, char **argv)
{
	int fd, i;
	String *s;
	char *cp;
	Addr *a;

	a = nil;
	for(i = 0; i < argc; i++)
		a = readaddrs(argv[i], a);

	fd = open(pp, OWRITE);
	if(fd < 0)
		return "can't open pattern file";
	seek(fd, 0, 2);			/* append */
	for(; a != nil; a = a->next){
		if(checkaddr(a->val))
			continue;
		/* simplify lower-cases a->val in place before it is written */
		s = simplify(a->val);
		cp = s_to_c(s);
		fprint(fd, "%q\t%q\n", cp, a->val);
		if(*cp == '=')
			newpattern(Texact, cp+1, 0);
		else if(*cp == '~')
			newpattern(Tregexp, cp+1, 0);
		s_free(s);
	}
	close(fd);
	return nil;
}

/*
 *  list [-d] add|check patternfile addressfile ...
 *
 *  loads the pattern file (a failure to read it is ignored), then
 *  runs the requested command and exits with its result.
 *  fewer than three arguments or an unknown command prints the usage.
 */
void
main(int argc, char **argv)
{
	char *verb, *pfile;

	ARGBEGIN {
	case 'd':
		debug++;
		break;
	} ARGEND;

	/* add writes its entries with %q */
	quotefmtinstall();

	if(argc < 3)
		usage();

	verb = argv[0];
	pfile = argv[1];
	readpatterns(pfile);

	/* exits does not return, so no else is needed between the cases */
	if(strcmp(verb, "add") == 0)
		exits(add(pfile, argc-2, argv+2));
	if(strcmp(verb, "check") == 0)
		exits(check(argc-2, argv+2));
	usage();
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.