/*
 * Plan 9 from Bell Labs’s /usr/web/sources/plan9/sys/src/ape/lib/ap/gen/mbwc.c
 *
 * Copyright © 2021 Plan 9 Foundation.
 * Distributed under the MIT License.
 * Download the Plan 9 distribution.
 */


#include <stdlib.h>
#include <limits.h>

/*
 * Use the FSS-UTF transformation proposed by posix.
 *	We define 7 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *	T3	11110xxx	3 free bits
 *	T4	111110xx	2 free bits
 *	T5	1111110x	1 free bit
 *
 *	Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0				7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx			16
 *	00010000	001FFFFF	T3 Tx Tx Tx		21
 *	00200000	03FFFFFF	T4 Tx Tx Tx Tx		26
 *	04000000	7FFFFFFF	T5 Tx Tx  Tx Tx Tx	31
 */
/* Defined further down; declared here because mblen is written in terms of it. */
int
mbtowc(wchar_t *pwc, const char *s, size_t n);

/*
 * mblen: length in bytes of the multibyte character at s, looking at
 * no more than n bytes.  This is mbtowc with the decoded value thrown
 * away, so the result is whatever mbtowc returns for a null pwc.
 */
int
mblen(const char *s, size_t n)
{
	wchar_t *discard;

	discard = 0;	/* null destination: nothing is stored */
	return mbtowc(discard, s, n);
}

/*
 * Constants for the encoding table at the top of this file.
 */
enum {
	C0MSK = 0x7F,		/* 7 payload bits: one-byte (T0) form */
	C1MSK = 0x7FF,		/* 11 payload bits: two-byte (T1 Tx) form */
	T1 = 0xC0,		/* 110xxxxx: lead byte of a two-byte sequence */
	T2 = 0xE0,		/* 1110xxxx: lead byte of a three-byte sequence */
	NT1BITS = 11,		/* payload bits in a two-byte sequence */
	NSHFT = 5,		/* payload bits gained by each further byte */
	NCSHFT = NSHFT + 1,	/* payload bits in one continuation (Tx) byte */
	/*
	 * Largest value the table can encode (31 bits).  Spelled out
	 * rather than computed as 1<<(8*MB_LEN_MAX-1): that shift
	 * overflows a signed int when MB_LEN_MAX is 4 and exceeds the
	 * width of int when MB_LEN_MAX is larger.
	 */
	WCHARMSK = 0x7FFFFFFF,
};

/*
 * mbtowc: decode the multibyte character at s, looking at no more
 * than n bytes, and store it through pwc unless pwc is null.
 *
 * Returns 0 if s is null (the encoding keeps no shift state) or if
 * the character is NUL, the number of bytes consumed for any other
 * character, and -1 if the first n bytes do not hold a complete,
 * well-formed sequence.  The result never exceeds MB_LEN_MAX.
 * Overlong forms are not rejected.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	enum {
		/*
		 * Longest sequence accepted: the 6-byte form is the
		 * longest in the encoding table at the top of this
		 * file, and no result may be longer than MB_LEN_MAX.
		 */
		MAXSEQ = MB_LEN_MAX < 6 ? MB_LEN_MAX : 6,
	};
	unsigned long long l, m, wm, b;
	unsigned int c0, c;
	int i;

	if(!s)
		return 0;

	if(n < 1)
		goto bad;

	c0 = s[0] & 0xff;		/* first one is special */
	if((c0 & 0x80) == 0x00) {
		if(pwc)
			*pwc = c0;
		if(c0 == 0)
			return 0;
		return 1;
	}

	/* a continuation byte (10xxxxxx) cannot start a sequence */
	if((c0 & 0xC0) == 0x80)
		goto bad;

	/*
	 * On iteration i, m and b are the mask and expected value that
	 * identify the lead byte of an (i+1)-byte sequence, and wm is
	 * the payload mask for a sequence of that length.
	 */
	m = T2;
	b = m^0x20;
	l = c0;
	wm = C1MSK;
	for(i = 1; i < MAXSEQ; i++){
		if(n < (size_t)i+1)
			goto bad;
		c = (s[i] ^ 0x80) & 0xff;	/* 10xxxxxx becomes 00xxxxxx */
		if((c & 0xC0) != 0x00)
			goto bad;
		l = (l << NCSHFT) | c;
		if((c0 & m) == b) {
			if(pwc)
				*pwc = l & wm;
			return i + 1;
		}
		b = m;
		m = (m >> 1) | 0x80;
		wm = (wm << NSHFT) | wm;
	}

	/*
	 * bad decoding
	 */
bad:
	return -1;

}

int
wctomb(char *s, wchar_t wchar)
{
	unsigned long long c, maxc, m;
	int i, j;

	if(!s)
		return 0;

	maxc = 0x80;
	c = wchar & WCHARMSK;
	if(c < maxc) {
		s[0] = c;
		return 1;
	}

	m = T1;
	for(i = 2; i < MB_LEN_MAX + 1; i++){
		maxc <<= 4;
		if(c < maxc || i == MB_LEN_MAX){
			s[0] = m | (c >> ((i - 1) * NCSHFT));
			for(j = i - 1; j >= 1; j--){
				s[i - j] = 0x80|((c>>(6 * (j - 1)))&0x3f);
			}
			return i;
		}
		m = (m >> 1) | 0x80;
	}
	return MB_LEN_MAX;
}

/*
 * mbstowcs: convert the NUL-terminated multibyte string s into wide
 * characters, storing at most n of them in pwcs.
 *
 * Conversion stops after the terminating NUL has been stored (it is
 * not counted in the result) or once n wide characters have been
 * stored, in which case pwcs is left unterminated.  If pwcs is null
 * nothing is stored, n is ignored, and the result is the number of
 * wide characters the whole string would need.
 *
 * Returns the number of wide characters converted, or (size_t)-1 if
 * an invalid multibyte sequence is met.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	wchar_t wc;
	size_t i;	/* same type as n, so the count cannot overflow */
	int c, d;

	for(i = 0; pwcs == NULL || i < n; i++) {
		c = *s & 0xff;
		if(c == 0) {
			if(pwcs != NULL)
				pwcs[i] = 0;
			break;
		}
		if(c < 0x80) {
			/* ASCII needs no decoding */
			wc = c;
			d = 1;
		} else {
			d = mbtowc(&wc, s, MB_LEN_MAX);
			if(d <= 0)
				return (d < 0) ? (size_t)-1 : i;
		}
		if(pwcs != NULL)
			pwcs[i] = wc;
		s += d;
	}
	return i;
}

/*
 * wcstombs: convert the zero-terminated wide string pwcs to multibyte
 * form, writing at most n bytes to s.
 *
 * Conversion stops after the terminating NUL has been stored (it is
 * not counted in the result), or before the first character whose
 * encoding would not fit in what is left of the n bytes; a character
 * is never split or skipped, and s is then left unterminated.  If s
 * is null nothing is stored, n is ignored, and the result is the
 * number of bytes the whole string would need.
 *
 * Returns the number of bytes produced, or (size_t)-1 if wctomb
 * reports a character it cannot encode.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[MB_LEN_MAX];
	size_t len;
	int d, i;

	for(len = 0; s == NULL || len < n; pwcs++) {
		if(*pwcs == 0) {
			if(s != NULL)
				s[len] = '\0';
			break;
		}
		/* encode into buf first so that s is never overrun */
		d = wctomb(buf, *pwcs);
		if(d < 0)
			return (size_t)-1;
		if(s != NULL) {
			if((size_t)d > n - len)
				break;
			for(i = 0; i < d; i++)
				s[len + i] = buf[i];
		}
		len += d;
	}
	return len;
}

/*
 * Bell Labs OSI certified Powered by Plan 9
 *
 * (Return to Plan 9 Home Page)
 *
 * Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
 * Comments to webmaster@9p.io.
 */