Plan 9 from Bell Labs’s /usr/web/sources/contrib/ericvh/go-plan9/src/pkg/http/request.go

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// HTTP Request reading and parsing.

// The http package implements parsing of HTTP requests, replies,
// and URLs and provides an extensible HTTP server and a basic
// HTTP client.
package http

import (
	"bufio";
	"bytes";
	"container/vector";
	"fmt";
	"io";
	"io/ioutil";
	"os";
	"strconv";
	"strings";
)

// Limits enforced while reading a request, and the buffer size
// used by Request.Write when sending a body.
const (
	maxLineLength	= 1024;	// readLineBytes rejects lines this long or longer; assumed < bufio.DefaultBufSize
	maxValueLength	= 1024;	// readKeyValue rejects a continued header value that reaches this length
	maxHeaderLines	= 1024;	// ReadRequest gives up once this many header fields have been read
	chunkSize	= 4 << 10;	// 4 KB chunks
)

// HTTP request parsing errors.
// A ProtocolError wraps an os.ErrorString; the values of this
// type that the package reports are declared just below.
type ProtocolError struct {
	os.ErrorString;
}

// Errors returned while reading a request.
var (
	ErrLineTooLong		= &ProtocolError{"header line too long"};	// from readLineBytes, line reaches maxLineLength
	ErrHeaderTooLong	= &ProtocolError{"header too long"};	// from ReadRequest, header count reaches maxHeaderLines
	ErrShortBody		= &ProtocolError{"entity body too short"};	// from ReadRequest, body shorter than Content-Length
)

// A badStringError reports a piece of input that could not be
// parsed: what describes the problem and str is the offending text.
type badStringError struct {
	what, str string;
}

// String renders the error as the description followed by the
// offending text in double quotes.
func (e *badStringError) String() string {
	return fmt.Sprintf("%s %q", e.what, e.str);
}

// A Request represents a parsed HTTP request header.
type Request struct {
	Method		string;	// GET, POST, PUT, etc.
	RawURL		string;	// The raw URL given in the request.
	URL		*URL;	// Parsed URL.
	Proto		string;	// "HTTP/1.0"
	ProtoMajor	int;	// 1
	ProtoMinor	int;	// 0

	// A header mapping request lines to their values.
	// If the header says
	//
	//	Accept-Language: en-us
	//	accept-encoding: gzip, deflate
	//	Connection: keep-alive
	//
	// then
	//
	//	Header = map[string]string{
	//		"Accept-Encoding": "gzip, deflate",
	//		"Accept-Language": "en-us",
	//		"Connection": "keep-alive"
	//	}
	//
	// HTTP defines that header names are case-insensitive.
	// The request parser implements this by canonicalizing the
	// name, making the first character and any characters
	// following a hyphen uppercase and the rest lowercase.
	// When a key is sent more than once, the parser joins the
	// values with commas.
	Header	map[string]string;

	// The message body.
	Body	io.Reader;

	// Whether to close the connection after replying to this request.
	Close	bool;

	// The host on which the URL is sought.
	// Per RFC 2616, this is either the value of the Host: header
	// or the host name given in the URL itself.
	Host	string;

	// The referring URL, if sent in the request.
	//
	// Referer is misspelled as in the request itself,
	// a mistake from the earliest days of HTTP.
	// This value can also be fetched from the Header map
	// as Header["Referer"]; the benefit of making it
	// available as a structure field is that the compiler
	// can diagnose programs that use the alternate
	// (correct English) spelling req.Referrer but cannot
	// diagnose programs that use Header["Referrer"].
	Referer	string;

	// The User-Agent: header string, if sent in the request.
	UserAgent	string;

	// The parsed form. Only available after ParseForm is called.
	// Each key maps to every value given for it, in order of appearance.
	Form	map[string][]string;
}

// ProtoAtLeast reports whether the request's HTTP protocol version
// is major.minor or newer.  The major numbers decide the answer
// unless they are equal, in which case the minor numbers do.
func (r *Request) ProtoAtLeast(major, minor int) bool {
	if r.ProtoMajor != major {
		return r.ProtoMajor > major
	}
	return r.ProtoMinor >= minor;
}

// valueOrDefault substitutes def for an empty value;
// a nonempty value is returned unchanged.
func valueOrDefault(value, def string) string {
	if value == "" {
		return def
	}
	return value;
}

// defaultUserAgent is the User-Agent header value that Request.Write
// sends when the request's UserAgent field is empty.
const defaultUserAgent = "Go http package"

// Write writes an HTTP/1.1 request -- header and body -- in wire format.
// This method consults the following fields of req:
//	URL
//	Method (defaults to "GET")
//	UserAgent (defaults to defaultUserAgent)
//	Referer
//	Header
//	Body
//
// If Body is present, "Transfer-Encoding: chunked" is forced as a header
// (req.Header is allocated first if it is nil), and Body is read until
// os.EOF and sent as chunks of at most chunkSize bytes each.
//
// Write stops at and returns the first error it meets: a failed write
// to w, io.ErrShortWrite if w accepts only part of a chunk, or an error
// other than os.EOF from reading Body.  It returns nil on success.
func (req *Request) Write(w io.Writer) os.Error {
	uri := URLEscape(req.URL.Path);
	if req.URL.RawQuery != "" {
		uri += "?" + req.URL.RawQuery
	}

	// Request line, then the headers that come from dedicated fields.
	if _, err := fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(w, "Host: %s\r\n", req.URL.Host); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent)); err != nil {
		return err
	}
	if req.Referer != "" {
		if _, err := fmt.Fprintf(w, "Referer: %s\r\n", req.Referer); err != nil {
			return err
		}
	}

	if req.Body != nil {
		// Force chunked encoding.  A Request built by hand may have
		// no Header map yet, and storing into a nil map would crash.
		if req.Header == nil {
			req.Header = make(map[string]string)
		}
		req.Header["Transfer-Encoding"] = "chunked";
	}

	// TODO: split long values?  (If so, should share code with Conn.Write)
	// TODO: if Header includes values for Host, User-Agent, or Referer, this
	// may conflict with the User-Agent or Referer headers we add manually.
	// One solution would be to remove the Host, UserAgent, and Referer fields
	// from Request, and introduce Request methods along the lines of
	// Response.{GetHeader,AddHeader} and string constants for "Host",
	// "User-Agent" and "Referer".
	for k, v := range req.Header {
		if _, err := io.WriteString(w, k+": "+v+"\r\n"); err != nil {
			return err
		}
	}

	// Blank line ends the header.
	if _, err := io.WriteString(w, "\r\n"); err != nil {
		return err
	}

	if req.Body == nil {
		return nil
	}

	buf := make([]byte, chunkSize);
	for {
		nr, er := req.Body.Read(buf);
		// Data that arrives together with os.EOF is still part of
		// the body; data that arrives with any other error is not sent.
		if nr > 0 && (er == nil || er == os.EOF) {
			// chunk-size CRLF chunk-data CRLF
			if _, err := fmt.Fprintf(w, "%x\r\n", nr); err != nil {
				return err
			}
			nw, ew := w.Write(buf[0:nr]);
			if ew != nil {
				return ew
			}
			if nw < nr {
				return io.ErrShortWrite
			}
			if _, err := io.WriteString(w, "\r\n"); err != nil {
				return err
			}
		}
		if er == os.EOF {
			break
		}
		if er != nil {
			return er
		}
	}

	// last-chunk CRLF
	if _, err := io.WriteString(w, "0\r\n\r\n"); err != nil {
		return err
	}
	return nil;
}

// readLineBytes reads one line, through the terminating \n, from b
// and returns it with trailing spaces, tabs, \r and \n removed.
// A line of maxLineLength bytes or more yields ErrLineTooLong, and
// running out of input yields io.ErrUnexpectedEOF.
// The result points into the bufio's own storage, so it is only
// valid until the next read from b.
func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
	p, err = b.ReadSlice('\n');
	if err == os.EOF {
		// The caller asked for a line, so the input
		// should not have ended before one was complete.
		err = io.ErrUnexpectedEOF
	}
	if err != nil {
		return nil, err
	}
	if len(p) >= maxLineLength {
		return nil, ErrLineTooLong
	}

	// Walk back over the trailing white space.
	end := len(p);
Trim:
	for end > 0 {
		switch p[end-1] {
		case ' ', '\r', '\t', '\n':
			end--
		default:
			break Trim
		}
	}
	return p[0:end], nil;
}

// readLine is readLineBytes with the line copied into a string,
// so the result stays valid after further reads from b.
// On error the string is empty.
func readLine(b *bufio.Reader) (s string, err os.Error) {
	var line []byte;
	if line, err = readLineBytes(b); err != nil {
		return "", err
	}
	return string(line), nil;
}

// colon is the separator readKeyValue searches for in a header line.
var colon = []byte{':'}

// Read a key/value pair from b.
// A key/value has the form Key: Value\r\n
// and the Value can continue on multiple lines if each continuation line
// starts with a space or a horizontal tab (the linear white space of
// RFC 2616).  Continuation lines are joined to the value with one space.
//
// A blank line yields an empty key and value with a nil error, which
// callers use to detect the end of the header.  A line without a colon,
// or with a space in the key, yields a badStringError, as does a
// continued value that reaches maxValueLength.
func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
	line, e := readLineBytes(b);
	if e != nil {
		return "", "", e
	}
	if len(line) == 0 {
		return "", "", nil
	}

	// Scan first line for colon.
	i := bytes.Index(line, colon);
	if i < 0 {
		goto Malformed
	}

	key = string(line[0:i]);
	if strings.Index(key, " ") >= 0 {
		// Key field has space - no good.
		goto Malformed
	}

	// Skip initial white space before value.
	for i++; i < len(line); i++ {
		if line[i] != ' ' && line[i] != '\t' {
			break
		}
	}
	value = string(line[i:]);

	// Look for extension lines, which must begin with space or tab.
	for {
		c, e := b.ReadByte();
		if c != ' ' && c != '\t' {
			// Put the byte back only if one was actually read.
			// After a failed read there is nothing of ours to
			// unread; the failure is left for the next read of b
			// to report (presumably it recurs there).
			if e == nil {
				b.UnreadByte()
			}
			break;
		}

		// Eat leading white space.
		for c == ' ' || c == '\t' {
			if c, e = b.ReadByte(); e != nil {
				if e == os.EOF {
					e = io.ErrUnexpectedEOF
				}
				return "", "", e;
			}
		}
		b.UnreadByte();

		// Read the rest of the line and add to value.
		if line, e = readLineBytes(b); e != nil {
			return "", "", e
		}
		value += " " + string(line);

		if len(value) >= maxValueLength {
			return "", "", &badStringError{"value too long for key", key}
		}
	}
	return key, value, nil;

Malformed:
	return "", "", &badStringError{"malformed header line", string(line)};
}

// atoi parses the run of decimal digits that starts at s[i].
// On success it returns the value, the index just past the last
// digit, and true.  If there is no digit at s[i], or the value
// grows beyond 1000000, it returns 0, 0, false.
func atoi(s string, i int) (n, i1 int, ok bool) {
	const limit = 1000000;
	start := i;
	for i < len(s) {
		d := s[i];
		if d < '0' || d > '9' {
			break
		}
		n = 10*n + int(d-'0');
		if n > limit {
			return 0, 0, false
		}
		i++;
	}
	if i == start {
		// Not a single digit was consumed.
		return 0, 0, false
	}
	return n, i, true;
}

// Parse HTTP version: "HTTP/1.2" -> (1, 2, true).
// The string must be exactly "HTTP/" followed by a decimal major
// number, a dot, and a decimal minor number.  Anything else,
// including a string too short to hold the "HTTP/" prefix,
// yields (0, 0, false).
func parseHTTPVersion(vers string) (int, int, bool) {
	// Check the length first: slicing vers[0:5] on a shorter string
	// (for example the request line "GET / HTTP") would crash.
	if len(vers) < 5 || vers[0:5] != "HTTP/" {
		return 0, 0, false
	}
	major, i, ok := atoi(vers, 5);
	if !ok || i >= len(vers) || vers[i] != '.' {
		return 0, 0, false
	}
	var minor int;
	minor, i, ok = atoi(vers, i+1);
	// The minor number must run to the end of the string.
	if !ok || i != len(vers) {
		return 0, 0, false
	}
	return major, minor, true;
}

// CanonicalHeaderKey returns the canonical format of the
// HTTP header key s.  The canonicalization converts the first
// letter and any letter following a hyphen to upper case;
// the rest are converted to lowercase.  For example, the
// canonical key for "accept-encoding" is "Accept-Encoding".
//
// The result is computed afresh on every call and no state is kept
// between calls, so keys arriving from the network cannot
// accumulate in memory.  A key that is already canonical is
// returned as is, without copying.
func CanonicalHeaderKey(s string) string {
	// canonicalize: first letter upper case
	// and upper case after each dash.
	// (Host, User-Agent, If-Modified-Since).
	// HTTP headers are ASCII only, so no Unicode issues.
	var a []byte;	// copy of s, made only once some byte has to change
	upper := true;
	for i := 0; i < len(s); i++ {
		v := s[i];
		c := v;
		if upper && 'a' <= v && v <= 'z' {
			c = v + 'A' - 'a'
		}
		if !upper && 'A' <= v && v <= 'Z' {
			c = v + 'a' - 'A'
		}
		if c != v && a == nil {
			// First change: start the copy with the bytes
			// already seen, all of which were left alone.
			a = make([]byte, len(s));
			for j := 0; j < i; j++ {
				a[j] = s[j]
			}
		}
		if a != nil {
			a[i] = c
		}
		upper = v == '-';
	}
	if a == nil {
		return s
	}
	return string(a);
}

// A chunkedReader decodes a body sent with "Transfer-Encoding: chunked"
// as it is read from r.
type chunkedReader struct {
	r	*bufio.Reader;	// source of the encoded body
	n	uint64;	// unread bytes in chunk
	err	os.Error;	// once set, returned by every later Read; os.EOF after the final chunk
}

// newChunkedReader returns a chunkedReader that takes its input
// from r; no chunk has been started and no error is recorded yet.
func newChunkedReader(r *bufio.Reader) *chunkedReader {
	cr := new(chunkedReader);
	cr.r = r;
	return cr;
}

// beginChunk reads the header of the next chunk and stores its size
// in cr.n.  A size of zero marks the last chunk: the trailer lines
// that follow are read and discarded up to the blank line that ends
// them, and cr.err is set to os.EOF.  Any failure to read or parse
// is recorded in cr.err.
func (cr *chunkedReader) beginChunk() {
	// chunk-size [ chunk-extension ] CRLF
	var line string;
	line, cr.err = readLine(cr.r);
	if cr.err != nil {
		return
	}
	// A chunk-extension (";name=value") may follow the size.
	// None is understood here, so cut it off along with any
	// white space left in front of it.
	if i := strings.Index(line, ";"); i >= 0 {
		line = line[0:i]
	}
	for len(line) > 0 && (line[len(line)-1] == ' ' || line[len(line)-1] == '\t') {
		line = line[0 : len(line)-1]
	}
	cr.n, cr.err = strconv.Btoui64(line, 16);
	if cr.err != nil {
		return
	}
	if cr.n == 0 {
		// trailer CRLF
		for {
			line, cr.err = readLine(cr.r);
			if cr.err != nil {
				return
			}
			if line == "" {
				break
			}
		}
		cr.err = os.EOF;
	}
}

// Read fills b with decoded body data, taking at most the rest of
// the current chunk in one call.  It returns os.EOF once the final,
// zero-length chunk and its trailer have been consumed.  If the
// input ends anywhere before that, the body is truncated and Read
// reports io.ErrUnexpectedEOF.  A chunk that is not followed by
// CRLF is reported as malformed.  Whatever error is reported is
// recorded and returned again by every later call.
func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
	if cr.err != nil {
		return 0, cr.err
	}
	if cr.n == 0 {
		cr.beginChunk();
		if cr.err != nil {
			return 0, cr.err
		}
	}
	// Never read past the end of the current chunk.
	if uint64(len(b)) > cr.n {
		b = b[0:cr.n]
	}
	n, cr.err = cr.r.Read(b);
	cr.n -= uint64(n);
	if cr.n == 0 && cr.err == nil {
		// end of chunk (CRLF)
		crlf := make([]byte, 2);
		if _, cr.err = io.ReadFull(cr.r, crlf); cr.err == nil {
			if crlf[0] != '\r' || crlf[1] != '\n' {
				cr.err = os.NewError("malformed chunked encoding")
			}
		}
	}
	if cr.err == os.EOF {
		// A proper end of body is detected in beginChunk and
		// returned above.  An EOF seen here means the input
		// stopped inside a chunk or before its closing CRLF.
		cr.err = io.ErrUnexpectedEOF
	}
	return n, cr.err;
}

// ReadRequest reads and parses a request from b.
//
// It reads the request line and the header fields, canonicalizes
// the header keys, joins the values of repeated keys with commas,
// and fills in the Host, Close, Referer and UserAgent fields.
// If the header announces a body, Body is set: to a reader that
// decodes from b on demand for "Transfer-Encoding: chunked", or
// to a buffer holding exactly Content-Length bytes already read
// from b.
//
// On any failure the request is nil and err describes the problem.
func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
	req = new(Request);

	// First line: GET /index.html HTTP/1.0
	var s string;
	if s, err = readLine(b); err != nil {
		return nil, err
	}

	var f []string;
	if f = strings.Split(s, " ", 3); len(f) < 3 {
		return nil, &badStringError{"malformed HTTP request", s}
	}
	req.Method, req.RawURL, req.Proto = f[0], f[1], f[2];
	var ok bool;
	if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
		return nil, &badStringError{"malformed HTTP version", req.Proto}
	}

	if req.URL, err = ParseURL(req.RawURL); err != nil {
		return nil, err
	}

	// Subsequent lines: Key: value.
	nheader := 0;
	req.Header = make(map[string]string);
	for {
		var key, value string;
		if key, value, err = readKeyValue(b); err != nil {
			return nil, err
		}
		if key == "" {
			break
		}
		if nheader++; nheader >= maxHeaderLines {
			return nil, ErrHeaderTooLong
		}

		key = CanonicalHeaderKey(key);

		// RFC 2616 says that if you send the same header key
		// multiple times, it has to be semantically equivalent
		// to concatenating the values separated by commas.
		oldvalue, present := req.Header[key];
		if present {
			req.Header[key] = oldvalue + "," + value
		} else {
			req.Header[key] = value
		}
	}

	// RFC2616: Must treat
	//	GET /index.html HTTP/1.1
	//	Host: www.google.com
	// and
	//	GET http://www.google.com/index.html HTTP/1.1
	//	Host: doesntmatter
	// the same.  In the second case, any Host line is ignored.
	// So the host in the URL wins, and the Host header is
	// consulted only when the URL does not name one.
	req.Host = req.URL.Host;
	if req.Host == "" {
		if v, present := req.Header["Host"]; present {
			req.Host = v
		}
	}

	// RFC2616: Should treat
	//	Pragma: no-cache
	// like
	//	Cache-Control: no-cache
	if v, present := req.Header["Pragma"]; present && v == "no-cache" {
		if _, presentcc := req.Header["Cache-Control"]; !presentcc {
			req.Header["Cache-Control"] = "no-cache"
		}
	}

	// Determine whether to hang up after sending the reply.
	if req.ProtoMajor < 1 || (req.ProtoMajor == 1 && req.ProtoMinor < 1) {
		req.Close = true
	} else if v, present := req.Header["Connection"]; present {
		// TODO: Should split on commas, toss surrounding white space,
		// and check each field.
		if v == "close" {
			req.Close = true
		}
	}

	// Pull out useful fields as a convenience to clients.
	if v, present := req.Header["Referer"]; present {
		req.Referer = v
	}
	if v, present := req.Header["User-Agent"]; present {
		req.UserAgent = v
	}

	// TODO: Parse specific header values:
	//	Accept
	//	Accept-Encoding
	//	Accept-Language
	//	Authorization
	//	Cache-Control
	//	Connection
	//	Date
	//	Expect
	//	From
	//	If-Match
	//	If-Modified-Since
	//	If-None-Match
	//	If-Range
	//	If-Unmodified-Since
	//	Max-Forwards
	//	Proxy-Authorization
	//	Referer [sic]
	//	TE (transfer-codings)
	//	Trailer
	//	Transfer-Encoding
	//	Upgrade
	//	User-Agent
	//	Via
	//	Warning

	// A message body exists when either Content-Length or Transfer-Encoding
	// headers are present. Transfer-Encoding trumps Content-Length.
	if v, present := req.Header["Transfer-Encoding"]; present && v == "chunked" {
		req.Body = newChunkedReader(b)
	} else if v, present := req.Header["Content-Length"]; present {
		length, err := strconv.Btoui64(v, 10);
		if err != nil {
			return nil, &badStringError{"invalid Content-Length", v}
		}
		// TODO: limit the Content-Length. This is an easy DoS vector.
		raw := make([]byte, length);
		// A single Read may return fewer bytes than requested even
		// though more are on the way, so keep reading until raw is
		// full or the input fails.
		if _, err := io.ReadFull(b, raw); err != nil {
			return nil, ErrShortBody
		}
		req.Body = bytes.NewBuffer(raw);
	}

	return req, nil;
}

func parseForm(m map[string][]string, query string) (err os.Error) {
	data := make(map[string]*vector.StringVector);
	for _, kv := range strings.Split(query, "&", 0) {
		kvPair := strings.Split(kv, "=", 2);

		var key, value string;
		var e os.Error;
		key, e = URLUnescape(kvPair[0]);
		if e == nil && len(kvPair) > 1 {
			value, e = URLUnescape(kvPair[1])
		}
		if e != nil {
			err = e
		}

		vec, ok := data[key];
		if !ok {
			vec = new(vector.StringVector);
			data[key] = vec;
		}
		vec.Push(value);
	}

	for k, vec := range data {
		m[k] = vec.Data()
	}

	return;
}

// ParseForm fills in r.Form.  For a GET request the form comes from
// the raw query of the URL; for a POST request it comes from the
// body, which is read in full and must be of type text/plain or
// application/x-www-form-urlencoded, or have no Content-Type at all.
// Requests with any other method get an empty query.
//
// ParseForm is idempotent: once r.Form is set, further calls do
// nothing and return nil.
func (r *Request) ParseForm() (err os.Error) {
	if r.Form != nil {
		return nil
	}
	r.Form = make(map[string][]string);

	query := "";
	if r.Method == "GET" {
		query = r.URL.RawQuery
	} else if r.Method == "POST" {
		if r.Body == nil {
			return os.ErrorString("missing form body")
		}
		ct, _ := r.Header["Content-Type"];
		// Only the media type counts; parameters after ";" are ignored.
		mediaType := strings.Split(ct, ";", 2)[0];
		if mediaType != "text/plain" && mediaType != "application/x-www-form-urlencoded" && mediaType != "" {
			// TODO(dsymonds): Handle multipart/form-data
			return &badStringError{"unknown Content-Type", ct}
		}
		body, e := ioutil.ReadAll(r.Body);
		if e != nil {
			return e
		}
		query = string(body);
	}
	return parseForm(r.Form, query);
}

// FormValue looks up key in the parsed form and returns its first
// value, or the empty string if the key is absent or has no values.
// The form is parsed on first use by calling ParseForm; an error
// from ParseForm is not reported.
func (r *Request) FormValue(key string) string {
	if r.Form == nil {
		r.ParseForm()
	}
	vs, ok := r.Form[key];
	if !ok || len(vs) == 0 {
		return ""
	}
	return vs[0];
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.