Skip to content

Commit

Permalink
Improve header parsing code
Browse files Browse the repository at this point in the history
Because the net/http server removes \r\n from multi-line header values,
there's no need to check for \r or \n when skipping whitespace in
headers (see https://godoc.org/net/textproto#Reader.ReadMIMEHeader).
Given this fact, the whitespace test can be simplified to b == ' ' || b
== '\t'.  There's no need for the isSpaceOctet bit field in octetTypes.

The isTokenOctet bit field is the only bit field remaining after the
removal of isSpaceOctet.  Simplify the code by replacing the
isTokenOctet bit test in octetTypes with an array of booleans called
isTokenOctet.

Declare isTokenOctet as a composite literal instead of constructing it
at runtime.

Add documentation to core functions for parsing HTTP headers.
  • Loading branch information
Steven Scott authored and garyburd committed Oct 6, 2018
1 parent 3130e8d commit a51a35a
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 43 deletions.
132 changes: 89 additions & 43 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,68 +31,113 @@ func generateChallengeKey() (string, error) {
return base64.StdEncoding.EncodeToString(p), nil
}

// Octet types from RFC 2616.
var octetTypes [256]byte

const (
isTokenOctet = 1 << iota
isSpaceOctet
)

func init() {
// From RFC 2616
//
// OCTET = <any 8-bit sequence of data>
// CHAR = <any US-ASCII character (octets 0 - 127)>
// CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
// CR = <US-ASCII CR, carriage return (13)>
// LF = <US-ASCII LF, linefeed (10)>
// SP = <US-ASCII SP, space (32)>
// HT = <US-ASCII HT, horizontal-tab (9)>
// <"> = <US-ASCII double-quote mark (34)>
// CRLF = CR LF
// LWS = [CRLF] 1*( SP | HT )
// TEXT = <any OCTET except CTLs, but including LWS>
// separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <">
// | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT
// token = 1*<any CHAR except CTLs or separators>
// qdtext = <any TEXT except <">>

for c := 0; c < 256; c++ {
var t byte
isCtl := c <= 31 || c == 127
isChar := 0 <= c && c <= 127
isSeparator := strings.IndexRune(" \t\"(),/:;<=>?@[]\\{}", rune(c)) >= 0
if strings.IndexRune(" \t\r\n", rune(c)) >= 0 {
t |= isSpaceOctet
}
if isChar && !isCtl && !isSeparator {
t |= isTokenOctet
}
octetTypes[c] = t
}
// isTokenOctet is indexed by byte value and is true for exactly those octets
// that may appear in an RFC 2616 token, i.e. any US-ASCII character that is
// neither a control character nor a separator. Every entry not listed below
// keeps the zero value, false.
var isTokenOctet = [256]bool{
	// Punctuation that is not an RFC 2616 separator.
	'!': true, '#': true, '$': true, '%': true, '&': true,
	'\'': true, '*': true, '+': true, '-': true, '.': true,
	'^': true, '_': true, '`': true, '|': true, '~': true,

	// Decimal digits.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Upper-case letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Lower-case letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}

// skipSpace returns the suffix of s that remains once all leading RFC 2616
// linear whitespace has been skipped. The scan stops at the first byte that is
// neither a space nor a horizontal tab, so an s made up entirely of such bytes
// yields the empty string. CR and LF are not treated as whitespace here; the
// accompanying commit message explains that net/http strips them from
// multi-line header values.
//
// NOTE(review): this is a rendered diff hunk, so the pre-change test against
// octetTypes and its replacement appear on consecutive lines below.
func skipSpace(s string) (rest string) {
i := 0
for ; i < len(s); i++ {
if octetTypes[s[i]]&isSpaceOctet == 0 {
if b := s[i]; b != ' ' && b != '\t' {
break
}
}
return s[i:]
}

// nextToken splits s into its leading RFC 2616 token and the text that follows
// it. The token is the longest prefix of s whose bytes are all marked true in
// isTokenOctet; it is empty (and rest is all of s) when s does not begin with
// a token octet.
//
// NOTE(review): this is a rendered diff hunk, so the pre-change bit test
// against octetTypes and its replacement appear on consecutive lines below.
func nextToken(s string) (token, rest string) {
i := 0
for ; i < len(s); i++ {
if octetTypes[s[i]]&isTokenOctet == 0 {
if !isTokenOctet[s[i]] {
break
}
}
return s[:i], s[i:]
}

// nextTokenOrQuoted returns the leading token or quoted string per RFC 2616
// and the string following the token or quoted string.
func nextTokenOrQuoted(s string) (value string, rest string) {
if !strings.HasPrefix(s, "\"") {
return nextToken(s)
Expand Down Expand Up @@ -128,7 +173,8 @@ func nextTokenOrQuoted(s string) (value string, rest string) {
return "", ""
}

// equalASCIIFold returns true if s is equal to t with ASCII case folding.
// equalASCIIFold returns true if s is equal to t with ASCII case folding as
// defined in RFC 4790.
func equalASCIIFold(s, t string) bool {
for s != "" && t != "" {
sr, size := utf8.DecodeRuneInString(s)
Expand Down
1 change: 1 addition & 0 deletions util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ var equalASCIIFoldTests = []struct {
{"WebSocket", "websocket", true},
{"websocket", "WebSocket", true},
{"Öyster", "öyster", false},
{"WebSocket", "WetSocket", false},
}

func TestEqualASCIIFold(t *testing.T) {
Expand Down

0 comments on commit a51a35a

Please sign in to comment.