pkg/bootparam: add bootparam pkg

This adds the bootparam package which can marshal and unmarshal the Linux
kernel command line into boot parameters and a rest section passed to
init.

This is a very quirky format, so a fuzz testing harness is included which
checks this implementation against the reference implementation from the
kernel to verify correctness.

To save on complexity in the parser, Unmarshal rejects a set of weird
edge cases instead of parsing them into nonsensical data as the
reference implementation does.

Change-Id: I6debfa67e69ae8db4e0356f34ecb127ea27d18de
Reviewed-on: https://review.monogon.dev/c/monogon/+/1125
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/pkg/bootparam/bootparam.go b/metropolis/pkg/bootparam/bootparam.go
new file mode 100644
index 0000000..3d6b7fa
--- /dev/null
+++ b/metropolis/pkg/bootparam/bootparam.go
@@ -0,0 +1,215 @@
+// Package bootparam implements encoding and decoding of Linux kernel command
+// lines as documented in
+// https://docs.kernel.org/admin-guide/kernel-parameters.html
+//
+// The format is quite quirky and thus the implementation is mostly based
+// on the code in the Linux kernel implementing the decoder and not the
+// specification.
+package bootparam
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+)
+
+// Param represents a single kernel boot parameter: a name with or without a value.
+type Param struct {
+	Param, Value string // parameter name and value text
+	HasValue     bool   // NOTE(review): neither set by Unmarshal nor read by Marshal in this file; confirm intended use
+}
+
+// Params represents a list of kernel boot parameters in command line order.
+type Params []Param
+
+// isSpace reports whether r is whitespace by the Linux kernel's definition,
+// which for historical reasons is unusual. Taken from @linux//lib:ctype.c
+func isSpace(r byte) bool {
+	switch r {
+	case '\t', '\n', '\v', '\f', '\r', ' ', 0xa0: // ASCII whitespace plus 0xa0, which Linux also treats as a space
+		return true
+	default:
+		return false
+	}
+}
+
+// TrimLeftSpace returns s without its leading whitespace, using the Linux
+// definition of whitespace (see isSpace). It is only exported for tests, do
+// not use it elsewhere: because of import loops as well as cgo restrictions
+// it cannot be an internal function used by tests.
+func TrimLeftSpace(s string) string {
+	start := 0 // index of the first non-space byte, or len(s) if there is none
+	for ; start < len(s); start++ {
+		c := s[start]
+		if !isSpace(c) {
+			break
+		}
+	}
+
+	return s[start:] // empty string when s consists only of spaces
+}
+
+func containsSpace(s string) bool { // reports whether any byte of s is a space as defined by isSpace
+	for i := 0; i < len(s); i++ { // byte-wise scan: isSpace is defined on bytes, not runes
+		if isSpace(s[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+func parseToken(token string) (p Param, err error) { // splits one token into param and value, stripping double quotes
+	if strings.HasPrefix(token, `=`) || strings.HasPrefix(token, `"=`) { // '=' first, optionally after an opening quote
+		return Param{}, errors.New("param contains `=` at first position, this causes broken behavior")
+	}
+	param, value, hasValue := strings.Cut(token, "=") // split at the first '='; NOTE(review): hasValue is not stored in p.HasValue
+
+	if strings.HasPrefix(param, `"`) { // quote opened before the param name: drop it
+		p.Param = strings.TrimPrefix(param, `"`)
+		if !hasValue { // no value follows, so a closing quote would sit at the end of the param
+			p.Param = strings.TrimSuffix(p.Param, `"`)
+		}
+	} else {
+		p.Param = param
+	}
+	if hasValue {
+		if strings.HasPrefix(value, `"`) { // value carries its own quotes: param="value"
+			p.Value = strings.TrimSuffix(strings.TrimPrefix(value, `"`), `"`)
+		} else if strings.HasPrefix(param, `"`) { // quote spans both parts: "param=value"
+			p.Value = strings.TrimSuffix(value, `"`)
+		} else {
+			p.Value = value
+		}
+	}
+	return // naked return of the named results; err is still nil here
+}
+
+// Unmarshal decodes a Linux kernel command line into a list of kernel parameters
+// and the rest section following the "--" parsing terminator (empty if absent).
+func Unmarshal(cmdline string) (params Params, rest string, err error) {
+	cmdline = TrimLeftSpace(cmdline)
+	if pos := strings.IndexByte(cmdline, 0x00); pos != -1 { // everything from the first null byte on is ignored
+		cmdline = cmdline[:pos]
+	}
+	var lastIdx int  // start index of the token currently being scanned
+	var inQuote bool // true while inside double quotes, where spaces do not end a token
+	var p Param
+	for i := 0; i < len(cmdline); i++ {
+		if isSpace(cmdline[i]) && !inQuote { // an unquoted space terminates the current token
+			token := cmdline[lastIdx:i]
+			lastIdx = i + 1
+			if TrimLeftSpace(token) == "" { // skip empty tokens produced by runs of spaces
+				continue
+			}
+			p, err = parseToken(token)
+			if err != nil {
+				return // naked return: err is set, params holds what was parsed so far
+			}
+
+			// Stop processing and return everything left, minus leading spaces, as rest
+			if p.Param == "--" {
+				rest = TrimLeftSpace(cmdline[lastIdx:])
+				return
+			}
+			params = append(params, p)
+		}
+		if cmdline[i] == '"' { // every double quote toggles quoting; no escape handling is done here
+			inQuote = !inQuote
+		}
+	}
+	if len(cmdline)-lastIdx > 0 { // a final token which was not terminated by a space
+		token := cmdline[lastIdx:]
+		if TrimLeftSpace(token) == "" {
+			return
+		}
+		p, err = parseToken(token)
+		if err != nil {
+			return
+		}
+
+		// Stop processing, do not set rest as nothing follows the terminator
+		if p.Param == "--" {
+			return
+		}
+		params = append(params, p)
+	}
+	return
+}
+
+// Marshal encodes a set of kernel parameters and an optional rest string into
+// a Linux kernel command line. It returns an error for data which is not
+// encodable: null bytes, '=' or double quotes in params, double quotes in
+// values, and any 0xa0 byte in a param or value, e.g. as part of a UTF-8
+// sequence (historical Linux quirk of treating it as a space, from Latin-1).
+func Marshal(params Params, rest string) (string, error) {
+	if strings.IndexByte(rest, 0x00) != -1 {
+		return "", errors.New("rest contains 0x00 byte, this is disallowed")
+	}
+	var strb strings.Builder
+	for _, p := range params {
+		if strings.ContainsRune(p.Param, '=') {
+			return "", fmt.Errorf("invalid '=' character in param %q", p.Param)
+		}
+		// Technically a weird subset of double quotes can be encoded, but
+		// this should probably not be done, so just reject them all.
+		if strings.ContainsRune(p.Param, '"') {
+			return "", fmt.Errorf("invalid '\"' character in param %q", p.Param)
+		}
+		if strings.ContainsRune(p.Value, '"') {
+			return "", fmt.Errorf("invalid '\"' character in value %q", p.Value)
+		}
+		if strings.IndexByte(p.Param, 0x00) != -1 {
+			return "", fmt.Errorf("invalid null byte in param %q", p.Param)
+		}
+		if strings.IndexByte(p.Value, 0x00) != -1 {
+			return "", fmt.Errorf("invalid null byte in value %q", p.Value)
+		}
+		// Linux treats 0xa0 as a space, even though it is a valid UTF-8
+		// continuation byte. This is unfortunate, but passing it through
+		// would break the whole command line.
+		if strings.IndexByte(p.Param, 0xa0) != -1 {
+			return "", fmt.Errorf("invalid 0xa0 byte in param %q", p.Param)
+		}
+		if strings.IndexByte(p.Value, 0xa0) != -1 {
+			return "", fmt.Errorf("invalid 0xa0 byte in value %q", p.Value)
+		}
+		if strings.ContainsRune(p.Param, '"') { // NOTE(review): unreachable, the earlier '"' check on p.Param already returned; message says "value"
+			return "", fmt.Errorf("invalid '\"' character in value %q", p.Value)
+		}
+		// A param with spaces and a value should be allowed according to the docs, but is in fact broken.
+		if p.Value != "" && containsSpace(p.Param) {
+			return "", fmt.Errorf("param %q contains spaces and value, this is unsupported", p.Param)
+		}
+		if p.Param == "--" {
+			return "", errors.New("param '--' is reserved and cannot be used")
+		}
+		if p.Param == "" {
+			return "", errors.New("empty params are not supported")
+		}
+		if containsSpace(p.Param) { // params containing spaces are wrapped in double quotes
+			strb.WriteRune('"')
+			strb.WriteString(p.Param)
+			strb.WriteRune('"')
+		} else {
+			strb.WriteString(p.Param)
+		}
+		if p.Value != "" { // NOTE(review): p.HasValue is not consulted, so an empty value is written as a bare param
+			strb.WriteRune('=')
+			if containsSpace(p.Value) { // values containing spaces are wrapped in double quotes
+				strb.WriteRune('"')
+				strb.WriteString(p.Value)
+				strb.WriteRune('"')
+			} else {
+				strb.WriteString(p.Value)
+			}
+		}
+		strb.WriteRune(' ') // a space follows every param, including the last one
+	}
+	if len(rest) > 0 {
+		strb.WriteString("-- ")
+		// Leading whitespace will be dropped by the decoder anyway, so drop
+		// it here to make the resulting command line nicer.
+		strb.WriteString(TrimLeftSpace(rest))
+	}
+	return strb.String(), nil
+}