pkg/bootparam: add bootparam pkg

This adds the bootparam package which can marshal and unmarshal the Linux
kernel command line into boot parameters and a rest section passed to
init.

This is a very quirky format, so a fuzz testing harness is included
which verifies correctness against the reference implementation from
the kernel.

To save on complexity in the parser, Unmarshal rejects a set of weird
edge cases instead of parsing them to nonsensical data as the reference
implementation does.

Change-Id: I6debfa67e69ae8db4e0356f34ecb127ea27d18de
Reviewed-on: https://review.monogon.dev/c/monogon/+/1125
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/build/analysis/nogo_config.json b/build/analysis/nogo_config.json
index 885bdef..da28532 100644
--- a/build/analysis/nogo_config.json
+++ b/build/analysis/nogo_config.json
@@ -1,7 +1,8 @@
 {
   "composites": {
     "exclude_files": {
-      "external/": "third_party"
+      "external/": "third_party",
+      "metropolis/pkg/bootparam/": "gofuzz"
     }
   },
   "copylocks": {
diff --git a/metropolis/pkg/bootparam/BUILD.bazel b/metropolis/pkg/bootparam/BUILD.bazel
new file mode 100644
index 0000000..33a9d91
--- /dev/null
+++ b/metropolis/pkg/bootparam/BUILD.bazel
@@ -0,0 +1,19 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "bootparam",
+    srcs = ["bootparam.go"],
+    importpath = "source.monogon.dev/metropolis/pkg/bootparam",
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "bootparam_test",
+    srcs = ["bootparam_test.go"],
+    gc_goopts = ["-d=libfuzzer"],  # NOTE(review): presumably the instrumentation the Fuzz* tests need - confirm
+    deps = [
+        ":bootparam",
+        "//metropolis/pkg/bootparam/ref",  # cgo build of the kernel parser the fuzzers compare against
+        "@com_github_google_go_cmp//cmp",
+    ],
+)
diff --git a/metropolis/pkg/bootparam/bootparam.go b/metropolis/pkg/bootparam/bootparam.go
new file mode 100644
index 0000000..3d6b7fa
--- /dev/null
+++ b/metropolis/pkg/bootparam/bootparam.go
@@ -0,0 +1,215 @@
+// Package bootparam implements encoding and decoding of Linux kernel command
+// lines as documented in
+// https://docs.kernel.org/admin-guide/kernel-parameters.html
+//
+// The format is quite quirky and thus the implementation is mostly based
+// on the code in the Linux kernel implementing the decoder and not the
+// specification.
+package bootparam
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+)
+
+// Param represents a single boot parameter with or without a value.
+type Param struct {
+	Param, Value string // name of the parameter and its value, if any
+	HasValue     bool   // NOTE(review): neither set by Unmarshal nor read by Marshal in this file
+}
+
+// Params represents a list of kernel boot parameters in command line order.
+type Params []Param
+
+// isSpace reports whether r is whitespace as far as the Linux kernel is
+// concerned. For historical reasons Linux has an unusual definition of this:
+// besides the ASCII whitespace characters the byte 0xa0 counts as well.
+// Taken from @linux//lib:ctype.c
+func isSpace(r byte) bool {
+	if r == ' ' || r == 0xa0 {
+		return true
+	}
+	return '\t' <= r && r <= '\r' // '\t', '\n', '\v', '\f' and '\r' are contiguous
+}
+
+// TrimLeftSpace returns s without its leading spaces, with spaces being
+// defined the way Linux defines them (see isSpace).
+//
+// This is only exported for tests, do not use this. Because of import loops
+// as well as cgo restrictions this cannot be an internal function used by
+// tests.
+func TrimLeftSpace(s string) string {
+	for i := 0; i < len(s); i++ {
+		if !isSpace(s[i]) {
+			return s[i:]
+		}
+	}
+	// Either s is empty or it consists of nothing but spaces.
+	return ""
+}
+
+// containsSpace reports whether any byte of s is a space as per isSpace.
+func containsSpace(s string) bool {
+	n := len(s)
+	for n > 0 && !isSpace(s[n-1]) {
+		n--
+	}
+	return n > 0
+}
+
+// parseToken splits a single token of the command line into param and value
+// at the first `=` and removes the double quotes enclosing them. Tokens
+// starting with `=` or `"=` are rejected with an error.
+func parseToken(token string) (p Param, err error) {
+	if strings.HasPrefix(token, `=`) || strings.HasPrefix(token, `"=`) {
+		return Param{}, errors.New("param contains `=` at first position, this causes broken behavior")
+	}
+	name, val, hasValue := strings.Cut(token, "=")
+	quotedName := strings.HasPrefix(name, `"`)
+	p.Param = strings.TrimPrefix(name, `"`)
+	switch {
+	case !hasValue && quotedName:
+		p.Param = strings.TrimSuffix(p.Param, `"`)
+	case !hasValue:
+		// Plain flag without quotes, there is nothing to strip.
+	case strings.HasPrefix(val, `"`):
+		p.Value = strings.TrimSuffix(strings.TrimPrefix(val, `"`), `"`)
+	case quotedName:
+		// The quote opened in front of the param is closed after the value.
+		p.Value = strings.TrimSuffix(val, `"`)
+	default:
+		p.Value = val
+	}
+	return p, nil
+}
+
+// Unmarshal decodes a Linux kernel command line and returns a list of kernel
+// parameters as well as a rest section after the "--" parsing terminator.
+//
+// Leading whitespace is ignored and the command line ends at the first null
+// byte. Parameters are separated by whitespace as defined by Linux, except
+// inside of double quotes. An error is returned for tokens which parseToken
+// rejects; params then holds what has been decoded up to that token.
+func Unmarshal(cmdline string) (params Params, rest string, err error) {
+	cmdline = TrimLeftSpace(cmdline)
+	// Everything from the first null byte on is not part of the command line.
+	if nul := strings.IndexByte(cmdline, 0x00); nul != -1 {
+		cmdline = cmdline[:nul]
+	}
+	var (
+		tokenStart int  // index of the first byte of the current token
+		quoted     bool // whether position i is inside of double quotes
+	)
+	// Walk one position past the end so that the final token, which has no
+	// trailing delimiter, goes through the same handling as all others.
+	for i := 0; i <= len(cmdline); i++ {
+		atEnd := i == len(cmdline)
+		if !atEnd && (quoted || !isSpace(cmdline[i])) {
+			// Still inside of a token, only keep track of the quoting.
+			if cmdline[i] == '"' {
+				quoted = !quoted
+			}
+			continue
+		}
+
+		token := cmdline[tokenStart:i]
+		tokenStart = i + 1
+		// Consecutive delimiters produce empty tokens, skip those.
+		if TrimLeftSpace(token) == "" {
+			continue
+		}
+		var p Param
+		if p, err = parseToken(token); err != nil {
+			return params, rest, err
+		}
+		if p.Param == "--" {
+			// Stop processing and return everything left as rest. At the
+			// very end there is nothing left, so rest stays empty.
+			if !atEnd {
+				rest = TrimLeftSpace(cmdline[tokenStart:])
+			}
+			return params, rest, nil
+		}
+		params = append(params, p)
+	}
+	return params, rest, nil
+}
+
+// Marshal encodes a set of kernel parameters and an optional rest string into
+// a Linux kernel command line. It rejects data which is not encodable, which
+// includes null bytes, double quotes in params or values, '=' in params,
+// empty or "--" params, params with both whitespace and a value as well as
+// characters which contain 0xa0 in their UTF-8 representation (historical
+// Linux quirk of treating that as a space, inherited from Latin-1). Params
+// and values containing whitespace are wrapped in double quotes.
+func Marshal(params Params, rest string) (string, error) {
+	if strings.IndexByte(rest, 0x00) != -1 {
+		return "", errors.New("rest contains 0x00 byte, this is disallowed")
+	}
+	var strb strings.Builder
+	for _, p := range params {
+		if strings.ContainsRune(p.Param, '=') {
+			return "", fmt.Errorf("invalid '=' character in param %q", p.Param)
+		}
+		// Technically a weird subset of double quotes can be encoded, but
+		// this should probably not be done so just reject them all.
+		if strings.ContainsRune(p.Param, '"') {
+			return "", fmt.Errorf("invalid '\"' character in param %q", p.Param)
+		}
+		if strings.ContainsRune(p.Value, '"') {
+			return "", fmt.Errorf("invalid '\"' character in value %q", p.Value)
+		}
+		if strings.IndexByte(p.Param, 0x00) != -1 {
+			return "", fmt.Errorf("invalid null byte in param %q", p.Param)
+		}
+		if strings.IndexByte(p.Value, 0x00) != -1 {
+			return "", fmt.Errorf("invalid null byte in value %q", p.Value)
+		}
+		// Linux treats the byte 0xa0 as a space, even though it is a valid
+		// continuation byte in the UTF-8 encoding of many characters. This
+		// is unfortunate, but passing it through would break the whole
+		// command line.
+		if strings.IndexByte(p.Param, 0xa0) != -1 {
+			return "", fmt.Errorf("invalid 0xa0 byte in param %q", p.Param)
+		}
+		if strings.IndexByte(p.Value, 0xa0) != -1 {
+			return "", fmt.Errorf("invalid 0xa0 byte in value %q", p.Value)
+		}
+		// This should be allowed according to the docs, but is in fact broken.
+		if p.Value != "" && containsSpace(p.Param) {
+			return "", fmt.Errorf("param %q contains spaces and value, this is unsupported", p.Param)
+		}
+		if p.Param == "--" {
+			return "", errors.New("param '--' is reserved and cannot be used")
+		}
+		if p.Param == "" {
+			return "", errors.New("empty params are not supported")
+		}
+		if containsSpace(p.Param) {
+			strb.WriteRune('"')
+			strb.WriteString(p.Param)
+			strb.WriteRune('"')
+		} else {
+			strb.WriteString(p.Param)
+		}
+		if p.Value != "" { // HasValue is not consulted, an empty value is never written
+			strb.WriteRune('=')
+			if containsSpace(p.Value) {
+				strb.WriteRune('"')
+				strb.WriteString(p.Value)
+				strb.WriteRune('"')
+			} else {
+				strb.WriteString(p.Value)
+			}
+		}
+		strb.WriteRune(' ')
+	}
+	if len(rest) > 0 {
+		strb.WriteString("-- ")
+		// Starting whitespace will be dropped by the decoder anyways, do it
+		// here to make the resulting command line nicer.
+		strb.WriteString(TrimLeftSpace(rest))
+	}
+	return strb.String(), nil
+}
diff --git a/metropolis/pkg/bootparam/bootparam_test.go b/metropolis/pkg/bootparam/bootparam_test.go
new file mode 100644
index 0000000..a0032a4
--- /dev/null
+++ b/metropolis/pkg/bootparam/bootparam_test.go
@@ -0,0 +1,60 @@
+// This has to be bootparam_test: as package bootparam it would form an import cycle via ref.
+package bootparam_test
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"source.monogon.dev/metropolis/pkg/bootparam"
+	"source.monogon.dev/metropolis/pkg/bootparam/ref"
+)
+
+// Fuzzers can be run with
+// bazel test //metropolis/pkg/bootparam:bootparam_test
+//   --test_arg=-test.fuzz=FuzzMarshal
+//   --test_arg=-test.fuzzcachedir=/tmp/fuzz
+//   --test_arg=-test.fuzztime=60s
+
+func FuzzUnmarshal(f *testing.F) {
+	f.Add(`initrd="\test\some=value" root=yolo "definitely quoted" ro rootflags=`)
+	f.Fuzz(func(t *testing.T, a string) {
+		want, wantRest := ref.Parse(a) // the kernel's parser is the oracle
+		got, gotRest, err := bootparam.Unmarshal(a)
+		if err != nil {
+			return // Unmarshal is allowed to reject input the kernel accepts
+		}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("Parse(%q): params mismatch (-want +got):\n%s", a, diff)
+		}
+		if gotRest != wantRest {
+			t.Errorf("Parse(%q): expected rest to be %q, got %q", a, wantRest, gotRest)
+		}
+	})
+}
+
+// FuzzMarshal verifies that the kernel's parser reads back what Marshal wrote.
+func FuzzMarshal(f *testing.F) {
+	// Choose delimiters which mean nothing to the parser
+	f.Add("a:b;assd:9dsf;1234", "some fancy rest")
+	f.Fuzz(func(t *testing.T, paramsRaw string, rest string) {
+		var params bootparam.Params
+		for _, p := range strings.Split(paramsRaw, ";") {
+			a, b, _ := strings.Cut(p, ":")
+			params = append(params, bootparam.Param{Param: a, Value: b})
+		}
+		rest = bootparam.TrimLeftSpace(rest)
+		encoded, err := bootparam.Marshal(params, rest)
+		if err != nil {
+			return // Invalid input
+		}
+		gotParams, gotRest := ref.Parse(encoded)
+		if diff := cmp.Diff(params, gotParams); diff != "" {
+			t.Errorf("Marshal(%q): params mismatch (-want +got):\n%s", paramsRaw, diff)
+		}
+		if gotRest != rest {
+			t.Errorf("Marshal(%q, %q): expected rest to be %q, got %q", paramsRaw, rest, rest, gotRest)
+		}
+	})
+}
diff --git a/metropolis/pkg/bootparam/ref/BUILD.bazel b/metropolis/pkg/bootparam/ref/BUILD.bazel
new file mode 100644
index 0000000..d22540a
--- /dev/null
+++ b/metropolis/pkg/bootparam/ref/BUILD.bazel
@@ -0,0 +1,11 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "ref",
+    srcs = ["ref.go"],
+    cgo = True,  # ref.go carries the kernel's C parser in its cgo preamble
+    gc_goopts = ["-d=libfuzzer"],
+    importpath = "source.monogon.dev/metropolis/pkg/bootparam/ref",
+    visibility = ["//visibility:public"],
+    deps = ["//metropolis/pkg/bootparam"],  # for Param, Params and TrimLeftSpace
+)
diff --git a/metropolis/pkg/bootparam/ref/ref.go b/metropolis/pkg/bootparam/ref/ref.go
new file mode 100644
index 0000000..9842ecd
--- /dev/null
+++ b/metropolis/pkg/bootparam/ref/ref.go
@@ -0,0 +1,140 @@
+// Package ref provides the reference implementation for kernel command line
+// parsing as present in the Linux kernel. This is a separate package and
+// not part of the bootparam tests because Go does not let you use cgo in
+// tests.
+package ref
+
+// Reference implementation from the kernel
+
+/*
+#include <stdlib.h>
+#include <ctype.h>
+#include <stddef.h>
+
+#define _U	0x01
+#define _L	0x02
+#define _D	0x04
+#define _C	0x08
+#define _P	0x10
+#define _S	0x20
+#define _X	0x40
+#define _SP	0x80
+
+#define __ismask(x) (_ctype[(int)(unsigned char)(x)])
+#define kisspace(c)	((__ismask(c)&(_S)) != 0)
+
+const unsigned char _ctype[] = { // character class flags, indexed by byte value
+_C,_C,_C,_C,_C,_C,_C,_C,
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, // 8-15: 9-13 (tab, LF, VT, FF, CR) carry _S
+_C,_C,_C,_C,_C,_C,_C,_C,
+_C,_C,_C,_C,_C,_C,_C,_C,
+_S|_SP,_P,_P,_P,_P,_P,_P,_P, // 32-39: 32 is the ASCII space
+_P,_P,_P,_P,_P,_P,_P,_P,
+_D,_D,_D,_D,_D,_D,_D,_D,
+_D,_D,_P,_P,_P,_P,_P,_P,
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,
+_U,_U,_U,_U,_U,_U,_U,_U,
+_U,_U,_U,_U,_U,_U,_U,_U,
+_U,_U,_U,_P,_P,_P,_P,_P,
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,
+_L,_L,_L,_L,_L,_L,_L,_L,
+_L,_L,_L,_L,_L,_L,_L,_L,
+_L,_L,_L,_P,_P,_P,_P,_C,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, // 160-175: 160 (0xa0) carries _S as well
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};
+
+
+
+char *skip_spaces(const char *str)
+{
+	while (kisspace(*str)) // space as per the _ctype table, which includes 0xa0
+		++str;
+	return (char *)str; // first non-space character or the terminating null byte
+}
+
+
+// Parse a string to get a param value pair: *param and *val point into args,
+// which is modified in place. You can use " around spaces, but can't escape ".
+// Hyphens and underscores equivalent in parameter names.
+ char *next_arg(char *args, char **param, char **val)
+ {
+	 unsigned int i, equals = 0;
+	 int in_quote = 0, quoted = 0;
+
+	 if (*args == '"') {
+		 args++; // the opening quote is not part of the param
+		 in_quote = 1;
+		 quoted = 1;
+	 }
+
+	 for (i = 0; args[i]; i++) {
+		 if (kisspace(args[i]) && !in_quote)
+			 break;
+		 if (equals == 0) {
+			 if (args[i] == '=')
+				 equals = i;
+		 }
+		 if (args[i] == '"')
+			 in_quote = !in_quote;
+	 }
+
+	 *param = args;
+	 if (!equals) // no '=' found, or one at index 0 which cannot be told apart from none
+		 *val = NULL;
+	 else {
+		 args[equals] = '\0';
+		 *val = args + equals + 1;
+
+		 // Don't include quotes in value.
+		 if (**val == '"') {
+			 (*val)++;
+			 if (args[i-1] == '"')
+				 args[i-1] = '\0';
+		 }
+	 }
+	 if (quoted && i > 0 && args[i-1] == '"')
+		 args[i-1] = '\0';
+
+	 if (args[i]) {
+		 args[i] = '\0';
+		 args += i + 1;
+	 } else
+		 args += i;
+
+	 // Chew up trailing spaces.
+	 return skip_spaces(args);
+ }
+*/
+import "C"
+import (
+	"unsafe"
+
+	"source.monogon.dev/metropolis/pkg/bootparam"
+)
+
+// Parse runs the kernel's next_arg over str (minus leading spaces) and returns
+// the params it yields and, if a "--" param is hit, everything after it as rest.
+func Parse(str string) (params bootparam.Params, rest string) {
+	cs := C.CString(bootparam.TrimLeftSpace(str))
+	// Evaluated now, so the original allocation is freed even on the early return.
+	defer C.free(unsafe.Pointer(cs))
+	var param, val *C.char
+	for *cs != 0 {
+		cs = C.next_arg(cs, &param, &val)
+		p := bootparam.Param{Param: C.GoString(param)}
+		if val != nil {
+			p.Value = C.GoString(val)
+		}
+		if p.Param == "--" {
+			return params, C.GoString(cs)
+		}
+		params = append(params, p)
+	}
+	return params, rest
+}