pkg/bootparam: add bootparam pkg
This adds the bootparam package which can marshal and unmarshal the Linux
kernel command line into boot parameters and a rest section passed to
init.
This is a very quirky format, so a fuzz testing harness is included which
checks the implementation against the reference implementation from the
kernel to verify correctness.
To save on complexity in the parser, a set of weird edge cases is rejected
by Unmarshal instead of being parsed into nonsensical data as the
reference implementation does.
Change-Id: I6debfa67e69ae8db4e0356f34ecb127ea27d18de
Reviewed-on: https://review.monogon.dev/c/monogon/+/1125
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/build/analysis/nogo_config.json b/build/analysis/nogo_config.json
index 885bdef..da28532 100644
--- a/build/analysis/nogo_config.json
+++ b/build/analysis/nogo_config.json
@@ -1,7 +1,8 @@
{
"composites": {
"exclude_files": {
- "external/": "third_party"
+ "external/": "third_party",
+ "metropolis/pkg/bootparam/": "gofuzz"
}
},
"copylocks": {
diff --git a/metropolis/pkg/bootparam/BUILD.bazel b/metropolis/pkg/bootparam/BUILD.bazel
new file mode 100644
index 0000000..33a9d91
--- /dev/null
+++ b/metropolis/pkg/bootparam/BUILD.bazel
@@ -0,0 +1,19 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+# Go library target for the bootparam package, built from bootparam.go.
+go_library(
+ name = "bootparam",
+ srcs = ["bootparam.go"],
+ importpath = "source.monogon.dev/metropolis/pkg/bootparam",
+ visibility = ["//visibility:public"],
+)
+
+# Test target for bootparam. Besides the library itself it depends on the
+# reference parser in //metropolis/pkg/bootparam/ref and on go-cmp.
+go_test(
+ name = "bootparam_test",
+ srcs = ["bootparam_test.go"],
+ # NOTE(review): presumably enables the compiler's coverage instrumentation
+ # required by the native Go fuzzer - confirm.
+ gc_goopts = ["-d=libfuzzer"],
+ deps = [
+ ":bootparam",
+ "//metropolis/pkg/bootparam/ref",
+ "@com_github_google_go_cmp//cmp",
+ ],
+)
diff --git a/metropolis/pkg/bootparam/bootparam.go b/metropolis/pkg/bootparam/bootparam.go
new file mode 100644
index 0000000..3d6b7fa
--- /dev/null
+++ b/metropolis/pkg/bootparam/bootparam.go
@@ -0,0 +1,215 @@
+// Package bootparam implements encoding and decoding of Linux kernel command
+// lines as documented in
+// https://docs.kernel.org/admin-guide/kernel-parameters.html
+//
+// The format is quite quirky and thus the implementation is mostly based
+// on the code in the Linux kernel implementing the decoder and not the
+// specification.
+package bootparam
+
+import (
+ "errors"
+ "fmt"
+ "strings"
+)
+
+// Param is a single entry of a kernel command line: a parameter name with
+// or without a value.
+//
+// NOTE(review): nothing in this file sets or reads HasValue. Unmarshal leaves
+// it at false and Marshal only looks at whether Value is empty.
+type Param struct {
+	// Param is the name of the parameter.
+	Param string
+	// Value is the value assigned to the parameter, empty if there is none.
+	Value string
+	// HasValue presumably indicates whether a value is present - see the
+	// note above.
+	HasValue bool
+}
+
+// Params is an ordered list of kernel boot parameters.
+type Params []Param
+
+// isSpace reports whether r is whitespace according to the Linux kernel.
+// For historical reasons the kernel's definition is unusual: in addition to
+// the ASCII whitespace characters it also contains the byte 0xa0.
+// Taken from @linux//lib:ctype.c
+func isSpace(r byte) bool {
+	// '\t', '\n', '\v', '\f' and '\r' are the consecutive bytes 0x09-0x0d.
+	if r >= '\t' && r <= '\r' {
+		return true
+	}
+	return r == ' ' || r == 0xa0
+}
+
+// TrimLeftSpace returns s with all leading whitespace, as defined by Linux,
+// removed.
+// This is only exported for tests, do not use this. Because of import loops
+// as well as cgo restrictions this cannot be an internal function used by
+// tests.
+func TrimLeftSpace(s string) string {
+	for i := 0; i < len(s); i++ {
+		if !isSpace(s[i]) {
+			return s[i:]
+		}
+	}
+	// s is empty or consists of whitespace only.
+	return ""
+}
+
+// containsSpace reports whether any byte of s is whitespace as defined by
+// isSpace.
+func containsSpace(s string) bool {
+	// Iterate over bytes, not runes: isSpace is defined on single bytes.
+	for _, c := range []byte(s) {
+		if isSpace(c) {
+			return true
+		}
+	}
+	return false
+}
+
+// parseToken decodes a single token of the command line into a Param. The
+// token is split at its first "=" into name and value and the double quotes
+// enclosing them are removed.
+// Tokens starting with `=` or `"=` are rejected with an error.
+func parseToken(token string) (p Param, err error) {
+	if strings.HasPrefix(token, `=`) || strings.HasPrefix(token, `"=`) {
+		return Param{}, errors.New("param contains `=` at first position, this causes broken behavior")
+	}
+	name, val, found := strings.Cut(token, "=")
+	nameQuoted := strings.HasPrefix(name, `"`)
+
+	p.Param = name
+	if nameQuoted {
+		p.Param = strings.TrimPrefix(name, `"`)
+		// Without a value the closing quote, if any, is part of the name
+		// portion of the token.
+		if !found {
+			p.Param = strings.TrimSuffix(p.Param, `"`)
+		}
+	}
+	if !found {
+		return p, nil
+	}
+
+	switch {
+	case strings.HasPrefix(val, `"`):
+		// The value carries its own pair of quotes.
+		p.Value = strings.TrimSuffix(strings.TrimPrefix(val, `"`), `"`)
+	case nameQuoted:
+		// The quote opened in front of the name closes after the value.
+		p.Value = strings.TrimSuffix(val, `"`)
+	default:
+		p.Value = val
+	}
+	return p, nil
+}
+
+// Unmarshal decodes a Linux kernel command line and returns a list of kernel
+// parameters as well as a rest section after the "--" parsing terminator.
+//
+// Leading whitespace is ignored and the command line is cut off at the first
+// null byte. Tokens are separated by whitespace (as defined by isSpace) which
+// is not enclosed in double quotes. If a token cannot be parsed, the error
+// from parseToken is returned together with the parameters decoded up to
+// that point.
+func Unmarshal(cmdline string) (params Params, rest string, err error) {
+ cmdline = TrimLeftSpace(cmdline)
+ // Everything from the first null byte on is dropped.
+ if pos := strings.IndexByte(cmdline, 0x00); pos != -1 {
+ cmdline = cmdline[:pos]
+ }
+ // lastIdx is the offset at which the token currently being scanned starts,
+ // inQuote tracks whether the scanner is inside a double-quoted section.
+ var lastIdx int
+ var inQuote bool
+ var p Param
+ for i := 0; i < len(cmdline); i++ {
+ // Whitespace outside of quotes terminates the current token.
+ if isSpace(cmdline[i]) && !inQuote {
+ token := cmdline[lastIdx:i]
+ lastIdx = i + 1
+ // Skip tokens without content, e.g. the empty ones between
+ // consecutive separators.
+ if TrimLeftSpace(token) == "" {
+ continue
+ }
+ p, err = parseToken(token)
+ if err != nil {
+ return
+ }
+
+ // Stop processing and return everything left as rest
+ if p.Param == "--" {
+ rest = TrimLeftSpace(cmdline[lastIdx:])
+ return
+ }
+ params = append(params, p)
+ }
+ // Every double quote toggles the quoting state.
+ if cmdline[i] == '"' {
+ inQuote = !inQuote
+ }
+ }
+ // Handle the final token, which is not terminated by whitespace.
+ if len(cmdline)-lastIdx > 0 {
+ token := cmdline[lastIdx:]
+ if TrimLeftSpace(token) == "" {
+ return
+ }
+ p, err = parseToken(token)
+ if err != nil {
+ return
+ }
+
+ // Stop processing, do not set rest as there is none
+ if p.Param == "--" {
+ return
+ }
+ params = append(params, p)
+ }
+ return
+}
+
+// Marshal encodes a set of kernel parameters and an optional rest string into
+// a Linux kernel command line. It rejects data which is not encodable, which
+// includes null bytes, double quotes in params as well as characters which
+// contain 0xa0 in their UTF-8 representation (historical Linux quirk of
+// treating that as a space, inherited from Latin-1).
+//
+// Params and values containing whitespace are enclosed in double quotes. A
+// value is only emitted if it is not empty and every param is followed by a
+// single space. If rest is not empty it is appended after a "-- " terminator
+// with its leading whitespace removed.
+func Marshal(params Params, rest string) (string, error) {
+	if strings.IndexByte(rest, 0x00) != -1 {
+		return "", errors.New("rest contains 0x00 byte, this is disallowed")
+	}
+	var strb strings.Builder
+	for _, p := range params {
+		if strings.ContainsRune(p.Param, '=') {
+			return "", fmt.Errorf("invalid '=' character in param %q", p.Param)
+		}
+		// Technically a weird subset of double quotes can be encoded, but
+		// this should probably not be done so just reject them all.
+		if strings.ContainsRune(p.Param, '"') {
+			return "", fmt.Errorf("invalid '\"' character in param %q", p.Param)
+		}
+		if strings.ContainsRune(p.Value, '"') {
+			return "", fmt.Errorf("invalid '\"' character in value %q", p.Value)
+		}
+		if strings.IndexByte(p.Param, 0x00) != -1 {
+			return "", fmt.Errorf("invalid null byte in param %q", p.Param)
+		}
+		if strings.IndexByte(p.Value, 0x00) != -1 {
+			return "", fmt.Errorf("invalid null byte in value %q", p.Value)
+		}
+		// Linux treats the byte 0xa0 as a space, even though it is a valid
+		// UTF-8 continuation byte and thus part of the encoding of some
+		// non-ASCII characters. This is unfortunate, but passing it through
+		// would break the whole command line.
+		if strings.IndexByte(p.Param, 0xa0) != -1 {
+			return "", fmt.Errorf("invalid 0xa0 byte in param %q", p.Param)
+		}
+		if strings.IndexByte(p.Value, 0xa0) != -1 {
+			return "", fmt.Errorf("invalid 0xa0 byte in value %q", p.Value)
+		}
+		paramHasSpace := containsSpace(p.Param)
+		// This should be allowed according to the docs, but is in fact broken.
+		if p.Value != "" && paramHasSpace {
+			return "", fmt.Errorf("param %q contains spaces and value, this is unsupported", p.Param)
+		}
+		if p.Param == "--" {
+			return "", errors.New("param '--' is reserved and cannot be used")
+		}
+		if p.Param == "" {
+			return "", errors.New("empty params are not supported")
+		}
+		// Quote whatever contains whitespace, otherwise the decoder would
+		// split it into multiple tokens.
+		if paramHasSpace {
+			strb.WriteRune('"')
+			strb.WriteString(p.Param)
+			strb.WriteRune('"')
+		} else {
+			strb.WriteString(p.Param)
+		}
+		if p.Value != "" {
+			strb.WriteRune('=')
+			if containsSpace(p.Value) {
+				strb.WriteRune('"')
+				strb.WriteString(p.Value)
+				strb.WriteRune('"')
+			} else {
+				strb.WriteString(p.Value)
+			}
+		}
+		strb.WriteRune(' ')
+	}
+	if len(rest) > 0 {
+		strb.WriteString("-- ")
+		// Starting whitespace will be dropped by the decoder anyways, do it
+		// here to make the resulting command line nicer.
+		strb.WriteString(TrimLeftSpace(rest))
+	}
+	return strb.String(), nil
+}
diff --git a/metropolis/pkg/bootparam/bootparam_test.go b/metropolis/pkg/bootparam/bootparam_test.go
new file mode 100644
index 0000000..a0032a4
--- /dev/null
+++ b/metropolis/pkg/bootparam/bootparam_test.go
@@ -0,0 +1,60 @@
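+// This has to be the external test package bootparam_test: the tests import
+// package ref, which itself imports bootparam, so declaring package bootparam
+// here would create an import cycle.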
+package bootparam_test
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+
+ "source.monogon.dev/metropolis/pkg/bootparam"
+ "source.monogon.dev/metropolis/pkg/bootparam/ref"
+)
+
+// Fuzzers can be run with
+// bazel test //metropolis/pkg/bootparam:bootparam_test
+// --test_arg=-test.fuzz=FuzzMarshal
+// --test_arg=-test.fuzzcachedir=/tmp/fuzz
+// --test_arg=-test.fuzztime=60s
+
+// FuzzUnmarshal feeds arbitrary command lines to both the reference parser
+// and Unmarshal and reports any difference between their results. Inputs
+// which Unmarshal rejects with an error are skipped.
+func FuzzUnmarshal(f *testing.F) {
+	f.Add(`initrd="\test\some=value" root=yolo "definitely quoted" ro rootflags=`)
+	f.Fuzz(func(t *testing.T, cmdline string) {
+		wantParams, wantRest := ref.Parse(cmdline)
+		gotParams, gotRest, err := bootparam.Unmarshal(cmdline)
+		if err != nil {
+			// Input rejected by Unmarshal, there is nothing to compare.
+			return
+		}
+		if d := cmp.Diff(wantParams, gotParams); d != "" {
+			t.Errorf("Parse(%q): params mismatch (-want +got):\n%s", cmdline, d)
+		}
+		if gotRest != wantRest {
+			t.Errorf("Parse(%q): expected rest to be %q, got %q", cmdline, wantRest, gotRest)
+		}
+	})
+}
+
+// FuzzMarshal builds a parameter list from the fuzz input, encodes it with
+// Marshal and checks that the reference parser decodes the result back into
+// the same parameters and rest. Inputs which Marshal rejects are skipped.
+func FuzzMarshal(f *testing.F) {
+	// Choose delimiters which mean nothing to the parser
+	f.Add("a:b;assd:9dsf;1234", "some fancy rest")
+	f.Fuzz(func(t *testing.T, paramsRaw string, rest string) {
+		paramsSeparated := strings.Split(paramsRaw, ";")
+		var params bootparam.Params
+		for _, p := range paramsSeparated {
+			a, b, _ := strings.Cut(p, ":")
+			params = append(params, bootparam.Param{Param: a, Value: b})
+		}
+		// Marshal strips leading whitespace from the rest, so normalize the
+		// expectation the same way.
+		rest = bootparam.TrimLeftSpace(rest)
+		encoded, err := bootparam.Marshal(params, rest)
+		if err != nil {
+			return // Invalid input
+		}
+		gotParams, gotRest := ref.Parse(encoded)
+		// The input is what we want to get back, the decoded data is what
+		// we got. Keep the argument order in line with the labels in the
+		// messages.
+		if diff := cmp.Diff(params, gotParams); diff != "" {
+			t.Errorf("Marshal(%q): params mismatch (-want +got):\n%s", paramsRaw, diff)
+		}
+		if gotRest != rest {
+			t.Errorf("Marshal(%q, %q): expected rest to be %q, got %q", paramsRaw, rest, rest, gotRest)
+		}
+	})
+}
diff --git a/metropolis/pkg/bootparam/ref/BUILD.bazel b/metropolis/pkg/bootparam/ref/BUILD.bazel
new file mode 100644
index 0000000..d22540a
--- /dev/null
+++ b/metropolis/pkg/bootparam/ref/BUILD.bazel
@@ -0,0 +1,11 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+# Go library target for the reference parser. It is built with cgo enabled
+# and depends on the bootparam package.
+go_library(
+ name = "ref",
+ srcs = ["ref.go"],
+ cgo = True,
+ # NOTE(review): presumably needed so this package can be linked into the
+ # fuzzing-instrumented test binary - confirm.
+ gc_goopts = ["-d=libfuzzer"],
+ importpath = "source.monogon.dev/metropolis/pkg/bootparam/ref",
+ visibility = ["//visibility:public"],
+ deps = ["//metropolis/pkg/bootparam"],
+)
diff --git a/metropolis/pkg/bootparam/ref/ref.go b/metropolis/pkg/bootparam/ref/ref.go
new file mode 100644
index 0000000..9842ecd
--- /dev/null
+++ b/metropolis/pkg/bootparam/ref/ref.go
@@ -0,0 +1,140 @@
+// Package ref provides the reference implementation for kernel command line
+// parsing as present in the Linux kernel. This is a separate package and
+// not part of the bootparam tests because Go does not let you use cgo in
+// tests.
+package ref
+
+// Reference implementation from the kernel
+
+/*
+#include <stdlib.h>
+#include <ctype.h>
+#include <stddef.h>
+
+// Bit flags stored in the entries of the _ctype table below. Only _S is
+// evaluated in this file (by kisspace).
+#define _U 0x01
+#define _L 0x02
+#define _D 0x04
+#define _C 0x08
+#define _P 0x10
+#define _S 0x20
+#define _X 0x40
+#define _SP 0x80
+
+// __ismask looks up the flags of a character, kisspace checks whether the
+// _S flag is set for it.
+#define __ismask(x) (_ctype[(int)(unsigned char)(x)])
+#define kisspace(c) ((__ismask(c)&(_S)) != 0)
+
+// Flag table indexed by byte value. The first 128 entries cover the ASCII
+// range, the remaining ones the upper half of the byte range. Note that the
+// entry for 0xa0 has _S set, so kisspace treats that byte as a space.
+const unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C,
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,
+_C,_C,_C,_C,_C,_C,_C,_C,
+_C,_C,_C,_C,_C,_C,_C,_C,
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,
+_P,_P,_P,_P,_P,_P,_P,_P,
+_D,_D,_D,_D,_D,_D,_D,_D,
+_D,_D,_P,_P,_P,_P,_P,_P,
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,
+_U,_U,_U,_U,_U,_U,_U,_U,
+_U,_U,_U,_U,_U,_U,_U,_U,
+_U,_U,_U,_P,_P,_P,_P,_P,
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,
+_L,_L,_L,_L,_L,_L,_L,_L,
+_L,_L,_L,_L,_L,_L,_L,_L,
+_L,_L,_L,_P,_P,_P,_P,_C,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};
+
+
+
+// Returns a pointer to the first character of str which is not a space
+// according to kisspace. This is the terminating null byte if str consists
+// of spaces only.
+char *skip_spaces(const char *str)
+{
+ while (kisspace(*str))
+ ++str;
+ return (char *)str;
+}
+
+
+// * Parse a string to get a param value pair.
+// * You can use " around spaces, but can't escape ".
+// * Hyphens and underscores equivalent in parameter names.
+//
+// args is modified in place: null bytes are written to terminate the param
+// and the value. *param is set to the start of the param, *val to the start
+// of the value or to NULL if no '=' was recorded. The return value points to
+// the start of the next argument with leading spaces skipped.
+// Note that nothing in this function itself treats hyphens and underscores
+// as equivalent.
+ char *next_arg(char *args, char **param, char **val)
+ {
+ unsigned int i, equals = 0;
+ int in_quote = 0, quoted = 0;
+
+ // A leading quote is skipped and opens a quoted section.
+ if (*args == '"') {
+ args++;
+ in_quote = 1;
+ quoted = 1;
+ }
+
+ // Scan up to the first space outside of quotes or the end of the string.
+ for (i = 0; args[i]; i++) {
+ if (kisspace(args[i]) && !in_quote)
+ break;
+ // equals == 0 doubles as the "not found yet" marker, thus a '=' at
+ // index 0 is never recorded as the separator.
+ if (equals == 0) {
+ if (args[i] == '=')
+ equals = i;
+ }
+ if (args[i] == '"')
+ in_quote = !in_quote;
+ }
+
+ *param = args;
+ if (!equals)
+ *val = NULL;
+ else {
+ // Split param and value at the recorded '='.
+ args[equals] = '\0';
+ *val = args + equals + 1;
+
+ // Don't include quotes in value.
+ if (**val == '"') {
+ (*val)++;
+ if (args[i-1] == '"')
+ args[i-1] = '\0';
+ }
+ }
+ // Drop the closing quote belonging to a quote opened at the very start.
+ if (quoted && i > 0 && args[i-1] == '"')
+ args[i-1] = '\0';
+
+ // Terminate this argument and step past the separator, unless the end
+ // of the string has been reached.
+ if (args[i]) {
+ args[i] = '\0';
+ args += i + 1;
+ } else
+ args += i;
+
+ // Chew up trailing spaces.
+ return skip_spaces(args);
+ }
+*/
+import "C"
+import (
+ "unsafe"
+
+ "source.monogon.dev/metropolis/pkg/bootparam"
+)
+
+// Parse decodes a kernel command line using the reference implementation
+// (next_arg) taken from the Linux kernel. It returns the decoded parameters
+// and, if a "--" parameter is encountered, everything following it as rest.
+// Leading whitespace of str is removed before parsing.
+func Parse(str string) (params bootparam.Params, rest string) {
+	cs := C.CString(bootparam.TrimLeftSpace(str))
+	// C.CString allocates C memory which has to be freed on every return
+	// path, including the early return on "--". The argument of a deferred
+	// call is evaluated right here, so this frees the start of the
+	// allocation even though cs is advanced in the loop below.
+	defer C.free(unsafe.Pointer(cs))
+	var param, val *C.char
+	for *cs != 0 {
+		var p bootparam.Param
+		cs = C.next_arg(cs, &param, &val)
+		// C.GoString copies, so the results stay valid after the free.
+		p.Param = C.GoString(param)
+		if val != nil {
+			p.Value = C.GoString(val)
+		}
+		if p.Param == "--" {
+			rest = C.GoString(cs)
+			return
+		}
+		params = append(params, p)
+	}
+	return
+}