osbase/structfs: add package

This adds the structfs package, which defines a data structure for a
file system and a blob interface.

Change-Id: I646205faf7d28ec016d2038b0a8931b64e7afc83
Reviewed-on: https://review.monogon.dev/c/monogon/+/4036
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/osbase/structfs/BUILD.bazel b/osbase/structfs/BUILD.bazel
new file mode 100644
index 0000000..aba32ba
--- /dev/null
+++ b/osbase/structfs/BUILD.bazel
@@ -0,0 +1,17 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "structfs",
+    srcs = [
+        "blob.go",
+        "structfs.go",
+    ],
+    importpath = "source.monogon.dev/osbase/structfs",
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "structfs_test",
+    srcs = ["structfs_test.go"],
+    deps = [":structfs"],
+)
diff --git a/osbase/structfs/blob.go b/osbase/structfs/blob.go
new file mode 100644
index 0000000..74fd0f5
--- /dev/null
+++ b/osbase/structfs/blob.go
@@ -0,0 +1,68 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package structfs
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"io/fs"
+	"os"
+)
+
+// Blob is a binary large object, a read-only sequence of bytes of a known size.
+type Blob interface {
+	Open() (io.ReadCloser, error)
+	Size() int64
+}
+
+// Bytes implements [Blob] for a byte slice.
+type Bytes []byte
+
+func (b Bytes) Open() (io.ReadCloser, error) {
+	return &bytesReadCloser{*bytes.NewReader(b)}, nil
+}
+
+func (b Bytes) Size() int64 {
+	return int64(len(b))
+}
+
+type bytesReadCloser struct {
+	bytes.Reader
+}
+
+func (*bytesReadCloser) Close() error {
+	return nil
+}
+
+var errNotRegular = errors.New("not a regular file")
+
+// OSPathBlob creates a [Blob] for an OS path.
+func OSPathBlob(path string) (Blob, error) {
+	info, err := os.Stat(path)
+	if err != nil {
+		return nil, err
+	}
+	if !info.Mode().IsRegular() {
+		return nil, &fs.PathError{Op: "blob", Path: path, Err: errNotRegular}
+	}
+	b := &osPathBlob{
+		path: path,
+		size: info.Size(),
+	}
+	return b, nil
+}
+
+type osPathBlob struct {
+	path string
+	size int64
+}
+
+func (b *osPathBlob) Open() (io.ReadCloser, error) {
+	return os.Open(b.path)
+}
+
+func (b *osPathBlob) Size() int64 {
+	return b.size
+}
diff --git a/osbase/structfs/structfs.go b/osbase/structfs/structfs.go
new file mode 100644
index 0000000..ab77e80
--- /dev/null
+++ b/osbase/structfs/structfs.go
@@ -0,0 +1,215 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package structfs defines a data structure for a file system, similar to the
+// [fs] package but based on structs instead of interfaces.
+//
+// The entire tree structure and directory entry metadata is stored in memory.
+// File content is represented with [Blob], and may come from various sources.
+package structfs
+
+import (
+	"io/fs"
+	"iter"
+	pathlib "path"
+	"strings"
+	"syscall"
+	"time"
+	"unicode/utf8"
+)
+
+// Tree represents a file system tree.
+type Tree []*Node
+
+// Node is a node in a file system tree, which is either a file or directory.
+type Node struct {
+	// Name of this node, which must be valid according to [ValidName].
+	Name string
+	// Mode contains the file type and permissions.
+	Mode fs.FileMode
+	// ModTime is the modification time.
+	ModTime time.Time
+	// Content is the file content, must be set for regular files.
+	Content Blob
+	// Children of a directory, must be empty if this is not a directory.
+	Children Tree
+	// Sys contains any system-specific directory entry fields.
+	//
+	// It should be accessed using interface type assertions, to allow combining
+	// information for multiple target systems with struct embedding.
+	Sys any
+}
+
+type Option func(*Node)
+
+// WithModTime sets the ModTime of the Node.
+func WithModTime(t time.Time) Option {
+	return func(n *Node) {
+		n.ModTime = t
+	}
+}
+
+const permMask = fs.ModePerm | fs.ModeSetuid | fs.ModeSetgid | fs.ModeSticky
+
+// WithPerm sets the permission bits of the Node.
+func WithPerm(perm fs.FileMode) Option {
+	return func(n *Node) {
+		n.Mode = (n.Mode & ^permMask) | (perm & permMask)
+	}
+}
+
+// WithSys sets the Sys field of the Node.
+func WithSys(sys any) Option {
+	return func(n *Node) {
+		n.Sys = sys
+	}
+}
+
+// File creates a regular file node with the given name and content.
+//
+// Permission defaults to 644.
+func File(name string, content Blob, opts ...Option) *Node {
+	n := &Node{
+		Name:    name,
+		Mode:    0o644,
+		Content: content,
+	}
+	for _, f := range opts {
+		f(n)
+	}
+	return n
+}
+
+// Dir creates a directory node with the given name and children.
+//
+// Permission defaults to 755.
+func Dir(name string, children Tree, opts ...Option) *Node {
+	n := &Node{
+		Name:     name,
+		Mode:     fs.ModeDir | 0o755,
+		Children: children,
+	}
+	for _, f := range opts {
+		f(n)
+	}
+	return n
+}
+
+// PlaceFile creates parent directories if necessary and places a file with the
+// given content at the path. It fails if path already exists.
+func (t *Tree) PlaceFile(path string, content Blob, opts ...Option) error {
+	path, name, err := splitPlacePath(path)
+	if err != nil {
+		return err
+	}
+	return t.Place(path, File(name, content, opts...))
+}
+
+// PlaceDir creates parent directories if necessary and places a directory with
+// the given children at the path. It fails if path already exists.
+func (t *Tree) PlaceDir(path string, children Tree, opts ...Option) error {
+	path, name, err := splitPlacePath(path)
+	if err != nil {
+		return err
+	}
+	return t.Place(path, Dir(name, children, opts...))
+}
+
+func splitPlacePath(path string) (dir string, name string, err error) {
+	if !fs.ValidPath(path) || path == "." {
+		return "", "", &fs.PathError{Op: "place", Path: path, Err: fs.ErrInvalid}
+	}
+	dir, name = pathlib.Split(path)
+	if dir == "" {
+		dir = "."
+	} else {
+		dir = dir[:len(dir)-1]
+	}
+	return
+}
+
+// Place creates directories if necessary and places the node in the directory
+// at the path.
+//
+// The special path "." indicates the root.
+func (t *Tree) Place(path string, node *Node) error {
+	if !fs.ValidPath(path) {
+		return &fs.PathError{Op: "place", Path: path, Err: fs.ErrInvalid}
+	}
+	treeRef := t
+	if path != "." {
+		pathlen := 0
+	outer:
+		for name := range strings.SplitSeq(path, "/") {
+			pathlen += len(name) + 1
+			for _, nodeRef := range *treeRef {
+				if nodeRef.Name == name {
+					if !nodeRef.Mode.IsDir() {
+						return &fs.PathError{Op: "mkdir", Path: path[:pathlen-1], Err: syscall.ENOTDIR}
+					}
+					treeRef = &nodeRef.Children
+					continue outer
+				}
+			}
+			dir := Dir(name, nil)
+			*treeRef = append(*treeRef, dir)
+			treeRef = &dir.Children
+		}
+	}
+	for _, nodeRef := range *treeRef {
+		if nodeRef.Name == node.Name {
+			return &fs.PathError{Op: "place", Path: path + "/" + nodeRef.Name, Err: fs.ErrExist}
+		}
+	}
+	*treeRef = append(*treeRef, node)
+	return nil
+}
+
+// Walk returns an iterator over all nodes in the tree in DFS pre-order.
+// The key is the path of the node.
+//
+// Entries with invalid name are skipped.
+func (t Tree) Walk() iter.Seq2[string, *Node] {
+	return func(yield func(string, *Node) bool) {
+		walk(t, ".", yield)
+	}
+}
+
+func walk(t Tree, path string, yield func(string, *Node) bool) bool {
+	for _, node := range t {
+		if !ValidName(node.Name) {
+			// Skip entries with invalid name.
+			continue
+		}
+		nodePath := node.Name
+		if path != "." {
+			nodePath = path + "/" + nodePath
+		}
+		if !yield(nodePath, node) {
+			return false
+		}
+		if node.Mode.IsDir() {
+			if !walk(node.Children, nodePath, yield) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// ValidName reports whether the given name is a valid node name.
+//
+// The name must be UTF-8-encoded, must not be empty, "." or "..", and must not
+// contain "/". These are the same rules as for a path element in [fs.ValidPath].
+func ValidName(name string) bool {
+	if !utf8.ValidString(name) {
+		return false
+	}
+	if name == "" || name == "." || name == ".." {
+		return false
+	}
+	if strings.ContainsRune(name, '/') {
+		return false
+	}
+	return true
+}
diff --git a/osbase/structfs/structfs_test.go b/osbase/structfs/structfs_test.go
new file mode 100644
index 0000000..e28f9c5
--- /dev/null
+++ b/osbase/structfs/structfs_test.go
@@ -0,0 +1,300 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package structfs_test
+
+import (
+	"errors"
+	"io"
+	"io/fs"
+	"slices"
+	"syscall"
+	"testing"
+	"time"
+
+	. "source.monogon.dev/osbase/structfs"
+)
+
+func TestOptions(t *testing.T) {
+	testTimestamp := time.Date(2022, 03, 04, 5, 6, 8, 0, time.UTC)
+	var tree Tree
+	tree.PlaceDir("dir", Tree{},
+		WithModTime(testTimestamp),
+		WithPerm(0o700|fs.ModeSetuid|fs.ModeDevice),
+		WithSys("fakesys"),
+	)
+	node := tree[0]
+	if node.ModTime != testTimestamp {
+		t.Errorf("Got ModTime %v, expected %v", node.ModTime, testTimestamp)
+	}
+	expectMode := 0o700 | fs.ModeSetuid | fs.ModeDir
+	if node.Mode != expectMode {
+		t.Errorf("Got Mode %s, expected %s", node.Mode, expectMode)
+	}
+	if node.Sys != "fakesys" {
+		t.Errorf("Got Sys %v, expected %v", node.Sys, "fakesys")
+	}
+}
+
+func treeToStrings(t *testing.T, tree Tree) []string {
+	var out []string
+	for path, node := range tree.Walk() {
+		s := path + " " + node.Mode.String()[:1]
+		if node.Mode.IsRegular() {
+			content, err := node.Content.Open()
+			if err != nil {
+				t.Errorf("Failed to open %q: %v", path, err)
+				continue
+			}
+			b, err := io.ReadAll(content)
+			if err != nil {
+				t.Errorf("Failed to read %q: %v", path, err)
+				continue
+			}
+			s += " " + string(b)
+			content.Close()
+		}
+		out = append(out, s)
+	}
+	return out
+}
+
+func TestWalk(t *testing.T) {
+	testCases := []struct {
+		desc     string
+		tree     Tree
+		expected []string
+	}{
+		{
+			desc: "example",
+			tree: Tree{
+				File("file1a", Bytes("content1a")),
+				Dir("dir1", Tree{
+					File("file2", Bytes("content2")),
+					Dir("dir2", nil),
+				}),
+				File("file1b", Bytes("content1b")),
+			},
+			expected: []string{
+				"file1a - content1a",
+				"dir1 d",
+				"dir1/file2 - content2",
+				"dir1/dir2 d",
+				"file1b - content1b",
+			},
+		},
+		{
+			desc:     "empty",
+			tree:     nil,
+			expected: nil,
+		},
+		{
+			desc: "ignore file children",
+			// Non-directories should not have children and Walk should ignore them.
+			tree: Tree{{
+				Name:    "file1",
+				Content: Bytes("content1"),
+				Children: Tree{
+					File("file2", Bytes("content2")),
+					Dir("dir2", nil),
+				},
+			}},
+			expected: []string{
+				"file1 - content1",
+			},
+		},
+		{
+			desc: "skip invalid name",
+			tree: Tree{
+				File("", Bytes("invalid")),
+				File(".", Bytes("invalid")),
+				File("a/b", Bytes("invalid")),
+				File("file1a", Bytes("content1a")),
+				Dir("..", Tree{
+					File("file2", Bytes("content2")),
+					Dir("dir2", nil),
+				}),
+				File("file1b", Bytes("content1b")),
+			},
+			expected: []string{
+				"file1a - content1a",
+				"file1b - content1b",
+			},
+		},
+	}
+	for _, tC := range testCases {
+		t.Run(tC.desc, func(t *testing.T) {
+			actual := treeToStrings(t, tC.tree)
+			if !slices.Equal(actual, tC.expected) {
+				t.Errorf("Walk result %v differs from expected %v", actual, tC.expected)
+			}
+		})
+	}
+}
+
+func TestPlace(t *testing.T) {
+	testCases := []struct {
+		desc     string
+		tree     Tree
+		place    func(*Tree) error
+		expected []string
+		err      error
+		errPath  string
+	}{
+		{
+			desc: "file",
+			tree: Tree{
+				File("file1a", Bytes("content1a")),
+				Dir("dir1", Tree{
+					File("file2", Bytes("content2")),
+					Dir("dir2", nil),
+				}),
+				File("file1b", Bytes("content1b")),
+			},
+			place: func(tree *Tree) error {
+				return tree.PlaceFile("dir1/dir3/file4", Bytes("content4"))
+			},
+			expected: []string{
+				"file1a - content1a",
+				"dir1 d",
+				"dir1/file2 - content2",
+				"dir1/dir2 d",
+				"dir1/dir3 d",
+				"dir1/dir3/file4 - content4",
+				"file1b - content1b",
+			},
+		},
+		{
+			desc: "dir",
+			tree: Tree{
+				File("file1a", Bytes("content1a")),
+				Dir("dir1", Tree{
+					File("file2", Bytes("content2")),
+				}),
+			},
+			place: func(tree *Tree) error {
+				return tree.PlaceDir("dir1/dir3", Tree{
+					File("file4", Bytes("content4")),
+					Dir("dir4", nil),
+				})
+			},
+			expected: []string{
+				"file1a - content1a",
+				"dir1 d",
+				"dir1/file2 - content2",
+				"dir1/dir3 d",
+				"dir1/dir3/file4 - content4",
+				"dir1/dir3/dir4 d",
+			},
+		},
+		{
+			desc: "empty",
+			tree: nil,
+			place: func(tree *Tree) error {
+				return tree.PlaceFile("dir1/dir2/file3", Bytes("content"))
+			},
+			expected: []string{
+				"dir1 d",
+				"dir1/dir2 d",
+				"dir1/dir2/file3 - content",
+			},
+		},
+		{
+			desc: "root",
+			tree: Tree{
+				File("file1", Bytes("content1")),
+			},
+			place: func(tree *Tree) error {
+				return tree.PlaceFile("file2", Bytes("content2"))
+			},
+			expected: []string{
+				"file1 - content1",
+				"file2 - content2",
+			},
+		},
+		{
+			desc: "invalid path",
+			place: func(tree *Tree) error {
+				return tree.PlaceFile(".", Bytes("content"))
+			},
+			err:     fs.ErrInvalid,
+			errPath: ".",
+		},
+		{
+			desc: "not a directory",
+			tree: Tree{
+				Dir("dir1", Tree{
+					File("file2", Bytes("content")),
+				}),
+			},
+			place: func(tree *Tree) error {
+				return tree.PlaceFile("dir1/file2/dir3/file4", Bytes("content"))
+			},
+			err:     syscall.ENOTDIR,
+			errPath: "dir1/file2",
+		},
+		{
+			desc: "already exists",
+			tree: Tree{
+				Dir("dir1", Tree{
+					Dir("dir2", nil),
+				}),
+			},
+			place: func(tree *Tree) error {
+				return tree.PlaceDir("dir1/dir2", nil)
+			},
+			err:     fs.ErrExist,
+			errPath: "dir1/dir2",
+		},
+	}
+	for _, tC := range testCases {
+		t.Run(tC.desc, func(t *testing.T) {
+			err := tC.place(&tC.tree)
+			if err != nil {
+				if tC.err == nil {
+					t.Fatalf("Place failed unexpectedly: %v", err)
+				}
+				if !errors.Is(err, tC.err) {
+					t.Errorf("Place failed with error %v, expected %v", err, tC.err)
+				}
+				var pe *fs.PathError
+				if !errors.As(err, &pe) {
+					t.Fatalf("Place(): error is %T, want *fs.PathError", err)
+				}
+				if pe.Path != tC.errPath {
+					t.Errorf("Place(): err.Path = %q, want %q", pe.Path, tC.errPath)
+				}
+			} else if tC.err != nil {
+				t.Error("Expected place to fail but it did not")
+			} else {
+				actual := treeToStrings(t, tC.tree)
+				if !slices.Equal(actual, tC.expected) {
+					t.Errorf("Result %v differs from expected %v", actual, tC.expected)
+				}
+			}
+		})
+	}
+}
+
+func TestValidName(t *testing.T) {
+	isValidNameTests := []struct {
+		name string
+		ok   bool
+	}{
+		{"x", true},
+		{"", false},
+		{"..", false},
+		{".", false},
+		{"x/y", false},
+		{"/", false},
+		{"x/", false},
+		{"/x", false},
+		{`x\y`, true},
+	}
+	for _, tt := range isValidNameTests {
+		ok := ValidName(tt.name)
+		if ok != tt.ok {
+			t.Errorf("ValidName(%q) = %v, want %v", tt.name, ok, tt.ok)
+		}
+	}
+}