osbase/oci: add package

This adds the oci package, which contains types and tools for working
with OCI images.

Change-Id: Ie2a1d82c7ac007f5d1ad47666880dbf8a8bd931d
Reviewed-on: https://review.monogon.dev/c/monogon/+/4085
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/osbase/oci/BUILD.bazel b/osbase/oci/BUILD.bazel
new file mode 100644
index 0000000..ec1d590
--- /dev/null
+++ b/osbase/oci/BUILD.bazel
@@ -0,0 +1,23 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "oci",
+    srcs = [
+        "layout.go",  # reading and writing OCI layout directories
+        "oci.go",  # Image type, blob access and digest parsing
+    ],
+    importpath = "source.monogon.dev/osbase/oci",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//osbase/structfs",
+        "@com_github_opencontainers_go_digest//:go-digest",
+        "@com_github_opencontainers_image_spec//specs-go",
+        "@com_github_opencontainers_image_spec//specs-go/v1:specs-go",
+    ],
+)
+
+go_test(
+    name = "oci_test",
+    srcs = ["oci_test.go"],  # declares package oci, same as the library
+    embed = [":oci"],  # builds the test sources together with the library sources
+)
diff --git a/osbase/oci/layout.go b/osbase/oci/layout.go
new file mode 100644
index 0000000..128c4d1
--- /dev/null
+++ b/osbase/oci/layout.go
@@ -0,0 +1,152 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package oci
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path"
+	"path/filepath"
+
+	"github.com/opencontainers/go-digest"
+	ocispec "github.com/opencontainers/image-spec/specs-go"
+	ocispecv1 "github.com/opencontainers/image-spec/specs-go/v1"
+
+	"source.monogon.dev/osbase/structfs"
+)
+
+// ReadLayout reads an image from an OS path to an OCI layout directory. Only
+// layouts whose index.json lists exactly one image manifest are supported.
+func ReadLayout(path string) (*Image, error) {
+	// The oci-layout marker file carries the layout version.
+	markerBytes, err := os.ReadFile(filepath.Join(path, "oci-layout"))
+	if err != nil {
+		return nil, err
+	}
+	var marker ocispecv1.ImageLayout
+	if err := json.Unmarshal(markerBytes, &marker); err != nil {
+		return nil, fmt.Errorf("failed to parse oci-layout: %w", err)
+	}
+	if marker.Version != "1.0.0" {
+		return nil, fmt.Errorf("unknown oci-layout version %q", marker.Version)
+	}
+
+	// index.json must be an image index that lists exactly one image manifest.
+	indexBytes, err := os.ReadFile(filepath.Join(path, "index.json"))
+	if err != nil {
+		return nil, err
+	}
+	var index ocispecv1.Index
+	if err := json.Unmarshal(indexBytes, &index); err != nil {
+		return nil, fmt.Errorf("failed to parse index.json: %w", err)
+	}
+	if index.MediaType != ocispecv1.MediaTypeImageIndex {
+		return nil, fmt.Errorf("unknown index.json mediaType %q", index.MediaType)
+	}
+	switch len(index.Manifests) {
+	case 0:
+		return nil, fmt.Errorf("index.json contains no manifests")
+	case 1:
+		// The only supported shape; continue below.
+	default:
+		return nil, fmt.Errorf("index.json files containing multiple manifests are not supported")
+	}
+	manifestDesc := &index.Manifests[0]
+	if manifestDesc.MediaType != ocispecv1.MediaTypeImageManifest {
+		return nil, fmt.Errorf("unexpected manifest media type %q", manifestDesc.MediaType)
+	}
+
+	// Load the manifest blob; NewImage checks it against the digest from the index.
+	manifestPath, err := layoutBlobPath(path, manifestDesc)
+	if err != nil {
+		return nil, err
+	}
+	manifestBytes, err := os.ReadFile(manifestPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read image manifest: %w", err)
+	}
+
+	return NewImage(manifestBytes, string(manifestDesc.Digest), &layoutBlobs{path: path})
+}
+
+type layoutBlobs struct {
+	path string // OS path of the OCI layout directory
+}
+
+func (l *layoutBlobs) Blob(descriptor *ocispecv1.Descriptor) (io.ReadCloser, error) {
+	name, err := layoutBlobPath(l.path, descriptor)
+	if err != nil {
+		return nil, err
+	}
+	return os.Open(name) // the blob file is only opened when requested
+}
+
+func layoutBlobPath(layoutPath string, descriptor *ocispecv1.Descriptor) (string, error) {
+	algorithm, encoded, err := ParseDigest(string(descriptor.Digest)) // also validates both path components
+	if err != nil { // the descriptor may come from index.json or from a manifest
+		return "", fmt.Errorf("failed to parse descriptor digest %q: %w", descriptor.Digest, err)
+	}
+	return filepath.Join(layoutPath, "blobs", algorithm, encoded), nil // <layout>/blobs/<algorithm>/<encoded>
+}
+
+// CreateLayout builds an OCI layout tree (oci-layout, index.json and blobs) from an Image.
+func CreateLayout(image *Image) (structfs.Tree, error) {
+	// The index entry only carries an artifact type for non-standard configs.
+	var artifactType string
+	if mt := image.Manifest.Config.MediaType; mt != ocispecv1.MediaTypeImageConfig {
+		artifactType = mt
+	}
+	manifestEntry := ocispecv1.Descriptor{
+		MediaType:    ocispecv1.MediaTypeImageManifest,
+		ArtifactType: artifactType,
+		Digest:       digest.Digest(image.ManifestDigest),
+		Size:         int64(len(image.RawManifest)),
+	}
+	index := ocispecv1.Index{
+		Versioned: ocispec.Versioned{SchemaVersion: 2},
+		MediaType: ocispecv1.MediaTypeImageIndex,
+		Manifests: []ocispecv1.Descriptor{manifestEntry},
+	}
+	indexJSON, err := json.MarshalIndent(index, "", "\t")
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal image index: %w", err)
+	}
+	indexJSON = append(indexJSON, '\n')
+
+	tree := structfs.Tree{
+		structfs.File("oci-layout", structfs.Bytes(`{"imageLayoutVersion": "1.0.0"}`+"\n")),
+		structfs.File("index.json", structfs.Bytes(indexJSON)),
+	}
+
+	// The manifest is stored as a blob under blobs/<algorithm>/<encoded>.
+	manifestAlgo, manifestEnc, err := ParseDigest(image.ManifestDigest)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse manifest digest: %w", err)
+	}
+	manifestBlobPath := path.Join("blobs", manifestAlgo, manifestEnc)
+	if err := tree.PlaceFile(manifestBlobPath, structfs.Bytes(image.RawManifest)); err != nil {
+		return nil, err
+	}
+
+	placed := map[string]bool{}
+	for desc := range image.Descriptors() {
+		algo, enc, err := ParseDigest(string(desc.Digest))
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse digest in image manifest: %w", err)
+		}
+		blobPath := path.Join("blobs", algo, enc)
+		if placed[blobPath] {
+			// Descriptors sharing a digest map to one file; the first one wins.
+			continue
+		}
+		placed[blobPath] = true
+		if err := tree.PlaceFile(blobPath, image.StructfsBlob(desc)); err != nil {
+			return nil, err
+		}
+	}
+
+	return tree, nil
+}
diff --git a/osbase/oci/oci.go b/osbase/oci/oci.go
new file mode 100644
index 0000000..a62b527
--- /dev/null
+++ b/osbase/oci/oci.go
@@ -0,0 +1,168 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package oci contains tools for handling OCI images.
+package oci
+
+import (
+	"crypto/sha256"
+	"encoding/json"
+	"fmt"
+	"io"
+	"iter"
+	"strings"
+
+	ocispecv1 "github.com/opencontainers/image-spec/specs-go/v1"
+
+	"source.monogon.dev/osbase/structfs"
+)
+
+// Image represents an OCI image: its manifest plus a source for its blobs.
+type Image struct {
+	// Manifest contains the parsed image manifest.
+	Manifest *ocispecv1.Manifest
+	// RawManifest contains the bytes of the image manifest, as they were parsed.
+	RawManifest []byte
+	// ManifestDigest contains the computed "sha256:<hex>" digest of RawManifest.
+	ManifestDigest string
+
+	blobs Blobs // consulted by Blob when a descriptor has no embedded data
+}
+
+// Blobs is the interface which image sources implement to retrieve the content
+// of blobs that are not embedded in the descriptor itself.
+type Blobs interface {
+	// Blob returns the contents of a blob from its descriptor.
+	// It does not verify the contents against the digest.
+	Blob(*ocispecv1.Descriptor) (io.ReadCloser, error)
+}
+
+// NewImage verifies the manifest against the expected digest if not empty,
+// then parses it, rejects negative descriptor sizes and returns an [Image].
+func NewImage(rawManifest []byte, expectedDigest string, blobs Blobs) (*Image, error) {
+	digest := fmt.Sprintf("sha256:%x", sha256.Sum256(rawManifest))
+	if expectedDigest != "" && expectedDigest != digest {
+		return nil, fmt.Errorf("failed verification of manifest: expected digest %q, computed %q", expectedDigest, digest)
+	}
+
+	// Pass the pointer itself: with &manifest a JSON null would nil it and panic below.
+	manifest := &ocispecv1.Manifest{}
+	if err := json.Unmarshal(rawManifest, manifest); err != nil {
+		return nil, fmt.Errorf("failed to parse image manifest: %w", err)
+	}
+	if manifest.MediaType != ocispecv1.MediaTypeImageManifest {
+		return nil, fmt.Errorf("unexpected manifest media type %q", manifest.MediaType)
+	}
+	image := &Image{
+		Manifest:       manifest,
+		RawManifest:    rawManifest,
+		ManifestDigest: digest,
+		blobs:          blobs,
+	}
+	for descriptor := range image.Descriptors() {
+		if descriptor.Size < 0 {
+			return nil, fmt.Errorf("invalid manifest: contains descriptor with negative size")
+		}
+	}
+
+	return image, nil
+}
+
+// Descriptors returns an iterator that yields a pointer to the config
+// descriptor first, then a pointer to each layer descriptor in order.
+func (i *Image) Descriptors() iter.Seq[*ocispecv1.Descriptor] {
+	return func(yield func(*ocispecv1.Descriptor) bool) {
+		if !yield(&i.Manifest.Config) {
+			return
+		}
+		for idx := range len(i.Manifest.Layers) {
+			if layer := &i.Manifest.Layers[idx]; !yield(layer) {
+				return
+			}
+		}
+	}
+}
+
+// Blob returns the contents of a blob from its descriptor, preferring data
+// embedded in the descriptor. It does not verify the contents against the digest.
+func (i *Image) Blob(descriptor *ocispecv1.Descriptor) (io.ReadCloser, error) {
+	if embedded := descriptor.Data; int64(len(embedded)) == descriptor.Size {
+		return structfs.Bytes(embedded).Open() // also taken for an empty blob of size 0
+	} else if len(embedded) != 0 {
+		return nil, fmt.Errorf("descriptor has embedded data of wrong length")
+	}
+	return i.blobs.Blob(descriptor)
+}
+
+// ReadBlobVerified loads a blob of at most 50 MiB into memory and returns it
+// only if its sha256 digest matches the one in the descriptor.
+func (i *Image) ReadBlobVerified(descriptor *ocispecv1.Descriptor) ([]byte, error) {
+	size := descriptor.Size
+	switch {
+	case size < 0:
+		return nil, fmt.Errorf("invalid descriptor size %d", size)
+	case size > 50*1024*1024:
+		return nil, fmt.Errorf("refusing to read blob of size %d into memory", size)
+	}
+	// Reject malformed or unsupported digests before opening the blob.
+	want := string(descriptor.Digest)
+	if _, _, err := ParseDigest(want); err != nil {
+		return nil, err
+	}
+	reader, err := i.Blob(descriptor)
+	if err != nil {
+		return nil, err
+	}
+	defer reader.Close()
+	data := make([]byte, size)
+	if _, err := io.ReadFull(reader, data); err != nil {
+		return nil, err
+	}
+	if got := fmt.Sprintf("sha256:%x", sha256.Sum256(data)); got != want {
+		return nil, fmt.Errorf("failed verification of blob: expected digest %q, computed %q", want, got)
+	}
+	return data, nil
+}
+
+// StructfsBlob adapts one blob of the image to the [structfs.Blob] interface.
+// The blob is opened through [Image.Blob], so its contents are not verified
+// against the digest.
+func (i *Image) StructfsBlob(descriptor *ocispecv1.Descriptor) structfs.Blob {
+	adapter := structfsBlob{image: i, descriptor: descriptor}
+	return &adapter
+}
+
+// structfsBlob pairs an image with one of its descriptors so that the blob
+// can be handed out as a [structfs.Blob].
+type structfsBlob struct {
+	image      *Image
+	descriptor *ocispecv1.Descriptor
+}
+
+// Open returns the blob contents via [Image.Blob], without digest verification.
+func (s *structfsBlob) Open() (io.ReadCloser, error) { return s.image.Blob(s.descriptor) }
+
+// Size reports the size declared in the descriptor.
+func (s *structfsBlob) Size() int64 { return s.descriptor.Size }
+
+// ParseDigest splits a digest of the form "algorithm:encoded" into its parts.
+// Only sha256 with 64 lowercase hexadecimal characters is accepted.
+func ParseDigest(digest string) (algorithm string, encoded string, err error) {
+	algo, enc, found := strings.Cut(digest, ":")
+	if !found {
+		return "", "", fmt.Errorf("invalid digest")
+	}
+	if algo != "sha256" {
+		return "", "", fmt.Errorf("unknown digest algorithm %q", algo)
+	}
+	// Characters are checked before the length, which decides the error when both fail.
+	for _, c := range []byte(enc) {
+		if !(('0' <= c && c <= '9') || ('a' <= c && c <= 'f')) {
+			return "", "", fmt.Errorf("invalid character in sha256 digest")
+		}
+	}
+	if len(enc) != 2*sha256.Size {
+		return "", "", fmt.Errorf("invalid sha256 digest length")
+	}
+	return algo, enc, nil
+}
diff --git a/osbase/oci/oci_test.go b/osbase/oci/oci_test.go
new file mode 100644
index 0000000..93976ed
--- /dev/null
+++ b/osbase/oci/oci_test.go
@@ -0,0 +1,96 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package oci
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+)
+
+// TestEmbeddedContent reads blobs whose content is embedded in the manifest.
+func TestEmbeddedContent(t *testing.T) {
+	rawManifest := `{
+	"schemaVersion": 2,
+	"mediaType": "application/vnd.oci.image.manifest.v1+json",
+	"config": {
+		"mediaType": "application/vnd.oci.empty.v1+json",
+		"digest": "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a",
+		"size": 2,
+		"data": "e30="
+	},
+	"layers": [
+		{
+			"digest": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
+			"size": 0
+		},
+		{
+			"digest": "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff80",
+			"size": 2,
+			"data": "e30="
+		}
+	]
+}`
+	// With nil blobs there is no external blob source, so every read below
+	// can only succeed by using the content embedded in the descriptors.
+	image, err := NewImage([]byte(rawManifest), "", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	config, err := image.ReadBlobVerified(&image.Manifest.Config)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got, want := string(config), "{}"; got != want {
+		t.Errorf("Got config %q, expected %q", got, want)
+	}
+	emptyLayer, err := image.ReadBlobVerified(&image.Manifest.Layers[0])
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(emptyLayer) != 0 {
+		t.Errorf("Got layer %q, expected to be empty", emptyLayer)
+	}
+	// Layer 1 carries the config's data under a digest that differs in its last character.
+	if _, err := image.ReadBlobVerified(&image.Manifest.Layers[1]); !strings.Contains(fmt.Sprint(err), "failed verification") {
+		t.Errorf("Expected failed verification, got %v", err)
+	}
+}
+
+// TestParseDigest feeds ParseDigest malformed and well-formed digests and
+// compares all three return values against the expectation.
+func TestParseDigest(t *testing.T) {
+	// valid is a well-formed sha256 value that the longer inputs are derived from.
+	const valid = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	cases := []struct {
+		input, algorithm, encoded, err string
+	}{
+		// Rejected inputs: algorithm and encoded are expected to be empty.
+		{input: "", err: `invalid digest`},
+		{input: "1234", err: `invalid digest`},
+		{input: "x:y", err: `unknown digest algorithm "x"`},
+		{input: "sha256:1234", err: `invalid sha256 digest length`},
+		{input: "sha256:" + valid + "0", err: `invalid sha256 digest length`},
+		{input: "sha256:" + valid[:63] + "x", err: `invalid character in sha256 digest`},
+		// Accepted input.
+		{input: "sha256:" + valid, algorithm: "sha256", encoded: valid},
+	}
+	for _, c := range cases {
+		algorithm, encoded, err := ParseDigest(c.input)
+		// A nil error is compared as the empty string.
+		var errStr string
+		if err != nil {
+			errStr = err.Error()
+		}
+		if algorithm != c.algorithm {
+			t.Errorf("ParseDigest(%q): algorithm = %q, expected %q", c.input, algorithm, c.algorithm)
+		}
+		if encoded != c.encoded {
+			t.Errorf("ParseDigest(%q): encoded = %q, expected %q", c.input, encoded, c.encoded)
+		}
+		if errStr != c.err {
+			t.Errorf("ParseDigest(%q): err = %q, expected %q", c.input, errStr, c.err)
+		}
+	}
+}