| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 1 | // Copyright The Monogon Project Authors. |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | // Package oci contains tools for handling OCI images. |
| 5 | package oci |
| 6 | |
| 7 | import ( |
| 8 | "crypto/sha256" |
| 9 | "encoding/json" |
| 10 | "fmt" |
| 11 | "io" |
| 12 | "iter" |
| 13 | "strings" |
| 14 | |
| 15 | ocispecv1 "github.com/opencontainers/image-spec/specs-go/v1" |
| 16 | |
| 17 | "source.monogon.dev/osbase/structfs" |
| 18 | ) |
| 19 | |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 20 | // Index represents an OCI image index. |
| 21 | type Index struct { |
| 22 | // Manifest contains the parsed index manifest. |
| 23 | Manifest *ocispecv1.Index |
| 24 | rawManifest []byte |
| 25 | digest string |
| 26 | blobs Blobs |
| 27 | } |
| 28 | |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 29 | // Image represents an OCI image. |
| 30 | type Image struct { |
| 31 | // Manifest contains the parsed image manifest. |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 32 | Manifest *ocispecv1.Manifest |
| 33 | rawManifest []byte |
| 34 | digest string |
| 35 | blobs Blobs |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 36 | } |
| 37 | |
// Ref is a parsed manifest: either an [*Index] or an [*Image].
type Ref interface {
	// RawManifest returns the manifest bytes. The slice is shared with the
	// Ref, so callers must not modify it.
	RawManifest() []byte
	// Digest returns the digest computed over RawManifest using the default
	// digest algorithm. Currently only sha256 is supported.
	Digest() string
	// MediaType returns the manifest's media type.
	MediaType() string
	// isRef is unexported so that only types in this package can implement
	// the interface.
	isRef()
}
| 52 | |
| 53 | func (i *Index) RawManifest() []byte { return i.rawManifest } |
| 54 | func (i *Index) Digest() string { return i.digest } |
| 55 | func (i *Index) MediaType() string { return ocispecv1.MediaTypeImageIndex } |
| 56 | func (i *Index) isRef() {} |
| 57 | |
| 58 | func (i *Image) RawManifest() []byte { return i.rawManifest } |
| 59 | func (i *Image) Digest() string { return i.digest } |
| 60 | func (i *Image) MediaType() string { return ocispecv1.MediaTypeImageManifest } |
| 61 | func (i *Image) isRef() {} |
| 62 | |
| 63 | // Blobs is the interface which image sources implement |
| 64 | // to retrieve the content of blobs and manifests. |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 65 | type Blobs interface { |
| 66 | // Blob returns the contents of a blob from its descriptor. |
| 67 | // It does not verify the contents against the digest. |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 68 | // |
| 69 | // This is only called on images. |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 70 | Blob(*ocispecv1.Descriptor) (io.ReadCloser, error) |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 71 | // Manifest returns the contents of a manifest from its descriptor. |
| 72 | // It does not verify the contents against the digest. |
| 73 | // |
| 74 | // This is only called on indexes. |
| 75 | Manifest(*ocispecv1.Descriptor) ([]byte, error) |
| 76 | // Blobs returns the [Blobs] for the manifest from its descriptor. |
| 77 | // Most implementations simply return the receiver itself, but this |
| 78 | // allows combining Refs from different sources into an Index. |
| 79 | // |
| 80 | // This is only called on indexes. |
| 81 | Blobs(*ocispecv1.Descriptor) (Blobs, error) |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 82 | } |
| 83 | |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 84 | // NewRef verifies the manifest against the expected digest if not empty, |
| 85 | // then parses it according to mediaType and returns a [Ref]. |
| 86 | func NewRef(rawManifest []byte, mediaType string, expectedDigest string, blobs Blobs) (Ref, error) { |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 87 | digest := fmt.Sprintf("sha256:%x", sha256.Sum256(rawManifest)) |
| 88 | if expectedDigest != "" && expectedDigest != digest { |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 89 | if _, _, err := ParseDigest(expectedDigest); err != nil { |
| 90 | return nil, err |
| 91 | } |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 92 | return nil, fmt.Errorf("failed verification of manifest: expected digest %q, computed %q", expectedDigest, digest) |
| 93 | } |
| 94 | |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 95 | switch mediaType { |
| 96 | case ocispecv1.MediaTypeImageManifest: |
| 97 | manifest := &ocispecv1.Manifest{} |
| 98 | if err := json.Unmarshal(rawManifest, manifest); err != nil { |
| 99 | return nil, fmt.Errorf("failed to parse image manifest: %w", err) |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 100 | } |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 101 | if manifest.MediaType != ocispecv1.MediaTypeImageManifest { |
| 102 | return nil, fmt.Errorf("unexpected manifest media type %q, expected %q", manifest.MediaType, ocispecv1.MediaTypeImageManifest) |
| 103 | } |
| 104 | image := &Image{ |
| 105 | Manifest: manifest, |
| 106 | rawManifest: rawManifest, |
| 107 | digest: digest, |
| 108 | blobs: blobs, |
| 109 | } |
| 110 | for descriptor := range image.Descriptors() { |
| 111 | // We validate this here such that StructfsBlob does not need an error return. |
| 112 | if descriptor.Size < 0 { |
| 113 | return nil, fmt.Errorf("invalid manifest: contains descriptor with negative size") |
| 114 | } |
| 115 | } |
| 116 | return image, nil |
| 117 | case ocispecv1.MediaTypeImageIndex: |
| 118 | manifest := &ocispecv1.Index{} |
| 119 | if err := json.Unmarshal(rawManifest, manifest); err != nil { |
| 120 | return nil, fmt.Errorf("failed to parse index manifest: %w", err) |
| 121 | } |
| 122 | if manifest.MediaType != ocispecv1.MediaTypeImageIndex { |
| 123 | return nil, fmt.Errorf("unexpected manifest media type %q, expected %q", manifest.MediaType, ocispecv1.MediaTypeImageIndex) |
| 124 | } |
| 125 | index := &Index{ |
| 126 | Manifest: manifest, |
| 127 | rawManifest: rawManifest, |
| 128 | digest: digest, |
| 129 | blobs: blobs, |
| 130 | } |
| 131 | return index, nil |
| 132 | default: |
| 133 | return nil, fmt.Errorf("unknown manifest media type %q", mediaType) |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | // AsImage can be conveniently wrapped around a call which returns a [Ref] or |
| 138 | // error, when only [*Image] can be handled. |
| 139 | func AsImage(ref Ref, err error) (*Image, error) { |
| 140 | if err != nil { |
| 141 | return nil, err |
| 142 | } |
| 143 | image, ok := ref.(*Image) |
| 144 | if !ok { |
| 145 | return nil, fmt.Errorf("unexpected manifest media type %q, only image is supported", ref.MediaType()) |
| 146 | } |
| 147 | return image, nil |
| 148 | } |
| 149 | |
| 150 | // WalkRefs iterates over all Refs reachable from ref in DFS post-order. |
| 151 | // Each digest is only visited once, even if reachable multiple times. |
| 152 | // |
| 153 | // For each Ref, we also pass the digest by which it is referenced. This may be |
| 154 | // different from ref.Digest() if we ever support multiple digest algorithms. |
| 155 | func WalkRefs(digest string, ref Ref, fn func(digest string, ref Ref) error) error { |
| 156 | visited := make(map[string]bool) |
| 157 | return walkRefs(digest, ref, fn, visited) |
| 158 | } |
| 159 | |
| 160 | func walkRefs(digest string, ref Ref, fn func(digest string, ref Ref) error, visited map[string]bool) error { |
| 161 | if visited[digest] { |
| 162 | return nil |
| 163 | } |
| 164 | visited[digest] = true |
| 165 | switch ref := ref.(type) { |
| 166 | case *Image: |
| 167 | case *Index: |
| 168 | for i := range ref.Manifest.Manifests { |
| 169 | descriptor := &ref.Manifest.Manifests[i] |
| 170 | childRef, err := ref.Ref(descriptor) |
| 171 | if err != nil { |
| 172 | return err |
| 173 | } |
| 174 | err = walkRefs(string(descriptor.Digest), childRef, fn, visited) |
| 175 | if err != nil { |
| 176 | return err |
| 177 | } |
| 178 | } |
| 179 | default: |
| 180 | return fmt.Errorf("unknown manifest media type %q", ref.MediaType()) |
| 181 | } |
| 182 | return fn(digest, ref) |
| 183 | } |
| 184 | |
| 185 | // Ref reads a manifest from its descriptor and wraps it in a [Ref]. |
| 186 | // The manifest is verified against the digest. |
| 187 | func (i *Index) Ref(descriptor *ocispecv1.Descriptor) (Ref, error) { |
| 188 | if descriptor.Size < 0 { |
| 189 | return nil, fmt.Errorf("invalid descriptor size %d", descriptor.Size) |
| 190 | } |
| 191 | if descriptor.Size > 50*1024*1024 { |
| 192 | return nil, fmt.Errorf("refusing to read manifest of size %d into memory", descriptor.Size) |
| 193 | } |
| 194 | switch descriptor.MediaType { |
| 195 | case ocispecv1.MediaTypeImageManifest: |
| 196 | case ocispecv1.MediaTypeImageIndex: |
| 197 | default: |
| 198 | return nil, fmt.Errorf("unknown manifest media type %q", descriptor.MediaType) |
| 199 | } |
| 200 | if descriptor.Digest == "" { // NewRef treats empty digest as unknown. |
| 201 | return nil, fmt.Errorf("invalid digest") |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 202 | } |
| 203 | |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 204 | var rawManifest []byte |
| 205 | if int64(len(descriptor.Data)) == descriptor.Size { |
| 206 | rawManifest = descriptor.Data |
| 207 | } else if len(descriptor.Data) != 0 { |
| 208 | return nil, fmt.Errorf("descriptor has embedded data of wrong length") |
| 209 | } else { |
| 210 | var err error |
| 211 | rawManifest, err = i.blobs.Manifest(descriptor) |
| 212 | if err != nil { |
| 213 | return nil, err |
| 214 | } |
| 215 | } |
| 216 | if int64(len(rawManifest)) != descriptor.Size { |
| 217 | return nil, fmt.Errorf("manifest has wrong length, expected %d, got %d bytes", descriptor.Size, len(rawManifest)) |
| 218 | } |
| 219 | blobs, err := i.blobs.Blobs(descriptor) |
| 220 | if err != nil { |
| 221 | return nil, err |
| 222 | } |
| 223 | return NewRef(rawManifest, descriptor.MediaType, string(descriptor.Digest), blobs) |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 224 | } |
| 225 | |
| 226 | // Descriptors returns an iterator over all descriptors in the image (config and |
| 227 | // layers). |
| 228 | func (i *Image) Descriptors() iter.Seq[*ocispecv1.Descriptor] { |
| 229 | return func(yield func(*ocispecv1.Descriptor) bool) { |
| 230 | if !yield(&i.Manifest.Config) { |
| 231 | return |
| 232 | } |
| 233 | for l := range i.Manifest.Layers { |
| 234 | if !yield(&i.Manifest.Layers[l]) { |
| 235 | return |
| 236 | } |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | // Blob returns the contents of a blob from its descriptor. |
| 242 | // It does not verify the contents against the digest. |
| 243 | func (i *Image) Blob(descriptor *ocispecv1.Descriptor) (io.ReadCloser, error) { |
| Jan Schär | 2963b68 | 2025-07-17 17:03:44 +0200 | [diff] [blame] | 244 | if descriptor.Size < 0 { |
| 245 | return nil, fmt.Errorf("invalid descriptor size %d", descriptor.Size) |
| 246 | } |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 247 | if int64(len(descriptor.Data)) == descriptor.Size { |
| 248 | return structfs.Bytes(descriptor.Data).Open() |
| 249 | } else if len(descriptor.Data) != 0 { |
| 250 | return nil, fmt.Errorf("descriptor has embedded data of wrong length") |
| 251 | } |
| 252 | return i.blobs.Blob(descriptor) |
| 253 | } |
| 254 | |
| 255 | // ReadBlobVerified reads a blob into a byte slice and verifies it against the |
| 256 | // digest. |
| 257 | func (i *Image) ReadBlobVerified(descriptor *ocispecv1.Descriptor) ([]byte, error) { |
| Jan Schär | b48174d | 2025-04-14 10:13:02 +0000 | [diff] [blame] | 258 | if descriptor.Size > 50*1024*1024 { |
| 259 | return nil, fmt.Errorf("refusing to read blob of size %d into memory", descriptor.Size) |
| 260 | } |
| 261 | expectedDigest := string(descriptor.Digest) |
| 262 | if _, _, err := ParseDigest(expectedDigest); err != nil { |
| 263 | return nil, err |
| 264 | } |
| 265 | blob, err := i.Blob(descriptor) |
| 266 | if err != nil { |
| 267 | return nil, err |
| 268 | } |
| 269 | defer blob.Close() |
| 270 | content := make([]byte, descriptor.Size) |
| 271 | _, err = io.ReadFull(blob, content) |
| 272 | if err != nil { |
| 273 | return nil, err |
| 274 | } |
| 275 | digest := fmt.Sprintf("sha256:%x", sha256.Sum256(content)) |
| 276 | if expectedDigest != digest { |
| 277 | return nil, fmt.Errorf("failed verification of blob: expected digest %q, computed %q", expectedDigest, digest) |
| 278 | } |
| 279 | return content, nil |
| 280 | } |
| 281 | |
| 282 | // StructfsBlob wraps an image and descriptor into a [structfs.Blob]. |
| 283 | func (i *Image) StructfsBlob(descriptor *ocispecv1.Descriptor) structfs.Blob { |
| 284 | return &structfsBlob{ |
| 285 | image: i, |
| 286 | descriptor: descriptor, |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | type structfsBlob struct { |
| 291 | image *Image |
| 292 | descriptor *ocispecv1.Descriptor |
| 293 | } |
| 294 | |
| 295 | func (b *structfsBlob) Open() (io.ReadCloser, error) { |
| 296 | return b.image.Blob(b.descriptor) |
| 297 | } |
| 298 | |
| 299 | func (b *structfsBlob) Size() int64 { |
| 300 | return b.descriptor.Size |
| 301 | } |
| 302 | |
// ParseDigest splits a digest of the form "algorithm:encoded" at the first
// colon and validates it.
//
// Only the sha256 algorithm is supported, for which encoded must consist of
// exactly 64 lowercase hexadecimal characters. If the digest is invalid or
// the algorithm is unknown, an error is returned and both strings are empty.
func ParseDigest(digest string) (algorithm string, encoded string, err error) {
	algo, hexPart, found := strings.Cut(digest, ":")
	if !found {
		return "", "", fmt.Errorf("invalid digest")
	}
	if algo != "sha256" {
		return "", "", fmt.Errorf("unknown digest algorithm %q", algo)
	}
	// Characters are checked before the length, so a digest with both
	// problems reports the invalid character.
	for pos := 0; pos < len(hexPart); pos++ {
		c := hexPart[pos]
		isDigit := '0' <= c && c <= '9'
		isLowerHex := 'a' <= c && c <= 'f'
		if !isDigit && !isLowerHex {
			return "", "", fmt.Errorf("invalid character in sha256 digest")
		}
	}
	if len(hexPart) != 2*sha256.Size {
		return "", "", fmt.Errorf("invalid sha256 digest length")
	}
	return algo, hexPart, nil
}