third_party/sandboxroot: add mirror tool
This mirrors the sandbox RPMs into a GCS bucket any time we regenerate
it. Hopefully this stops the constant barrage of random 404s when Fedora
just happened to bump a library and all the mirrors lost its previous
version.
This tool is currently specific to our bazeldnf-based sandboxroot setup,
but could be extended to mirror all of our dependencies at some point.
As our mirror is the last in the list, it should only be used when a
file is missing from other mirrors. In the future, we should have some
job that alerts us when too many of our deps are missing from upstream
mirrors.
Change-Id: I08ccbdf99ec868363918e30f3d2ae94f463e045f
Reviewed-on: https://review.monogon.dev/c/monogon/+/1473
Tested-by: Jenkins CI
Reviewed-by: Leopold Schabel <leo@monogon.tech>
diff --git a/go.mod b/go.mod
index eb53e74..f7a076c 100644
--- a/go.mod
+++ b/go.mod
@@ -66,6 +66,7 @@
replace github.com/vishvananda/netlink => github.com/monogon-dev/netlink v0.0.0-20230125113930-88977c3ff4b3
require (
+ cloud.google.com/go/storage v1.28.0
github.com/adrg/xdg v0.4.0
github.com/bazelbuild/rules_go v0.30.0
github.com/cavaliergopher/cpio v1.0.1
@@ -147,6 +148,11 @@
)
require (
+ cloud.google.com/go v0.107.0 // indirect
+ cloud.google.com/go/iam v0.8.0 // indirect
+)
+
+require (
cloud.google.com/go/compute v1.18.0 // indirect
cloud.google.com/go/compute/metadata v0.2.3 // indirect
github.com/Azure/azure-sdk-for-go v63.4.0+incompatible // indirect
@@ -376,7 +382,7 @@
go.opentelemetry.io/otel/sdk/metric v0.20.0 // indirect
go.opentelemetry.io/otel/trace v1.3.0 // indirect
go.opentelemetry.io/proto/otlp v0.11.0 // indirect
- go.starlark.net v0.0.0-20210223155950-e043a3d3c984 // indirect
+ go.starlark.net v0.0.0-20210223155950-e043a3d3c984
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/zap v1.19.1 // indirect
golang.org/x/arch v0.0.0-20190927153633-4e8777c89be4 // indirect
diff --git a/go.sum b/go.sum
index ce89360..071fb34 100644
--- a/go.sum
+++ b/go.sum
@@ -41,6 +41,7 @@
cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM=
cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA=
cloud.google.com/go v0.107.0 h1:qkj22L7bgkl6vIeZDlOY2po43Mx/TIa2Wsa7VR+PEww=
+cloud.google.com/go v0.107.0/go.mod h1:wpc2eNrD7hXUTy8EKS10jkxpZBjASrORK7goS+3YX2I=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
@@ -54,6 +55,8 @@
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk=
+cloud.google.com/go/iam v0.8.0 h1:E2osAkZzxI/+8pZcxVLcDtAQx/u+hZXVryUaYQ5O0Kk=
+cloud.google.com/go/iam v0.8.0/go.mod h1:lga0/y3iH6CX7sYqypWJ33hf7kkfXJag67naqGESjkE=
cloud.google.com/go/longrunning v0.3.0 h1:NjljC+FYPV3uh5/OwWT6pVU+doBqMg2x/rZlE+CamDs=
cloud.google.com/go/monitoring v0.1.0/go.mod h1:Hpm3XfzJv+UTiXzCG5Ffp0wijzHTC7Cv4eR7o3x/fEE=
cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
@@ -72,6 +75,8 @@
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
+cloud.google.com/go/storage v1.28.0 h1:DLrIZ6xkeZX6K70fU/boWx5INJumt6f+nwwWSHXzzGY=
+cloud.google.com/go/storage v1.28.0/go.mod h1:qlgZML35PXA3zoEnIkiPLY4/TOkUleufRlu6qmcf7sI=
cloud.google.com/go/trace v0.1.0/go.mod h1:wxEwsoeRVPbeSkt7ZC9nWCgmoKQRAoySN7XHW2AmI7g=
code.gitea.io/sdk/gitea v0.11.3/go.mod h1:z3uwDV/b9Ls47NGukYM9XhnHtqPh/J+t40lsUrR6JDY=
contrib.go.opencensus.io/exporter/aws v0.0.0-20181029163544-2befc13012d0/go.mod h1:uu1P0UCM/6RbsMrgPa98ll8ZcHM858i/AD06a9aLRCA=
@@ -1044,9 +1049,11 @@
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/licenseclassifier v0.0.0-20210325184830-bb04aff29e72/go.mod h1:qsqn2hxC+vURpyBRygGUuinTO42MFRLcsmQ/P8v94+M=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
+github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible h1:xmapqc1AyLoB+ddYT6r04bD9lIjlOqGaREovi0SzFaE=
github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
+github.com/google/martian/v3 v3.2.1 h1:d8MncMlErDFTwQGBK1xhv026j9kqhvw1Qv9IbWT1VLQ=
github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk=
github.com/google/nftables v0.0.0-20190906062827-5d14089d2edc/go.mod h1:DfTD7lq9Gq5pLrgCmJDbGtrcWF/h7i5XWgEC/6bQu0s=
github.com/google/nftables v0.0.0-20200316075819-7127d9d22474/go.mod h1:cfspEyr/Ap+JDIITA+N9a0ernqG0qZ4W1aqMRgDZa1g=
diff --git a/third_party/go/repositories.bzl b/third_party/go/repositories.bzl
index 051b2b2..2b780d7 100644
--- a/third_party/go/repositories.bzl
+++ b/third_party/go/repositories.bzl
@@ -5775,8 +5775,8 @@
go_repository(
name = "com_google_cloud_go_storage",
importpath = "cloud.google.com/go/storage",
- sum = "h1:STgFzyU5/8miMl0//zKh2aQeTyeaUH3WN9bSUiJ09bA=",
- version = "v1.10.0",
+ sum = "h1:DLrIZ6xkeZX6K70fU/boWx5INJumt6f+nwwWSHXzzGY=",
+ version = "v1.28.0",
)
go_repository(
name = "com_google_cloud_go_storagetransfer",
diff --git a/third_party/sandboxroot/mirror/BUILD.bazel b/third_party/sandboxroot/mirror/BUILD.bazel
new file mode 100644
index 0000000..ea62a45
--- /dev/null
+++ b/third_party/sandboxroot/mirror/BUILD.bazel
@@ -0,0 +1,26 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+# Library holding all sources of the sandboxroot mirror tool. It is private to
+# this package and only consumed by the :mirror binary below.
+go_library(
+ name = "mirror_lib",
+ srcs = [
+ "bazeldnf.go",
+ "external.go",
+ "main.go",
+ ],
+ importpath = "source.monogon.dev/third_party/sandboxroot/mirror",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//build/toolbase",
+ "@com_github_cenkalti_backoff_v4//:backoff",
+ "@com_github_spf13_cobra//:cobra",
+ "@com_google_cloud_go_storage//:storage",
+ "@io_k8s_klog_v2//:klog",
+ "@net_starlark_go//starlark",
+ ],
+)
+
+# The mirror tool itself, built by embedding :mirror_lib. Publicly visible so
+# that it can be run from elsewhere in the workspace.
+go_binary(
+ name = "mirror",
+ embed = [":mirror_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/sandboxroot/mirror/README.md b/third_party/sandboxroot/mirror/README.md
new file mode 100644
index 0000000..002d9ae
--- /dev/null
+++ b/third_party/sandboxroot/mirror/README.md
@@ -0,0 +1,37 @@
+sandboxroot mirror
+===
+
+Fedora mirrors tend to drop RPMs very quickly. As we don't want to be constantly
+chasing every single tiny update, we have decided to set up our own mirror on GCS.
+
+The mirror only contains RPMs that the sandboxroot actually uses, and is managed
+by running the `mirror` tool from this directory.
+
+Using the mirror
+---
+
+The mirror is enabled by default whenever you use Bazel (see repositories.bzl in this directory).
+
+Updating the mirror
+---
+
+Any time you run `third_party/sandboxroot/regenerate.sh`, the last step calls `mirror sync`. If that fails for some reason (e.g. you were not logged into GCS), you can run it manually:
+
+```
+$ bazel run :mirror sync
+```
+
+Checking the mirror
+---
+
+If you want to just check whether everything's properly synced, you can run:
+
+```
+$ bazel run :mirror check
+```
+
+To do a full scan (downloading and checking SHA256 sums) do:
+
+```
+$ bazel run :mirror check --deep
+```
diff --git a/third_party/sandboxroot/mirror/bazeldnf.go b/third_party/sandboxroot/mirror/bazeldnf.go
new file mode 100644
index 0000000..4f85581
--- /dev/null
+++ b/third_party/sandboxroot/mirror/bazeldnf.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+ "fmt"
+
+ "go.starlark.net/starlark"
+)
+
+// getBazelDNFFiles parses third_party/sandboxroot/repositories.bzl (at the given
+// path) into a list of rpmDefs. It does so by loading the .bzl file into a
+// minimal starlark interpreter that emulates enough of the Bazel internal API to
+// get things going.
+//
+// The only load() target supported is @bazeldnf//:deps.bzl, and the only symbol
+// it provides is rpm(). The file must define a sandbox_dependencies function,
+// which is called without arguments; every rpm() call made while it runs is
+// validated through newRPMDef and appended, in call order, to the result.
+//
+// An error is returned if the file cannot be executed, if it does not define
+// sandbox_dependencies, or if any rpm() call has malformed arguments.
+func getBazelDNFFiles(path string) ([]*rpmDef, error) {
+	var res []*rpmDef
+
+	// rpm will be called any time the Starlark code calls rpm() from
+	// @bazeldnf//:deps.bzl.
+	rpm := func(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+		var name, sha256 starlark.String
+		var urls *starlark.List
+		if err := starlark.UnpackArgs("rpm", args, kwargs, "name", &name, "sha256", &sha256, "urls", &urls); err != nil {
+			return nil, err
+		}
+		it := urls.Iterate()
+		defer it.Done()
+
+		urlsS := make([]string, 0, urls.Len())
+		var url starlark.Value
+		for it.Next(&url) {
+			// Use a checked assertion instead of comparing type names.
+			urlS, ok := url.(starlark.String)
+			if !ok {
+				return nil, fmt.Errorf("urls must be a list of strings")
+			}
+			urlsS = append(urlsS, urlS.GoString())
+		}
+
+		ext, err := newRPMDef(name.GoString(), sha256.GoString(), urlsS)
+		if err != nil {
+			return nil, fmt.Errorf("invalid rpm: %w", err)
+		}
+		res = append(res, ext)
+		return starlark.None, nil
+	}
+
+	thread := &starlark.Thread{
+		Name: "fakebazel",
+		Load: func(thread *starlark.Thread, module string) (starlark.StringDict, error) {
+			switch module {
+			case "@bazeldnf//:deps.bzl":
+				return map[string]starlark.Value{
+					"rpm": starlark.NewBuiltin("rpm", rpm),
+				}, nil
+			}
+			return nil, fmt.Errorf("load of %q not implemented in fakebazel", module)
+		},
+	}
+	globals, err := starlark.ExecFile(thread, path, nil, nil)
+	if err != nil {
+		return nil, fmt.Errorf("executing failed: %w", err)
+	}
+	if !globals.Has("sandbox_dependencies") {
+		return nil, fmt.Errorf("does not contain sandbox_dependencies")
+	}
+	// Calling the macro is what actually triggers the rpm() callbacks above.
+	_, err = starlark.Call(thread, globals["sandbox_dependencies"], nil, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to call sandbox_dependencies: %w", err)
+	}
+	return res, nil
+}
diff --git a/third_party/sandboxroot/mirror/external.go b/third_party/sandboxroot/mirror/external.go
new file mode 100644
index 0000000..2c8bcdf
--- /dev/null
+++ b/third_party/sandboxroot/mirror/external.go
@@ -0,0 +1,234 @@
+package main
+
+import (
+ "context"
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "net/url"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "cloud.google.com/go/storage"
+ "github.com/cenkalti/backoff/v4"
+ "k8s.io/klog/v2"
+)
+
+// rpmDef describes a single RPM dependency of the sandboxroot, as declared by
+// one rpm() call in repositories.bzl.
+type rpmDef struct {
+	// name is the internal bazeldnf/bazel name of the dependency.
+	name string
+	// sha256 is the expected SHA256 sum of the RPM file, hex encoded.
+	sha256 string
+
+	// urls lists every location the RPM file can be downloaded from.
+	urls []*url.URL
+	// mpath is the mirror-root-relative path (fedora/linux/...) shared by all
+	// of the urls.
+	mpath string
+}
+
+// newRPMDef builds and validates an rpmDef based on raw data from
+// repositories.bzl.
+func newRPMDef(name string, sha256 string, urls []string) (*rpmDef, error) {
+ if len(urls) < 1 {
+ return nil, fmt.Errorf("needs at least one URL")
+ }
+ var urlsP []*url.URL
+
+ // Look through all URLs and make sure they're valid Fedora mirror paths, and
+ // that all the mirror paths are the same.
+ path := ""
+ for _, us := range urls {
+ u, err := url.Parse(us)
+ if err != nil {
+ return nil, fmt.Errorf("url invalid %w", err)
+ }
+
+ mpath, err := getFedoraMirrorPath(u)
+ if err != nil {
+ return nil, fmt.Errorf("unexpected url %s: %w", us, err)
+ }
+
+ // If this isn't the first mirror path we've seen, make sure they're the same.
+ if path == "" {
+ path = mpath
+ } else {
+ if path != mpath {
+ return nil, fmt.Errorf("url path difference, %s vs %s", path, mpath)
+ }
+ }
+ urlsP = append(urlsP, u)
+ }
+ return &rpmDef{
+ name: name,
+ sha256: sha256,
+ urls: urlsP,
+ mpath: path,
+ }, nil
+}
+
+// getFedoraMirrorPath takes a full URL to a mirrored RPM and returns its
+// mirror-root-relative path, ie. the path which starts with fedora/linux/....
+//
+// The path must contain a fedora/linux/ component followed by at least five
+// more elements, laid out as:
+//
+//	fedora/linux/{releases,updates}/<any>/Everything/x86_64/...
+//
+// Anything else is rejected with an error naming the offending element.
+func getFedoraMirrorPath(u *url.URL) (string, error) {
+	parts := strings.Split(u.Path, "/")
+
+	// Find fedora/linux/... and drop everything in front of it.
+	found := false
+	for i, p := range parts {
+		if p == "fedora" && (i+1) < len(parts) && parts[i+1] == "linux" {
+			parts = parts[i:]
+			found = true
+			break
+		}
+	}
+	if !found || len(parts) < 7 {
+		return "", fmt.Errorf("does not look like a fedora mirror URL")
+	}
+
+	// Make sure the rest of the path makes some vague sense. parts[3] is
+	// deliberately not checked.
+	switch parts[2] {
+	case "releases", "updates":
+	default:
+		return "", fmt.Errorf("unexpected category %q", parts[2])
+	}
+	if parts[4] != "Everything" {
+		// Report the element that was actually checked.
+		return "", fmt.Errorf("unexpected category %q", parts[4])
+	}
+	if parts[5] != "x86_64" {
+		return "", fmt.Errorf("unexpected architecture %q", parts[5])
+	}
+
+	// Return the path rebuilt and starting at fedora/linux/...
+	return strings.Join(parts, "/"), nil
+}
+
+// validateOurs checks if our mirror has a copy of this RPM. If deep is true, the
+// file will be downloaded and its SHA256 verified. Otherwise, a simple HEAD
+// request is used.
+//
+// It returns true if the mirror answered with 200 OK (and, for a deep check,
+// the content hash matched). It returns false without an error if the mirror
+// answered with any other non-5xx status or if the hash did not match. Transport
+// errors and 5xx responses are retried with exponential backoff until the
+// timeout expires, at which point an error is returned.
+func (e *rpmDef) validateOurs(ctx context.Context, deep bool) (bool, error) {
+	// A HEAD request is cheap, but a deep check downloads the whole RPM, so give
+	// it the same time budget as a download from an upstream mirror (see get).
+	method, timeout := "HEAD", 2*time.Second
+	if deep {
+		method, timeout = "GET", 60*time.Second
+	}
+	ctxT, ctxC := context.WithTimeout(ctx, timeout)
+	defer ctxC()
+
+	target := ourMirrorURL(e.mpath)
+	want := strings.ToLower(e.sha256)
+
+	bo := backoff.NewExponentialBackOff()
+	var found bool
+	err := backoff.Retry(func() error {
+		// Reset on every attempt so a previous try cannot leak its result.
+		found = false
+
+		req, err := http.NewRequestWithContext(ctxT, method, target, nil)
+		if err != nil {
+			return backoff.Permanent(err)
+		}
+		res, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return err
+		}
+		defer res.Body.Close()
+
+		if res.StatusCode >= 500 {
+			// Server-side trouble says nothing about whether the file exists;
+			// retry instead of reporting the file as missing.
+			return fmt.Errorf("mirror returned %s", res.Status)
+		}
+		if res.StatusCode != http.StatusOK {
+			return nil
+		}
+		if !deep {
+			found = true
+			return nil
+		}
+
+		// Stream the body through the hash instead of buffering it in memory.
+		h := sha256.New()
+		if _, err := io.Copy(h, res.Body); err != nil {
+			return err
+		}
+		got := hex.EncodeToString(h.Sum(nil))
+		if want != got {
+			log.Printf("SHA256 mismatch for %s: wanted %s, got %s", e.name, want, got)
+			return nil
+		}
+		found = true
+		return nil
+	}, backoff.WithContext(bo, ctxT))
+	if err != nil {
+		return false, err
+	}
+	return found, nil
+}
+
+// mirrorToOurs attempts to download this RPM from a mirror that's not ours and
+// upload it to our mirror via the given bucket.
+//
+// Source URLs are tried in order, skipping any that point at our own mirror. A
+// failed download is logged and the next source is tried. The first successful
+// download (which includes a SHA256 check, see get) is uploaded, and the result
+// of that upload is returned. If no source could be downloaded, an error is
+// returned.
+func (e *rpmDef) mirrorToOurs(ctx context.Context, bucket *storage.BucketHandle) error {
+	log.Printf("Mirroring %s ...", e.name)
+
+	ours := ourMirrorURL()
+	// Object names use forward slashes regardless of the host OS.
+	objName := filepath.ToSlash(filepath.Join(flagMirrorBucketSubdir, e.mpath))
+
+	for _, source := range e.urls {
+		// Skip our own mirror as a source.
+		if strings.HasPrefix(source.String(), ours) {
+			continue
+		}
+
+		log.Printf(" Getting %s ...", source)
+		data, err := e.get(ctx, source.String())
+		if err != nil {
+			klog.Errorf(" Failed: %v", err)
+			continue
+		}
+
+		log.Printf(" Uploading to %s...", objName)
+		return uploadToBucket(ctx, bucket, objName, data)
+	}
+	return fmt.Errorf("all mirrors failed")
+}
+
+// uploadToBucket writes data as the object with the given name into bucket.
+func uploadToBucket(ctx context.Context, bucket *storage.BucketHandle, name string, data []byte) error {
+	// An unfinished GCS write is aborted by cancelling its context; do so on
+	// every exit path so a failed Write does not leave the upload dangling.
+	// Cancelling after a successful Close is harmless.
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	wr := bucket.Object(name).NewWriter(ctx)
+	if _, err := wr.Write(data); err != nil {
+		return fmt.Errorf("write to %s failed: %w", name, err)
+	}
+	if err := wr.Close(); err != nil {
+		return fmt.Errorf("close of %s failed: %w", name, err)
+	}
+	return nil
+}
+
+// get downloads the given RPM from the given URL and checks its SHA256.
+//
+// Transport errors, 5xx responses and 429 responses are retried with
+// exponential backoff for up to 60 seconds in total. Any other non-200 response
+// fails immediately. The downloaded data is only returned if its SHA256 sum
+// matches the one recorded in the rpmDef.
+func (e *rpmDef) get(ctx context.Context, url string) ([]byte, error) {
+	ctxT, ctxC := context.WithTimeout(ctx, 60*time.Second)
+	defer ctxC()
+
+	bo := backoff.NewExponentialBackOff()
+	var data []byte
+	err := backoff.Retry(func() error {
+		req, err := http.NewRequestWithContext(ctxT, "GET", url, nil)
+		if err != nil {
+			return backoff.Permanent(err)
+		}
+		res, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return err
+		}
+		defer res.Body.Close()
+
+		// Without this check an error page would be read as if it were the RPM
+		// and then be reported as a confusing SHA256 mismatch.
+		switch {
+		case res.StatusCode == http.StatusOK:
+		case res.StatusCode >= 500, res.StatusCode == http.StatusTooManyRequests:
+			// Likely transient, let backoff retry.
+			return fmt.Errorf("unexpected HTTP status %s", res.Status)
+		default:
+			// Eg. 404: retrying will not make the file appear.
+			return backoff.Permanent(fmt.Errorf("unexpected HTTP status %s", res.Status))
+		}
+
+		data, err = io.ReadAll(res.Body)
+		return err
+	}, backoff.WithContext(bo, ctxT))
+	if err != nil {
+		return nil, err
+	}
+
+	h := sha256.New()
+	h.Write(data)
+	got := hex.EncodeToString(h.Sum(nil))
+	want := strings.ToLower(e.sha256)
+	if want != got {
+		return nil, fmt.Errorf("sha256 mismatch: wanted %s, got %s", want, got)
+	}
+	return data, nil
+}
diff --git a/third_party/sandboxroot/mirror/main.go b/third_party/sandboxroot/mirror/main.go
new file mode 100644
index 0000000..c7e823e
--- /dev/null
+++ b/third_party/sandboxroot/mirror/main.go
@@ -0,0 +1,203 @@
+package main
+
+import (
+ "fmt"
+ "log"
+ "net/url"
+ "path/filepath"
+ "strings"
+
+ "cloud.google.com/go/storage"
+ "github.com/spf13/cobra"
+
+ "source.monogon.dev/build/toolbase"
+)
+
+// Command line flags, registered as persistent flags on rootCmd in main.
+var (
+ // flagDeep is set by --deep: always download files fully during check/sync
+ // to make sure the SHA256 matches.
+ flagDeep bool
+ // flagMirrorBucketName is set by --bucket_name: the GCS bucket to mirror
+ // into.
+ flagMirrorBucketName string
+ // flagMirrorBucketSubdir is set by --bucket_subdir: the subpath within the
+ // bucket that data is uploaded to.
+ flagMirrorBucketSubdir string
+)
+
+// rootCmd is the top-level `mirror` command. It has no run function of its own;
+// the check and sync subcommands and the persistent flags are attached to it in
+// main.
+var rootCmd = &cobra.Command{
+ Use: "mirror",
+ Short: "Developer/CI tool to make sure our RPM mirror for the sandboxroot is up to date",
+ // Per cobra's documentation, this suppresses the usage text when a command
+ // returns an error.
+ SilenceUsage: true,
+}
+
+// ourMirrorURL returns a fully formed URL-string to our mirror (as defined by
+// flags), optionally appending the given parts as file path parts.
+//
+// The URL has the form
+// https://storage.googleapis.com/<bucket_name>/<bucket_subdir>/<parts...>.
+func ourMirrorURL(parts ...string) string {
+	elems := make([]string, 0, 2+len(parts))
+	elems = append(elems, flagMirrorBucketName, flagMirrorBucketSubdir)
+	elems = append(elems, parts...)
+
+	u := url.URL{
+		Scheme: "https",
+		Host:   "storage.googleapis.com",
+		// filepath.Join cleans the path and drops empty elements, but uses the
+		// host OS separator; URL paths must always use forward slashes.
+		Path: filepath.ToSlash(filepath.Join(elems...)),
+	}
+	return u.String()
+}
+
+// progress reports to the user how many of the total files have been handled
+// so far. The line ends in a carriage return rather than a newline, so each
+// call overwrites the previous status on a terminal.
+func progress(done, total int) {
+	status := fmt.Sprintf("%d/%d files done...\r", done, total)
+	fmt.Print(status)
+}
+
+func checkMirrorURLs(rpms []*rpmDef) error {
+ log.Printf("Checking all RPMs are using our mirror...")
+ allCorrect := true
+ for _, rpm := range rpms {
+ urls := rpm.urls
+
+ haveOur := false
+ haveExternal := false
+ for _, u := range urls {
+ if strings.HasPrefix(u.String(), ourMirrorURL()) {
+ haveOur = true
+ } else {
+ haveExternal = true
+ }
+ if haveOur && haveExternal {
+ break
+ }
+ }
+ if !haveOur {
+ allCorrect = false
+ log.Printf("RPM %s does not contain our mirror in its URLs", rpm.name)
+ }
+ if !haveExternal {
+ allCorrect = false
+ log.Printf("RPM %s does not contain any upstream mirror in its URLs", rpm.name)
+ }
+ }
+ if !allCorrect {
+ return fmt.Errorf("some RPMs have incorrect mirror urls")
+ }
+ return nil
+}
+
+func getRepositoriesBzl() string {
+ ws, err := toolbase.WorkspaceDirectory()
+ if err != nil {
+ log.Fatalf("Failed to figure out workspace location: %v", err)
+ }
+ return filepath.Join(ws, "third_party/sandboxroot/repositories.bzl")
+}
+
+// checkCmd implements `mirror check`, a read-only verification of the mirror.
+var checkCmd = &cobra.Command{
+	Use:   "check",
+	Short: "Check that everything is okay (without performing actual mirroring)",
+	RunE:  runCheck,
+}
+
+// runCheck loads all RPM definitions from repositories.bzl, makes sure their
+// URL lists are sane, and then asks our mirror for every one of them (fully
+// downloading and hashing each file when --deep is set). Every missing RPM is
+// logged; an error is returned if any is missing or if a check cannot be
+// carried out.
+func runCheck(cmd *cobra.Command, args []string) error {
+	bzl := getRepositoriesBzl()
+	defs, err := getBazelDNFFiles(bzl)
+	if err != nil {
+		return fmt.Errorf("could not get RPMs from %s: %v", bzl, err)
+	}
+
+	if err := checkMirrorURLs(defs); err != nil {
+		return err
+	}
+
+	if flagDeep {
+		log.Printf("Verifying contents of all mirrored files...")
+	} else {
+		log.Printf("Checking if all files are present on mirror... (use --deep to download and check hashes)")
+	}
+
+	missing := 0
+	for idx, def := range defs {
+		ok, err := def.validateOurs(cmd.Context(), flagDeep)
+		if err != nil {
+			return fmt.Errorf("checking %s failed: %v", def.name, err)
+		}
+		if !ok {
+			log.Printf("Missing %s in mirror", def.name)
+			missing++
+		}
+		progress(idx+1, len(defs))
+	}
+	if missing > 0 {
+		return fmt.Errorf("some packages missing in mirror, run `mirror sync`")
+	}
+
+	log.Printf("All good.")
+	return nil
+}
+
+// syncCmd implements `mirror sync`: it makes sure every RPM from
+// repositories.bzl is present on our mirror, uploading whatever is missing (or,
+// with --deep, whatever fails its SHA256 check) and then re-verifying each
+// upload with a full download.
+var syncCmd = &cobra.Command{
+	Use:   "sync",
+	Short: "Mirror all missing dependencies",
+	Long: `
+Check the existence of every file in our mirror (or, with --deep, download and
+verify it) and upload it if it's missing. If an upload occurred, a full
+re-download will be performed for verification.
+`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		ctx := cmd.Context()
+
+		path := getRepositoriesBzl()
+		rpms, err := getBazelDNFFiles(path)
+		if err != nil {
+			return fmt.Errorf("could not get RPMs from %s: %w", path, err)
+		}
+
+		if err := checkMirrorURLs(rpms); err != nil {
+			return err
+		}
+
+		client, err := storage.NewClient(ctx)
+		if err != nil {
+			if strings.Contains(err.Error(), "could not find default credentials") {
+				log.Printf("Try running gcloud auth application-default login --no-browser")
+			}
+			return fmt.Errorf("could not build google cloud storage client: %w", err)
+		}
+		defer client.Close()
+		bucket := client.Bucket(flagMirrorBucketName)
+
+		if !flagDeep {
+			log.Printf("Checking for any missing files...")
+		} else {
+			log.Printf("Verifying all files and uploading if missing or corrupted...")
+		}
+
+		for i, rpm := range rpms {
+			has, err := rpm.validateOurs(ctx, flagDeep)
+			if err != nil {
+				return fmt.Errorf("checking %s failed: %w", rpm.name, err)
+			}
+			if !has {
+				// mirrorToOurs logs the start of the mirroring itself.
+				if err := rpm.mirrorToOurs(ctx, bucket); err != nil {
+					return fmt.Errorf("mirroring %s failed: %w", rpm.name, err)
+				}
+				// Always verify a fresh upload by content, even without --deep.
+				log.Printf("Verifying %s...", rpm.name)
+				has, err = rpm.validateOurs(ctx, true)
+				if err != nil {
+					return fmt.Errorf("verifying %s failed: %w", rpm.name, err)
+				}
+				if !has {
+					return fmt.Errorf("post-mirror validation of %s failed", rpm.name)
+				}
+			}
+			progress(i+1, len(rpms))
+		}
+
+		log.Printf("All good.")
+		return nil
+	},
+}
+
+// main registers the persistent flags and the check/sync subcommands on
+// rootCmd and runs it. If the executed command fails, the error is logged and
+// the process exits with a non-zero status, so that callers such as
+// regenerate.sh or CI notice the failure.
+func main() {
+	flags := rootCmd.PersistentFlags()
+	flags.StringVar(&flagMirrorBucketName, "bucket_name", "monogon-infra-public", "Name of GCS bucket to mirror into.")
+	flags.StringVar(&flagMirrorBucketSubdir, "bucket_subdir", "mirror", "Subpath in bucket to upload data to.")
+	flags.BoolVar(&flagDeep, "deep", false, "Always download files fully during check/sync to make sure the SHA256 matches.")
+	rootCmd.AddCommand(checkCmd)
+	rootCmd.AddCommand(syncCmd)
+
+	// Errors are reported exactly once, by log.Fatalf below, which also exits
+	// with status 1. Execute on its own only returns the error.
+	rootCmd.SilenceErrors = true
+	if err := rootCmd.Execute(); err != nil {
+		log.Fatalf("Error: %v", err)
+	}
+}
diff --git a/third_party/sandboxroot/regenerate.sh b/third_party/sandboxroot/regenerate.sh
index 76b9a90..7224d4e 100755
--- a/third_party/sandboxroot/regenerate.sh
+++ b/third_party/sandboxroot/regenerate.sh
@@ -113,3 +113,8 @@
mv ${DIR}/BUILD.bazel.in ${DIR}/BUILD.bazel
rm ${DIR}/repositories.bzl.in
+
+
+# Mirror everything
+bazel ${BAZEL_ARGS} \
+ run //third_party/sandboxroot/mirror sync
\ No newline at end of file