workspace: introduce bazel downloader config and replace bazeldnf mirror

This rather small change reworks the way we cache our dependencies
by instructing bazel to fetch everything through a custom proxy. See
//build/mirror_proxy:README.md for more information.

Closes monogon-dev/monogon#178

Change-Id: Ic671fc8233a1cbf37427bbc96339ea8108310e21
Reviewed-on: https://review.monogon.dev/c/monogon/+/3686
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/build/bazel/bazel_downloader.cfg b/build/bazel/bazel_downloader.cfg
new file mode 100644
index 0000000..bdbad08
--- /dev/null
+++ b/build/bazel/bazel_downloader.cfg
@@ -0,0 +1,13 @@
+# Allow requests to go.dev for finding the current go sdk versions.
+allow go.dev
+
+# Allow requests to the bazel registry for ensuring we can update our
+# bzlmod deps.
+allow bcr.bazel.build
+
+# Allow requests to our mirror and rewrite all urls to use said mirror.
+allow mirror.monogon.dev
+rewrite ^((?!go\.dev|bcr\.bazel\.build).*) mirror.monogon.dev/$1
+
+# Block all other URLs. You can comment out this one to allow a fallback.
+block *
diff --git a/build/bazel/go.MODULE.bazel b/build/bazel/go.MODULE.bazel
index 19b2bfb..de8ad0d 100644
--- a/build/bazel/go.MODULE.bazel
+++ b/build/bazel/go.MODULE.bazel
@@ -88,7 +88,7 @@
     "io_k8s_kubernetes",
     "io_k8s_pod_security_admission",
     "io_k8s_utils",
-    "net_starlark_go",
+    "org_golang_google_api",
     "org_golang_google_genproto_googleapis_api",
     "org_golang_google_grpc",
     "org_golang_google_protobuf",
diff --git a/build/mirror_proxy/BUILD.bazel b/build/mirror_proxy/BUILD.bazel
new file mode 100644
index 0000000..f3c7778
--- /dev/null
+++ b/build/mirror_proxy/BUILD.bazel
@@ -0,0 +1,44 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("@rules_oci//oci:defs.bzl", "oci_image", "oci_push")
+load("@rules_pkg//pkg:tar.bzl", "pkg_tar")
+
+go_library(
+    name = "mirror_proxy_lib",
+    srcs = ["main.go"],
+    importpath = "source.monogon.dev/build/mirror_proxy",
+    visibility = ["//visibility:private"],
+    deps = [
+        "@com_google_cloud_go_storage//:storage",
+        "@org_golang_google_api//option",
+    ],
+)
+
+go_binary(
+    name = "mirror_proxy",
+    embed = [":mirror_proxy_lib"],
+    visibility = ["//visibility:public"],
+)
+
+# Tarball containing only the proxy binary; used as the image layer below.
+pkg_tar(
+    name = "mirror_proxy_layer",
+    srcs = [":mirror_proxy"],
+)
+
+# Container image that runs the proxy binary on top of @distroless_base.
+oci_image(
+    name = "mirror_proxy_image",
+    base = "@distroless_base",
+    entrypoint = ["/mirror_proxy"],
+    tars = [":mirror_proxy_layer"],
+    visibility = ["//visibility:public"],
+    workdir = "/app",
+)
+
+# Pushes the image to the configured repository, tagged "latest".
+oci_push(
+    name = "mirror_proxy_push",
+    image = ":mirror_proxy_image",
+    remote_tags = ["latest"],
+    repository = "gcr.io/monogon-infra/build/mirror_proxy",
+)
diff --git a/build/mirror_proxy/README.md b/build/mirror_proxy/README.md
new file mode 100644
index 0000000..55ce82c
--- /dev/null
+++ b/build/mirror_proxy/README.md
@@ -0,0 +1,19 @@
+Bazel downloader mirror
+===
+
+This is a small tool which acts as a transparent proxy-ish mirror for use in the bazel downloader.
+By using a bazel_downloader.cfg we can instruct bazel to rewrite the download URLs and use a custom target instead. We use this to mirror all dependencies to our Google Cloud Storage (GCS) bucket.
+
+Usage
+---
+
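+This is expected to run with a given bucket name, a GCS credentials file and a single username/password pair (all passed as flags) which is used to authenticate requests. Objects that are already in the bucket are served to anyone. When an authenticated request is received for an object that is not in the bucket yet, the mirror downloads it from the original URL, stores it in the bucket and returns it. This is expected to be used by trusted users, e.g. employees.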
+
+Users should deploy a .netrc inside their home folder based on the following template to allow bazel to authenticate against the mirror.
+
+`~/.netrc`
+```
+machine mirror.monogon.dev
+login myfancyusername
+password mysecretpassword
+```
\ No newline at end of file
diff --git a/build/mirror_proxy/main.go b/build/mirror_proxy/main.go
new file mode 100644
index 0000000..9dd0d0e
--- /dev/null
+++ b/build/mirror_proxy/main.go
@@ -0,0 +1,205 @@
+// Binary mirror_proxy is a caching HTTP proxy that the Bazel downloader is
+// pointed at. Objects are served from a GCS bucket; authenticated requests
+// for objects missing from the bucket are fetched from upstream and mirrored.
+package main
+
+import (
+	"context"
+	"crypto/subtle"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"time"
+
+	"cloud.google.com/go/storage"
+	"google.golang.org/api/option"
+)
+
+var (
+	// flagUser and flagPass are the basic auth credentials a client has to
+	// present before the proxy fetches missing objects from upstream.
+	flagUser string
+	flagPass string
+	// flagMirrorBucketName is the GCS bucket that objects are served from
+	// and mirrored into.
+	flagMirrorBucketName string
+	// flagCredentialsFile is the credentials file passed to the GCS client.
+	flagCredentialsFile string
+)
+
+// readHeaderTimeout bounds how long a client may take to send its request
+// headers. Body transfers are not bounded, as mirrored artifacts can be
+// large.
+const readHeaderTimeout = 30 * time.Second
+
+// main parses the flags, builds the GCS client and serves mirrorHandler on
+// port 80. It only returns (by panicking) when the HTTP server fails.
+func main() {
+	flag.StringVar(&flagUser, "username", "", "Username required to enable s3 upload")
+	flag.StringVar(&flagPass, "password", "", "Password required to enable s3 upload")
+	flag.StringVar(&flagCredentialsFile, "credentials_file", "", "Credentials file to use for GCS")
+	flag.StringVar(&flagMirrorBucketName, "bucket_name", "monogon-bazel-mirror", "Name of GCS bucket to mirror into.")
+	flag.Parse()
+
+	if flagUser == "" || flagPass == "" {
+		log.Fatalf("Missing username or password flag")
+	}
+
+	if flagCredentialsFile == "" {
+		log.Fatalf("Missing credentials flag")
+	}
+
+	client, err := storage.NewClient(context.Background(), option.WithCredentialsFile(flagCredentialsFile))
+	if err != nil {
+		log.Fatalf("Could not build google cloud storage client: %v", err)
+	}
+
+	bucketClient := client.Bucket(flagMirrorBucketName)
+	handlerFunc := func(w http.ResponseWriter, r *http.Request) {
+		mirrorHandler(bucketClient, w, r)
+	}
+
+	srv := &http.Server{
+		Addr:              ":80",
+		Handler:           http.HandlerFunc(handlerFunc),
+		ReadHeaderTimeout: readHeaderTimeout,
+	}
+	log.Panic(srv.ListenAndServe())
+}
+
+// mirrorHandler serves a single request against the mirror bucket m.
+//
+// The request path without its leading slash is used as the object name
+// and, prefixed with "https://", as the upstream URL. Only GET requests
+// without query parameters are accepted. Objects found in the bucket are
+// served to anyone. Missing objects are reported as 404 to unauthenticated
+// clients; for authenticated clients they are fetched from upstream,
+// streamed to the client and stored in the bucket at the same time.
+func mirrorHandler(m *storage.BucketHandle, w http.ResponseWriter, r *http.Request) {
+	targetPath := strings.TrimPrefix(r.URL.Path, "/")
+	targetURL := "https://" + targetPath
+	if len(r.URL.Query()) != 0 {
+		targetURL += "?" + r.URL.Query().Encode()
+	}
+
+	if r.Method != http.MethodGet {
+		log.Printf("%s: invalid method %q: %v", r.RemoteAddr, targetURL, r.Method)
+		http.Error(w, "invalid method", http.StatusMethodNotAllowed)
+		return
+	}
+
+	if len(r.URL.Query()) != 0 {
+		log.Printf("%s: invalid query url: %q", r.RemoteAddr, targetURL)
+		http.Error(w, "URLs with query parameters are not supported", http.StatusNotAcceptable)
+		return
+	}
+
+	obj := m.Object(targetPath)
+	objR, err := obj.NewReader(r.Context())
+	switch {
+	case err == nil:
+		// Cache hit: serve the mirrored object, no authentication needed.
+		defer objR.Close()
+		log.Printf("%s: serving cached object %q", r.RemoteAddr, targetURL)
+
+		w.Header().Set("Content-Type", objR.Attrs.ContentType)
+		w.Header().Set("Content-Length", fmt.Sprintf("%d", objR.Attrs.Size))
+		w.WriteHeader(http.StatusOK)
+
+		if _, err := io.Copy(w, objR); err != nil {
+			log.Printf("%s: serving cached object %q failed: %v", r.RemoteAddr, targetURL, err)
+		}
+		return
+	case !errors.Is(err, storage.ErrObjectNotExist):
+		log.Printf("%s: fetching %q from bucket: %v", r.RemoteAddr, obj.ObjectName(), err)
+		http.Error(w, "internal server error", http.StatusInternalServerError)
+		return
+	case !isAuthenticated(r):
+		// Only authenticated clients may populate the mirror.
+		http.Error(w, "object not found in mirror", http.StatusNotFound)
+		return
+	}
+
+	// Cache miss from an authenticated client: fetch the object upstream.
+	// The request context is attached so that the fetch is aborted when the
+	// client goes away.
+	outReq, err := http.NewRequestWithContext(r.Context(), r.Method, targetURL, r.Body)
+	if err != nil {
+		log.Printf("%s: forwarding to %q failed: %v", r.RemoteAddr, targetURL, err)
+		http.Error(w, "internal server error", http.StatusInternalServerError)
+		return
+	}
+
+	copyHeader(outReq.Header, r.Header)
+	outReq.Header.Del("Authorization") // Don't forward our basic auth
+
+	res, err := http.DefaultClient.Do(outReq)
+	if err != nil {
+		log.Printf("%s: forwarding to %q failed: %v", r.RemoteAddr, targetURL, err)
+		http.Error(w, "could not reach endpoint", http.StatusBadGateway)
+		return
+	}
+	defer res.Body.Close()
+
+	// If not StatusOK, return upstream error
+	if res.StatusCode != http.StatusOK {
+		log.Printf("%s: serving upstream error %q: %s", r.RemoteAddr, targetURL, res.Status)
+
+		copyHeader(w.Header(), res.Header)
+		w.WriteHeader(res.StatusCode)
+
+		_, _ = io.Copy(w, res.Body) // best effort, the client may be gone
+		return
+	}
+
+	log.Printf("%s: populating object %q", r.RemoteAddr, targetURL)
+
+	// The upload runs on its own cancelable context: cancelling it before
+	// Close aborts the upload, so that a truncated transfer is never
+	// committed to the bucket as if it were the complete object.
+	uploadCtx, abortUpload := context.WithCancel(r.Context())
+	defer abortUpload()
+	objW := obj.If(storage.Conditions{DoesNotExist: true}).NewWriter(uploadCtx)
+
+	copyHeader(w.Header(), res.Header)
+	w.WriteHeader(res.StatusCode)
+
+	if _, err := io.Copy(io.MultiWriter(w, objW), res.Body); err != nil {
+		log.Printf("%s: streaming %q failed, not caching: %v", r.RemoteAddr, targetURL, err)
+		abortUpload()
+		_ = objW.Close() // releases the writer; expected to fail after the abort
+		return
+	}
+	if err := objW.Close(); err != nil {
+		log.Printf("%s: storing object %q failed: %v", r.RemoteAddr, targetURL, err)
+	}
+}
+
+// isAuthenticated reports whether r carries basic auth credentials matching
+// the configured username and password.
+func isAuthenticated(r *http.Request) bool {
+	user, pass, ok := r.BasicAuth()
+	if !ok {
+		return false
+	}
+
+	// Compare in constant time and evaluate both comparisons unconditionally
+	// so that response timing does not reveal which part mismatched.
+	userMatch := subtle.ConstantTimeCompare([]byte(user), []byte(flagUser))
+	passMatch := subtle.ConstantTimeCompare([]byte(pass), []byte(flagPass))
+	return userMatch&passMatch == 1
+}
+
+// copyHeader appends every value of every header in src to dst. Values
+// already present in dst are kept.
+func copyHeader(dst, src http.Header) {
+	for k, vv := range src {
+		for _, v := range vv {
+			dst.Add(k, v)
+		}
+	}
+}