build: new workspace status script, new versioning scheme
This reimplements the workspace status script and fixes a bunch of
issues along the way:
1. The script is now in Python, which allows us to better express some
complexity.
2. We now retrieve the Kubernetes version from
third_party/go/repositories.bzl, thereby unhardcoding the Kubernetes
version.
3. We now support multiple 'products' with different versions.
Currently we have only one product, which is metropolis. This is the
basis for tying Git tags to releases of a particular codebase within
the monorepo.
4. Git tags are now parsed to retrieve the version of a product, instead
of it being hardcoded in the workspace status script.
5. We use a vX.Y.Z-devNNN versioning system for builds which have
happened in between tags. NNN is the number of commits since vX.Y.Z.
This changes the current version of Metropolis from 0.1 to 0.1.0-dev440.
In an upcoming change we will implement a Go-side library to capture
information about the workspace state, both the per-repo data like VCS
state and per-product data like versions. For now we keep a stopgap in
the new script to emit version data as expected by the current codebase.
Change-Id: I07cb17efc7d60aa269627a549f56ffa1bfdeb5b7
Reviewed-on: https://review.monogon.dev/c/monogon/+/2304
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Reviewed-by: Leopold Schabel <leo@monogon.tech>
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/.bazelrc b/.bazelrc
index 28b7303..3fdf8b7 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -67,7 +67,7 @@
build --cxxopt=-std=c++17
# Set workspace status file and stamp
-build --stamp --workspace_status_command=./build/print-workspace-status.sh
+build --stamp --workspace_status_command=./build/print-workspace-status.py
# Load CI bazelrc if present.
try-import %workspace%/ci.bazelrc
diff --git a/build/print-workspace-status.py b/build/print-workspace-status.py
new file mode 100755
index 0000000..8339cd5
--- /dev/null
+++ b/build/print-workspace-status.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3
+"""Workspace status script used for build stamping."""
+
+# Treat this script as shell code, but with Python syntax. We want to remain as
+# simple as possible, and absolutely never use any non-standard Python library.
+# This script should be able to run on any 'modern' Linux distribution with
+# Python 3.9 or newer (built-in generics such as dict[str, str] are used).
+
+# The following versioning concepts apply:
+# 1. Version numbers follow the Semantic Versioning 2.0 spec.
+# 2. Git tags in the form `<product>-vX.Y.Z` will be used as a basis for
+# versioning a build. If the currently built release is exactly the same as
+# such a tag, it will be versioned at vX.Y.Z. Otherwise, a -devNNN suffix
+# will be appended to signify the amount of commits since the release.
+# 3. Product git tags are only made up of a major/minor/patch version.
+# Prerelease and build tags are assigned by the build system and this
+# script, Git tags have no influence on them.
+# 4. 'Products' are release numbering trains within the Monogon monorepo. This
+# means there is no such thing as a 'version' for the monorepo by itself,
+# only within the context of some product.
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+import os
+import re
+import subprocess
+import time
+
+from typing import Optional
+
+
+# Variables to output. These will be printed to stdout at the end of the script
+# runtime, sorted by key.
+variables: dict[str, str] = {}
+
+# Git build tree status: dirty iff `git status --porcelain` reports any change.
+git_tree_state: str = "clean"
+if subprocess.check_output(["git", "status", "--porcelain"]).strip() != b"":
+    git_tree_state = "dirty"
+
+# Git commit hash.
+git_commit: str = (
+    subprocess.check_output(["git", "rev-parse", "HEAD^{commit}"]).decode().strip()
+)
+
+# Git tags pointing at this commit.
+git_tags_b: list[bytes] = subprocess.check_output(
+    ["git", "tag", "--points-at", "HEAD"]
+).split(b"\n")
+git_tags: list[str] = [t for t in (raw.decode().strip() for raw in git_tags_b) if t != ""]
+
+# Build timestamp, respecting SOURCE_DATE_EPOCH for reproducible builds.
+build_timestamp = int(time.time())
+sde = os.environ.get("SOURCE_DATE_EPOCH")
+if sde is not None:
+    build_timestamp = int(sde)
+
+# Image tag to use in rules_docker. Since USER might not be set on CI, we have
+# to craft this ourselves.
+user = os.environ.get("USER", "unknown")
+image_tag = f"{user}-{build_timestamp}"
+
+variables["STABLE_MONOGON_gitCommit"] = git_commit
+variables["STABLE_MONOGON_gitTreeState"] = git_tree_state
+variables["IMAGE_TAG"] = image_tag
+
+# Per product. Each product has its own semver-style version number, which is
+# deduced from git tags.
+#
+# For example: metropolis version 1.2.3 would be tagged 'metropolis-v1.2.3'.
+@dataclass
+class Version:
+    """Describes a semver version for a given product."""
+
+    product: str  # Product name, e.g. 'metropolis'.
+    version: str  # 'vX.Y.Z', optionally followed by a '-devNNN' suffix.
+
+
+def parse_tag(tag: str, product: str) -> Optional[Version]:
+    """Turn a `<product>-vX.Y.Z` git tag into a Version, or None otherwise."""
+    head, sep, semver = tag.partition(product + "-")
+    if sep == "" or head != "":  # Not a `<product>-...` tag.
+        return None
+    # The first release of Metropolis was v0.1, which we extend to v0.1.0.
+    if (product, semver) == ("metropolis", "v0.1"):
+        semver = "v0.1.0"
+    # Only care about proper semver tags. Or at least proper enough (this
+    # will still accept v01.01.01 which it probably shouldn't).
+    if re.match(r"^v[0-9]+\.[0-9]+\.[0-9]+$", semver) is None:
+        return None
+    return Version(product, semver)
+
+
+for product in ["metropolis"]:
+    versions = []
+    # Get exact versions from tags.
+    for tag in git_tags:
+        version = parse_tag(tag, product)
+        if version is not None:
+            versions.append(version)
+    if len(versions) == 0:
+        # No exact version found. Use latest tag for the given product and
+        # append a '-devXXX' tag based on number of commits since that tag.
+        for tag in (
+            subprocess.check_output(
+                ["git", "tag", "--sort=-version:refname", "--merged", "HEAD"]
+            )
+            .decode()
+            .strip()
+            .split("\n")
+        ):
+            version = parse_tag(tag, product)
+            if version is None:
+                continue
+            # Found the latest tag for this product. Augment it with the
+            # -devXXX suffix and add it to our versions.
+            count = (
+                subprocess.check_output(["git", "rev-list", tag + "..HEAD", "--count"])
+                .decode()
+                .strip()
+            )
+            version.version += f"-dev{count}"
+            versions.append(version)
+            break
+    if len(versions) == 0:
+        # This product never had a release! Use v0.0.0 as a fallback.
+        versions.append(Version(product, "v0.0.0"))
+    # Find the highest version and use that. X.Y.Z is compared numerically, as
+    # a plain string sort would rank v0.9.0 above v0.10.0.
+    version = max(
+        versions, key=lambda v: [int(n) for n in v.version[1:].partition("-")[0].split(".")]
+    )
+    variables[f"STABLE_MONOGON_{product}_gitVersion"] = version.version
+
+
+# Special treatment for Kubernetes, which uses these stamp values in its build
+# system. We populate the Kubernetes version from whatever is in
+# //third_party/go/repositories.bzl.
+def parse_repositories_bzl(path: str) -> dict[str, str]:
+    """
+    Shoddily parse a Gazelle-created repositories.bzl into a map of
+    name->version.
+
+    This relies heavily on repositories.bzl being correctly formatted and
+    sorted. Raises Exception on a malformed or misplaced name/version line.
+
+    If this breaks, it's probably best to try to use the actual Python parser
+    to deal with this, eg. by creating a fake environment for the .bzl file to
+    be parsed.
+    """
+    # Main parser state: None where we don't expect a version line, set to some
+    # value otherwise.
+    name: Optional[str] = None
+
+    res = {}
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if line == "go_repository(":
+                name = None
+                continue
+            if line.startswith("name ="):
+                if name is not None:
+                    raise Exception("parse error in repositories.bzl: repeated name?")
+                if line.count('"') != 2:
+                    raise Exception(
+                        "parse error in repositories.bzl: invalid name line: " + line
+                    )
+                name = line.split('"')[1]
+                continue
+            if line.startswith("version ="):
+                if name is None:
+                    raise Exception("parse error in repositories.bzl: version before name")
+                if line.count('"') != 2:
+                    raise Exception(
+                        "parse error in repositories.bzl: invalid version line: " + line
+                    )
+                version = line.split('"')[1]
+                res[name] = version
+                name = None
+    return res
+
+
+# Parse repositories.bzl (the path is relative to the current working directory).
+go_versions = parse_repositories_bzl("third_party/go/repositories.bzl")
+
+# Find Kubernetes version. Must be plain vX.Y.Z; X and Y are captured below.
+kubernetes_version: str = go_versions.get("io_k8s_kubernetes")
+if kubernetes_version is None:
+    raise Exception("could not figure out Kubernetes version")
+kubernetes_version_parsed = re.match(
+    r"^v([0-9]+)\.([0-9]+)\.[0-9]+$", kubernetes_version
+)
+if not kubernetes_version_parsed:
+    raise Exception("invalid Kubernetes version: " + kubernetes_version)
+
+# The Kubernetes build tree is considered clean iff the monorepo build tree is
+# considered clean.
+variables["KUBERNETES_gitTreeState"] = git_tree_state
+variables["KUBERNETES_buildDate"] = datetime.fromtimestamp(
+    build_timestamp, timezone.utc
+).strftime("%Y-%m-%dT%H:%M:%SZ")  # UTC, second resolution.
+variables["STABLE_KUBERNETES_gitMajor"] = kubernetes_version_parsed[1]
+variables["STABLE_KUBERNETES_gitMinor"] = kubernetes_version_parsed[2]
+variables["STABLE_KUBERNETES_gitVersion"] = kubernetes_version + "+mngn"
+
+# Backwards compat with existing stamping data as expected by the monorepo codebase.
+# TODO(q3k): remove this once we migrate away into the new versioning data format in metropolis.
+variables["STABLE_METROPOLIS_gitCommit"] = variables["STABLE_MONOGON_gitCommit"]
+variables["STABLE_METROPOLIS_gitTreeState"] = variables["STABLE_MONOGON_gitTreeState"]
+# Strip the leading 'v' (e.g. 'v0.1.0' -> '0.1.0').
+variables["STABLE_METROPOLIS_version"] = variables["STABLE_MONOGON_metropolis_gitVersion"][1:]
+
+# Emit variables to stdout, one 'KEY value' line each, for consumption by Bazel and targets.
+for key in sorted(variables.keys()):
+    print("{} {}".format(key, variables[key]))
diff --git a/build/print-workspace-status.sh b/build/print-workspace-status.sh
deleted file mode 100755
index 1aeebe0..0000000
--- a/build/print-workspace-status.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env bash
-
-# Workspace status used for build stamping.
-set -o errexit
-set -o nounset
-set -o pipefail
-
-# TODO: Figure out how to version Metropolis
-METROPOLIS_VERSION=0.1
-
-KUBERNETES_gitTreeState="clean"
-if [ ! -z "$(git status --porcelain)" ]; then
- KUBERNETES_gitTreeState="dirty"
-fi
-
-# TODO(q3k): unify with //third_party/go/repsitories.bzl.
-KUBERNETES_gitMajor="1"
-KUBERNETES_gitMinor="24"
-KUBERNETES_gitVersion="v1.24.2+mngn"
-
-# CI doesnt have the user set...
-IMAGE_TAG=${IMAGE_TAG:-${USER:-unknown}-$(date +%s)}
-
-cat <<EOF
-KUBERNETES_gitCommit $(git rev-parse "HEAD^{commit}")
-KUBERNETES_gitTreeState $KUBERNETES_gitTreeState
-KUBERNETES_buildDate $(date \
- ${SOURCE_DATE_EPOCH:+"--date=@${SOURCE_DATE_EPOCH}"} \
- -u +'%Y-%m-%dT%H:%M:%SZ')
-STABLE_KUBERNETES_gitVersion $KUBERNETES_gitVersion
-STABLE_KUBERNETES_gitMajor $KUBERNETES_gitMajor
-STABLE_KUBERNETES_gitMinor $KUBERNETES_gitMinor
-
-STABLE_METROPOLIS_gitCommit $(git rev-parse "HEAD^{commit}")
-STABLE_METROPOLIS_gitTreeState $KUBERNETES_gitTreeState
-STABLE_METROPOLIS_version $METROPOLIS_VERSION
-
-IMAGE_TAG $IMAGE_TAG
-EOF