Add C/C++ header rewriter
This adds a C/C++ header rewriter utility. See the comment at the top of main.go for a quick description of how it works.
No workspace rule is provided yet, that will come later.
Test Plan: This is a build utility, doesn't really matter.
X-Origin-Diff: phab/D705
GitOrigin-RevId: 4bf274d8301f3a38a1ec7512bf310be9815fb647
diff --git a/build/bazel_cc_fix/BUILD.bazel b/build/bazel_cc_fix/BUILD.bazel
new file mode 100644
index 0000000..28b6438
--- /dev/null
+++ b/build/bazel_cc_fix/BUILD.bazel
@@ -0,0 +1,19 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+# Library with the sources of bazel_cc_fix (main.go). It is private; the binary target in this file embeds it.
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "source.monogon.dev/build/bazel_cc_fix",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//build/bazel_cc_fix/ccfixspec:go_default_library",
+ "@com_github_golang_protobuf//proto:go_default_library",
+ "@com_github_mattn_go_shellwords//:go_default_library",
+ ],
+)
+# The bazel_cc_fix command line tool, built from the library above and visible to the whole workspace.
+go_binary(
+ name = "bazel_cc_fix",
+ embed = [":go_default_library"],
+ visibility = ["//visibility:public"],
+)
diff --git a/build/bazel_cc_fix/ccfixspec/BUILD.bazel b/build/bazel_cc_fix/ccfixspec/BUILD.bazel
new file mode 100644
index 0000000..f477071
--- /dev/null
+++ b/build/bazel_cc_fix/ccfixspec/BUILD.bazel
@@ -0,0 +1,23 @@
+load("@rules_proto//proto:defs.bzl", "proto_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
+# Protobuf definition of the bazel_cc_fix spec (ccfixspec.proto).
+proto_library(
+ name = "build_bazel_cc_fix_ccfixspec_proto",
+ srcs = ["ccfixspec.proto"],
+ visibility = ["//visibility:public"],
+)
+# Go bindings generated from the spec proto.
+go_proto_library(
+ name = "build_bazel_cc_fix_ccfixspec_go_proto",
+ importpath = "source.monogon.dev/build/bazel_cc_fix/ccfixspec",
+ proto = ":build_bazel_cc_fix_ccfixspec_proto",
+ visibility = ["//visibility:public"],
+)
+# Go library embedding the generated bindings; //build/bazel_cc_fix depends on this target.
+go_library(
+ name = "go_default_library",
+ embed = [":build_bazel_cc_fix_ccfixspec_go_proto"],
+ importpath = "source.monogon.dev/build/bazel_cc_fix/ccfixspec",
+ visibility = ["//visibility:public"],
+)
diff --git a/build/bazel_cc_fix/ccfixspec/ccfixspec.proto b/build/bazel_cc_fix/ccfixspec/ccfixspec.proto
new file mode 100644
index 0000000..d3ef979
--- /dev/null
+++ b/build/bazel_cc_fix/ccfixspec/ccfixspec.proto
@@ -0,0 +1,56 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package ccfixspec;
+
+// CCFixSpec is the root configuration message for bazel_cc_fix.
+message CCFixSpec {
+ // replace contains replace directives which modify normal include file resolution. They can be used, for example,
+ // to redirect system includes of a third-party library to the correct inter-workspace path or to change the
+ // location of certain generated files.
+ repeated Replace replace = 1;
+
+ // generated_file lists not-yet-generated files which the resolver should treat as existing. See GeneratedFile.
+ repeated GeneratedFile generated_file = 2;
+
+ // If set, all files in this directory are treated as generated files. Useful for out-of-tree build systems like
+ // meson and cmake. Shouldn't be set on build systems which build in-tree.
+ string build_dir = 3;
+}
+
+// Replace is a single directive rewriting the path of an include, see CCFixSpec.replace.
+message Replace {
+ enum Type {
+ UNKNOWN = 0;
+ // SYSTEM replaces included system headers (within < >) with the given workspace or inter-workspace
+ // (external/<otherworkspace>) paths. It matches literally as these files are generally not resolvable.
+ SYSTEM = 1;
+ // WORKSPACE replaces included workspace-relative headers (after resolving)
+ // with the given workspace or inter-workspace paths. It matches
+ // pre-resolved workspace-relative paths.
+ WORKSPACE = 2;
+ }
+ Type type = 1;
+ string from = 2; // Path to match: either exactly or, if it ends in a slash, as a prefix.
+ string to = 3; // Replacement for the matched path or prefix.
+}
+
+// GeneratedFile represents a generated file which is not present in the
+// workspace as it has not been generated yet. Specifying it explicitly allows
+// the resolver to know about it and resolve it properly.
+message GeneratedFile { string path = 1; }
diff --git a/build/bazel_cc_fix/main.go b/build/bazel_cc_fix/main.go
new file mode 100644
index 0000000..244e849
--- /dev/null
+++ b/build/bazel_cc_fix/main.go
@@ -0,0 +1,369 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all includes in the target workspace to be
+// workspace-relative and additionally supports rewriting includes via a prototxt-based spec file, for example to
+// fix up includes for external libraries.
+// The rewritten code can then be used in Bazel intra- and inter-workspace without dealing with any copts or include-
+// related attributes.
+// To know where an include would resolve to it expects a compilation database (see
+// https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input. It looks at all files in that database and
+// their transitive dependencies and rewrites all of them according to the include paths specified in the compilation
+// command from the database.
+// The compilation database itself is either generated by the original build system or by using intercept-build, which
+// intercepts calls to the compiler and records them into a compilation database.
+package main
+
+import (
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/golang/protobuf/proto"
+ "github.com/mattn/go-shellwords"
+
+ "source.monogon.dev/build/bazel_cc_fix/ccfixspec"
+)
+
+// compilationDBEntry is a single entry from the compilation database which represents a single compiler invocation on
+// a C/C++ source file. It contains the compiler working directory, arguments and input file path.
+type compilationDBEntry struct {
+ Directory string `json:"directory"` // Working directory; relative File and include paths are resolved against it.
+ Command string `json:"command"` // Shell command line, only parsed when Arguments is empty.
+ Arguments []string `json:"arguments"` // Compiler arguments, already split up.
+ File string `json:"file"` // Source file being compiled.
+ Output string `json:"output"` // Not read by this tool.
+}
+
+// compilationDB is a collection of compilationDBEntry values, usually stored in a big JSON-serialized document.
+// https://clang.llvm.org/docs/JSONCompilationDatabase.html
+type compilationDB []compilationDBEntry
+
+// rewrites represents a set of include rewrites with the key being the original include directive (like
+// "#include <xyz.h>", with whitespace trimmed on both sides) and the value being the directive replacing it.
+type rewrites map[string]string
+
+// replacer returns a strings.Replacer which efficiently performs all replacements in a single pass
+func (r rewrites) replacer() *strings.Replacer {
+ var replacerArgs []string
+ for from, to := range r {
+ replacerArgs = append(replacerArgs, from, to)
+ }
+ return strings.NewReplacer(replacerArgs...)
+}
+
+// addWorkspace records that oldDirective (surrounding whitespace is ignored) is to be replaced by a quote include of
+// workspaceRelativePath. An already recorded rewrite is never overwritten, a conflicting one only logs a warning.
+func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
+	key := strings.TrimSpace(oldDirective)
+	want := "#include \"" + workspaceRelativePath + "\""
+	if existing, present := r[key]; !present {
+		r[key] = want
+	} else if existing != want {
+		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", key, existing, want)
+	}
+}
+
+// rewriteMetadata is a map of a file path to rewrite metadata for that file.
+type rewriteMetadata map[string]rewriteMetadataFile
+
+type rewriteMetadataFile struct {
+ rewrites rewrites // Include rewrites recorded for this file.
+ source string // Content of the file as it was read from disk.
+}
+
+var (
+	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")
+	workspacePath     = flag.String("workspace", "", "Path to the workspace root") // includes resolving outside of it are ignored
+	specPath          = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)") // parsed as text-format protobuf
+)
+
+// Matchers for compiler flags adding include directories; the group is empty if the directory is the next argument.
+var (
+	reGlobalInclude = regexp.MustCompile("^-I(.*)")
+	reSystemInclude = regexp.MustCompile("^-isystem(.*)")
+	reQuoteInclude  = regexp.MustCompile("^-iquote(.*)")
+)
+
+// reIncludeDirective matches an include directive; the path stops at the first closing delimiter, not the last.
+var reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])([^>"\n]*)([>"]))`)
+
+// applyReplaceDirectives rewrites originalPath using the first directive of type replaceType in directives that
+// matches it. A directive matches if its From is equal to originalPath, or if From ends in a slash and is a prefix of
+// originalPath; in the latter case only that prefix is replaced by To. The result is not processed any further. If
+// nothing matches, originalPath is returned when returnUnmodified is set and an empty string otherwise.
+func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
+	for _, directive := range directives {
+		switch {
+		case directive.Type != replaceType:
+			// Directive is meant for a different kind of include, skip it.
+		case directive.From == originalPath:
+			return directive.To
+		case strings.HasSuffix(directive.From, "/") && strings.HasPrefix(originalPath, directive.From):
+			return directive.To + originalPath[len(directive.From):]
+		}
+	}
+	if !returnUnmodified {
+		return ""
+	}
+	return originalPath
+}
+
+// findFileInWorkspace takes a path from a C include directive and looks it up in the directories of searchPath, in
+// order. Paths in isGeneratedFile are treated as if they existed on the filesystem. It returns the location of the
+// first hit relative to the workspace root (-workspace flag), or an empty string if the file was not found or lives
+// outside of the workspace.
+func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
+	var inclPath string
+	for _, dir := range searchPath {
+		candidate := filepath.Join(dir, inclFile)
+		if isGeneratedFile[candidate] {
+			inclPath = candidate
+			break
+		}
+		if _, err := os.Stat(candidate); err == nil {
+			inclPath = candidate
+			break
+		}
+	}
+	if inclPath == "" {
+		// We haven't found the included file. This can happen for system includes (<stdio.h>) or includes from
+		// other operating systems.
+		return ""
+	}
+
+	// Ignore all include directives that don't resolve into our workspace after processing. This is decided on the
+	// relative path rather than by comparing string prefixes, as a sibling directory sharing a name prefix with the
+	// workspace (/src/ws-old for a workspace at /src/ws) would otherwise be treated as being inside of it. A path
+	// which cannot be made relative to the workspace at all is not part of it either.
+	rel, err := filepath.Rel(*workspacePath, inclPath)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return ""
+	}
+	return rel
+}
+
+// fixIncludesAndGetRefs looks at all include directives of the file at filePath, records the rewrites needed for
+// them in m and returns the absolute paths of all non-generated files included by it for further analysis. The file
+// is read from disk the first time it is seen; if that fails, the error is logged and nil is returned. Includes which
+// neither match a replace directive in spec nor resolve to a file in the workspace are left untouched.
+func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
+	meta, ok := m[filePath]
+	if !ok {
+		cSourceRaw, err := ioutil.ReadFile(filePath)
+		if err != nil {
+			log.Printf("failed to open source file: %v", err)
+			return nil
+		}
+		meta = rewriteMetadataFile{rewrites: make(rewrites), source: string(cSourceRaw)}
+		m[filePath] = meta
+	}
+	var includeFiles []string
+	// Find all include directives
+	for _, incl := range reIncludeDirective.FindAllStringSubmatch(meta.source, -1) {
+		inclDirective, inclType, inclFile := incl[0], incl[1], incl[2]
+		var workspaceRelativeFilePath string
+		var searchPath []string
+		if inclType == "\"" {
+			searchPath = quoteIncludes
+		} else if inclType == "<" {
+			searchPath = systemIncludes
+			workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_SYSTEM, inclFile, false)
+		}
+		if workspaceRelativeFilePath == "" {
+			workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
+		}
+		workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_WORKSPACE, workspaceRelativeFilePath, true)
+		if workspaceRelativeFilePath == "" {
+			// Nothing in the workspace corresponds to this include (for example <stdio.h>). Keep the directive as it
+			// is instead of rewriting it to an empty path and treating the workspace root as an included file.
+			continue
+		}
+
+		// Mark generated files as generated, all other files get analyzed as well
+		absoluteFilePath := filepath.Join(*workspacePath, workspaceRelativeFilePath)
+		foundGenerated := isGeneratedFile[absoluteFilePath]
+		if !foundGenerated {
+			includeFiles = append(includeFiles, absoluteFilePath)
+		}
+
+		// Pretend that a generated file exists at the given path when stripping the BuildDir prefix. This is
+		// generally true for all out-of-tree build systems and saves the user from needing to manually specify
+		// lots of GeneratedFiles.
+		if spec.BuildDir != "" && strings.HasPrefix(workspaceRelativeFilePath, spec.BuildDir+"/") {
+			workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, spec.BuildDir+"/"))
+			foundGenerated = true
+		}
+
+		// Shorten include paths when both files are in the same directory except when a generated file is involved
+		// as these end up in physically different locations and need to be referenced using a full workspace-
+		// relative path
+		if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(absoluteFilePath) {
+			workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
+		}
+		// Don't perform rewrites when both include directives are semantically equivalent
+		if workspaceRelativeFilePath == inclFile && inclType == "\"" {
+			continue
+		}
+		meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
+	}
+	return includeFiles
+}
+
+// getIncludeDirs takes a compilation database entry and returns the search paths for both system and quote includes
+// in lookup order. System includes are searched in all -I directories followed by all -isystem directories. Quote
+// includes are searched in the directory of the source file, then in all -iquote directories and finally in all
+// system include directories. Relative directories are resolved against the working directory of the entry and
+// duplicates are dropped, keeping the first occurrence. An error is returned if the command of the entry cannot be
+// parsed or if an include flag at the end of the arguments lacks its directory.
+func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
+	// Normalize arguments
+	args := entry.Arguments
+	if len(args) == 0 {
+		args, err = shellwords.Parse(entry.Command)
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to parse command: %w", err)
+		}
+	}
+
+	filePath := entry.File
+	if !filepath.IsAbs(filePath) {
+		filePath = filepath.Join(entry.Directory, filePath)
+	}
+
+	// Parse out and generate include search paths. The directory of a flag is either attached to it (-Ifoo) or
+	// passed as the following argument (-I foo).
+	var globalIncludesRaw, systemIncludesRaw []string
+	quoteIncludesRaw := []string{filepath.Dir(filePath)}
+	includeFlags := []struct {
+		re   *regexp.Regexp
+		dirs *[]string
+	}{
+		{reGlobalInclude, &globalIncludesRaw},
+		{reSystemInclude, &systemIncludesRaw},
+		{reQuoteInclude, &quoteIncludesRaw},
+	}
+	for i, arg := range args {
+		for _, includeFlag := range includeFlags {
+			match := includeFlag.re.FindStringSubmatch(arg)
+			if match == nil {
+				continue
+			}
+			dir := match[1]
+			if dir == "" {
+				// A bare flag at the very end of the arguments has no directory to take; report that instead of
+				// indexing past the end of args.
+				if i+1 >= len(args) {
+					return nil, nil, fmt.Errorf("include flag %q is missing its directory argument", arg)
+				}
+				dir = args[i+1]
+			}
+			*includeFlag.dirs = append(*includeFlag.dirs, dir)
+		}
+	}
+	// -I directories are searched before -isystem ones, quote includes fall back to the system include path
+	systemIncludesRaw = append(globalIncludesRaw, systemIncludesRaw...)
+	quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)
+
+	// absUnique makes all given directories absolute and deduplicates them, keeping the first occurrence
+	absUnique := func(dirs []string) []string {
+		var result []string
+		seen := make(map[string]bool)
+		for _, dir := range dirs {
+			if !filepath.IsAbs(dir) {
+				dir = filepath.Join(entry.Directory, dir)
+			}
+			if !seen[dir] {
+				seen[dir] = true
+				result = append(result, dir)
+			}
+		}
+		return result
+	}
+	return absUnique(quoteIncludesRaw), absUnique(systemIncludesRaw), nil
+}
+
+// main analyzes every file of the compilation database plus its transitive includes, then writes all rewrites back.
+func main() {
+	flag.Parse()
+	compilationDBFile, err := os.Open(*compilationDBPath)
+	if err != nil {
+		log.Fatalf("failed to open compilation db: %v", err)
+	}
+	var compilationDB compilationDB
+	if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
+		log.Fatalf("failed to read compilation db: %v", err)
+	}
+	var spec ccfixspec.CCFixSpec
+	if *specPath != "" { // the spec is optional, but a spec which was asked for has to be readable
+		specRaw, err := ioutil.ReadFile(*specPath)
+		if err != nil {
+			log.Fatalf("failed to read spec: %v", err)
+		}
+		if err := proto.UnmarshalText(string(specRaw), &spec); err != nil {
+			log.Fatalf("failed to load spec: %v", err)
+		}
+	}
+
+	isGeneratedFile := make(map[string]bool)
+	for _, entry := range spec.GeneratedFile {
+		isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
+	}
+
+	rewriteMetadata := make(rewriteMetadata)
+	// Iterate over all source files in the compilation database and analyze them one-by-one
+	for _, entry := range compilationDB {
+		quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
+		if err != nil {
+			log.Println(err)
+			continue
+		}
+		filePath := entry.File
+		if !filepath.IsAbs(entry.File) {
+			filePath = filepath.Join(entry.Directory, entry.File)
+		}
+		includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
+
+		// seen stores the path of already-visited files, similar to #pragma once
+		seen := make(map[string]bool)
+		// rec recursively resolves includes and records rewrites
+		var rec func([]string)
+		rec = func(files []string) {
+			for _, f := range files {
+				if seen[f] {
+					continue
+				}
+				seen[f] = true
+				rec(rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile))
+			}
+		}
+		rec(includedFiles)
+	}
+
+	// Perform all recorded rewrites on the actual files; WriteFile closes each file right away and reports errors.
+	for file, rew := range rewriteMetadata {
+		rewritten := rew.rewrites.replacer().Replace(rew.source)
+		if err := ioutil.WriteFile(file, []byte(rewritten), 0644); err != nil {
+			log.Fatalf("failed to write file %v: %v", file, err)
+		}
+	}
+}
diff --git a/build/fietsje/main.go b/build/fietsje/main.go
index 6bd1e49..1345644 100644
--- a/build/fietsje/main.go
+++ b/build/fietsje/main.go
@@ -81,6 +81,9 @@
p.collect("github.com/rekby/gpt", "a930afbc6edcc89c83d39b79e52025698156178d")
p.collect("github.com/yalue/native_endian", "51013b03be4fd97b0aabf29a6923e60359294186")
+ // Used by //build/bazel_cc_fix, override to make sure we use the latest version
+ p.collectOverride("github.com/mattn/go-shellwords", "v1.0.11")
+
// Used by //metropolis/build/mkimage
p.collect("github.com/diskfs/go-diskfs", "v1.0.0").use(
"gopkg.in/djherbis/times.v1",