blob: 2e6911018e991dd4596f027528537d9c9d0fdf38 [file] [log] [blame] [edit]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all
// includes in the target workspace to be workspace-relative and additionally
// supports rewriting includes via a prototxt-based spec file, for example to
// fix up includes for external libraries.
// The rewritten code can then be used in Bazel intra- and inter-workspace
// without dealing with any copts or include-related attributes.
// To know where an include would resolve to it expects a compilation database
// (see https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input.
// It looks at all files in that database and their transitive dependencies and
// rewrites all of them according to the include paths specified in the
// compilation command from the database.
// The compilation database itself is either generated by the original build
// system or by using intercept-build, which intercepts calls to the compiler
// and records them into a compilation database.
package main
import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/mattn/go-shellwords"
	"google.golang.org/protobuf/encoding/prototext"

	"source.monogon.dev/build/bazel_cc_fix/ccfixspec"
)
// compilationDBEntry is a single entry from the compilation database which
// represents a single compiler invocation on a C/C++ source file. It contains
// the compiler working directory, arguments and input file path.
//
// The invocation is carried either by Arguments (already split) or by Command
// (one string); getIncludeDirs only parses Command when Arguments is empty.
type compilationDBEntry struct {
// Directory is the compiler working directory. Relative File paths and
// relative include directories are joined onto it.
Directory string `json:"directory"`
// Command is the compile command as a single string.
Command string `json:"command"`
// Arguments is the compile command as an argument list.
Arguments []string `json:"arguments"`
// File is the path of the source file being compiled; it may be relative to
// Directory.
File string `json:"file"`
// Output is decoded from the "output" key; the code visible in this file
// does not read it.
Output string `json:"output"`
}
// compilationDB is a collection of compilationDBEntries usually stored in a
// big JSON-serialized document. main decodes it from the file named by the
// -compilation_db flag and analyzes the entries one by one.
// https://clang.llvm.org/docs/JSONCompilationDatabase.html
type compilationDB []compilationDBEntry
// rewrites represents a set of include rewrites. The key is the original
// include directive as it appears in the source (like "#include <xyz.h>", with
// whitespace trimmed on both sides) and the value is the complete directive
// that replaces it (like "#include \"lib/xyz.h\"").
type rewrites map[string]string

// replacer returns a strings.Replacer which efficiently performs all
// replacements in a single pass.
//
// The pairs are handed to the Replacer in sorted key order. A Replacer
// compares its old strings in argument order, so filling it straight from a
// map range would make the output depend on Go's randomized map iteration
// whenever one recorded directive is a prefix of another. With sorted keys the
// shorter directive always takes precedence.
func (r rewrites) replacer() *strings.Replacer {
	directives := make([]string, 0, len(r))
	for from := range r {
		directives = append(directives, from)
	}
	sort.Strings(directives)

	replacerArgs := make([]string, 0, 2*len(directives))
	for _, from := range directives {
		replacerArgs = append(replacerArgs, from, r[from])
	}
	return strings.NewReplacer(replacerArgs...)
}

// addWorkspace adds a rewrite from a given directive to a workspace-relative
// path. oldDirective is trimmed of surrounding whitespace before being used as
// the key, and the replacement is always a quote-style include of
// workspaceRelativePath.
//
// The first rewrite recorded for a directive wins: if the same directive is
// later added with a different target, the existing entry is kept and a
// warning is logged.
func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
	normalizedDirective := strings.TrimSpace(oldDirective)
	replacementDirective := fmt.Sprintf("#include \"%s\"", workspaceRelativePath)

	oldRewrite, ok := r[normalizedDirective]
	if !ok {
		r[normalizedDirective] = replacementDirective
		return
	}
	if oldRewrite != replacementDirective {
		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", normalizedDirective, oldRewrite, replacementDirective)
	}
}
// rewriteMetadata maps a file path to the rewrite metadata recorded for that
// file. The key is the path that was passed to fixIncludesAndGetRefs, which is
// also the path main writes the rewritten content back to.
type rewriteMetadata map[string]rewriteMetadataFile
// rewriteMetadataFile holds what has been recorded about a single file: its
// content and the include rewrites to apply to it.
type rewriteMetadataFile struct {
// rewrites collects the include directives to replace in this file.
rewrites rewrites
// source is the file content as read from disk by fixIncludesAndGetRefs,
// before any rewriting; main runs the replacer over it when writing back.
source string
}
// Command-line flags. They are registered on the default flag set and become
// valid once main has called flag.Parse.
var (
	// compilationDBPath names the JSON compilation database to analyze.
	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")

	// workspacePath is the workspace root; include paths are rewritten to be
	// relative to it.
	workspacePath = flag.String("workspace", "", "Path to the workspace root")

	// specPath names the prototext-encoded ccfixspec.CCFixSpec file.
	specPath = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)")
)
// Patterns used to pick include search paths out of a compiler command line
// and include directives out of source text.
var (
	// reGlobalInclude matches "-I"; group 1 is the directory when it is
	// attached to the flag and empty when it follows as a separate argument.
	reGlobalInclude = regexp.MustCompile(`^-I(.*)`)

	// reSystemInclude is the same for "-isystem".
	reSystemInclude = regexp.MustCompile(`^-isystem(.*)`)

	// reQuoteInclude is the same for "-iquote".
	reQuoteInclude = regexp.MustCompile(`^-iquote(.*)`)

	// reIncludeDirective matches an #include directive at the start of a line
	// (multi-line mode). Group 1 is the opening delimiter (< or "), group 2 the
	// included path and group 3 the closing delimiter. The leading \s* can also
	// consume preceding whitespace, which is why matches are trimmed before
	// they are used as rewrite keys.
	reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])(.*)([>"]))`)
)
// applyReplaceDirectives rewrites originalPath using the first directive of
// the requested replaceType that matches it. A directive matches when its From
// equals originalPath exactly, in which case To is returned, or when From ends
// in "/" and is a prefix of originalPath, in which case only that prefix is
// swapped for To. Directives of other types are skipped. Processing is not
// recursive: the result of the first match is returned as-is.
//
// When no directive matches, the result is originalPath if returnUnmodified is
// set and the empty string otherwise.
func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
	for _, directive := range directives {
		if directive.Type != replaceType {
			continue
		}
		switch {
		case directive.From == originalPath:
			return directive.To
		case strings.HasSuffix(directive.From, "/") && strings.HasPrefix(originalPath, directive.From):
			// Directory-style rule: keep everything after the matched prefix.
			return directive.To + originalPath[len(directive.From):]
		}
	}
	if !returnUnmodified {
		return ""
	}
	return originalPath
}
// findFileInWorkspace takes a path from a C include directive and resolves it
// against searchPath, trying each directory in order and stopping at the first
// candidate that either is listed in isGeneratedFile (such files are treated
// as if they existed) or exists on the filesystem.
//
// It returns the path of that file relative to the workspace root given by the
// -workspace flag. It returns an empty string when no candidate is found (for
// example system headers such as <stdio.h>) or when the file found lies
// outside the workspace.
func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
	inclPath := ""
	for _, dir := range searchPath {
		candidate := filepath.Join(dir, inclFile)
		if isGeneratedFile[candidate] {
			inclPath = candidate
			break
		}
		if _, err := os.Stat(candidate); err == nil {
			inclPath = candidate
			break
		}
	}
	if inclPath == "" {
		// We haven't found the included file. This can happen for system
		// includes (<stdio.h>) or includes from other operating systems.
		return ""
	}

	// Decide containment from the relative path rather than from a string
	// prefix test: a plain prefix comparison would treat a sibling directory
	// such as "<workspace>-other" as being inside the workspace.
	workspaceRelativeFilePath, err := filepath.Rel(*workspacePath, inclPath)
	if err != nil {
		// The two paths cannot be related (e.g. one absolute, one relative),
		// so the file cannot be addressed from the workspace.
		return ""
	}
	if workspaceRelativeFilePath == ".." || strings.HasPrefix(workspaceRelativeFilePath, ".."+string(filepath.Separator)) {
		// Ignore all include directives that don't resolve into our workspace.
		return ""
	}
	return workspaceRelativeFilePath
}
// fixIncludesAndGetRefs looks at all include directives of the file at
// filePath, records the rewrites needed to make them workspace-relative into
// the rewriteMetadata entry for that file and returns the absolute paths of
// the included workspace files so that the caller can analyze them in turn.
//
// The file is read from disk the first time it is seen and its content is
// cached in m; later calls reuse the cached source. If the file cannot be
// read, the failure is logged and nil is returned.
//
// Quote includes are resolved against quoteIncludes and angle-bracket includes
// against systemIncludes, after giving the SYSTEM replace directives from spec
// a chance to map them directly. Includes that resolve to nothing (system
// headers, files outside the workspace) are left untouched. Files listed in
// isGeneratedFile are treated as existing but are not returned for further
// analysis.
func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
	meta, ok := m[filePath]
	if !ok {
		cSourceRaw, err := os.ReadFile(filePath)
		if err != nil {
			log.Printf("failed to open source file: %v", err)
			return nil
		}
		meta = rewriteMetadataFile{
			rewrites: make(rewrites),
			source:   string(cSourceRaw),
		}
		m[filePath] = meta
	}

	var includeFiles []string
	// Find all include directives
	for _, incl := range reIncludeDirective.FindAllStringSubmatch(meta.source, -1) {
		inclDirective, inclType, inclFile := incl[0], incl[1], incl[2]

		var workspaceRelativeFilePath string
		var searchPath []string
		switch inclType {
		case "\"":
			searchPath = quoteIncludes
		case "<":
			searchPath = systemIncludes
			workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_SYSTEM, inclFile, false)
		}
		if workspaceRelativeFilePath == "" {
			workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
		}
		workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_WORKSPACE, workspaceRelativeFilePath, true)

		// An empty path means the include did not resolve into the workspace.
		// Leave such directives alone: rewriting them would produce
		// `#include ""` and queue the workspace directory itself for analysis.
		if workspaceRelativeFilePath == "" {
			continue
		}

		// Mark generated files as generated; they have no source on disk to
		// recurse into.
		absPath := filepath.Join(*workspacePath, workspaceRelativeFilePath)
		foundGenerated := isGeneratedFile[absPath]
		if !foundGenerated {
			includeFiles = append(includeFiles, absPath)
		}

		// Pretend that a generated file exists at the given path when
		// stripping the BuildDir prefix. This is generally true for all
		// out-of-tree build systems and saves the user from needing to
		// manually specify lots of GeneratedFiles.
		if buildDirPrefix := spec.BuildDir + "/"; spec.BuildDir != "" && strings.HasPrefix(workspaceRelativeFilePath, buildDirPrefix) {
			workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, buildDirPrefix))
			foundGenerated = true
		}

		// Shorten include paths when both files are in the same directory
		// except when a generated file is involved as these end up in
		// physically different locations and need to be referenced using a
		// full workspace-relative path. absPath is still accurate here because
		// the path is only altered above when foundGenerated becomes true.
		if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(absPath) {
			workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
		}

		// Don't perform rewrites when both include directives are semantically
		// equivalent
		if workspaceRelativeFilePath == inclFile && inclType == "\"" {
			continue
		}
		meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
	}
	return includeFiles
}
// getIncludeDirs takes a compilation database entry and returns the search
// paths for both system and quote includes.
//
// The compiler arguments are taken from entry.Arguments, or parsed from
// entry.Command when Arguments is empty. -I, -isystem and -iquote are
// recognized both with the directory attached ("-Ifoo") and as a separate
// following argument ("-I foo").
//
// systemIncludes holds the -I directories followed by the -isystem ones.
// quoteIncludes holds the directory of the source file, then the -iquote
// directories, then everything in systemIncludes. Relative directories are
// resolved against entry.Directory and duplicates are dropped, keeping the
// first occurrence.
//
// An error is returned when the command cannot be parsed or when an include
// flag is the last argument and therefore has no directory.
func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
	// Normalize arguments
	args := entry.Arguments
	if len(args) == 0 {
		args, err = shellwords.Parse(entry.Command)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to parse command: %w", err)
		}
	}

	filePath := entry.File
	if !filepath.IsAbs(filePath) {
		filePath = filepath.Join(entry.Directory, filePath)
	}

	// The directory of the including file is always searched first for quote
	// includes.
	var preSystemIncludes, systemIncludesRaw []string
	quoteIncludesRaw := []string{filepath.Dir(filePath)}

	includeFlags := []struct {
		re  *regexp.Regexp
		dst *[]string
	}{
		{reGlobalInclude, &preSystemIncludes},
		{reSystemInclude, &systemIncludesRaw},
		{reQuoteInclude, &quoteIncludesRaw},
	}
	for i, arg := range args {
		for _, f := range includeFlags {
			match := f.re.FindStringSubmatch(arg)
			if match == nil {
				continue
			}
			dir := match[1]
			if dir == "" {
				// The directory is passed as the next argument.
				if i+1 >= len(args) {
					return nil, nil, fmt.Errorf("missing directory after %q in compile command for %s", arg, entry.File)
				}
				dir = args[i+1]
			}
			*f.dst = append(*f.dst, dir)
		}
	}

	// -I directories are searched before -isystem ones, and quote includes
	// fall back to the whole system search path.
	systemIncludesRaw = append(preSystemIncludes, systemIncludesRaw...)
	quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)

	systemIncludes = dedupeIncludeDirs(entry.Directory, systemIncludesRaw)
	quoteIncludes = dedupeIncludeDirs(entry.Directory, quoteIncludesRaw)
	return quoteIncludes, systemIncludes, nil
}

// dedupeIncludeDirs resolves every non-absolute directory in dirs against base
// and returns the result without duplicates, keeping the first occurrence.
func dedupeIncludeDirs(base string, dirs []string) []string {
	seen := make(map[string]bool, len(dirs))
	var out []string
	for _, dir := range dirs {
		if !filepath.IsAbs(dir) {
			dir = filepath.Join(base, dir)
		}
		if seen[dir] {
			continue
		}
		seen[dir] = true
		out = append(out, dir)
	}
	return out
}
func main() {
flag.Parse()
compilationDBFile, err := os.Open(*compilationDBPath)
if err != nil {
log.Fatalf("failed to open compilation db: %v", err)
}
var compilationDB compilationDB
if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
log.Fatalf("failed to read compilation db: %v", err)
}
specRaw, err := os.ReadFile(*specPath)
var spec ccfixspec.CCFixSpec
if err := prototext.Unmarshal(specRaw, &spec); err != nil {
log.Fatalf("failed to load spec: %v", err)
}
isGeneratedFile := make(map[string]bool)
for _, entry := range spec.GeneratedFile {
isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
}
rewriteMetadata := make(rewriteMetadata)
// Iterate over all source files in the compilation database and analyze
// them one-by-one
for _, entry := range compilationDB {
quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
if err != nil {
log.Println(err)
continue
}
filePath := entry.File
if !filepath.IsAbs(entry.File) {
filePath = filepath.Join(entry.Directory, entry.File)
}
includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
// seen stores the path of already-visited files, similar to #pragma once
seen := make(map[string]bool)
// rec recursively resolves includes and records rewrites
var rec func([]string)
rec = func(files []string) {
for _, f := range files {
if seen[f] {
continue
}
seen[f] = true
icf2 := rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
rec(icf2)
}
}
rec(includedFiles)
}
// Perform all recorded rewrites on the actual files
for file, rew := range rewriteMetadata {
outFile, err := os.Create(file)
if err != nil {
log.Fatalf("failed to open file for writing output: %v", err)
}
defer outFile.Close()
if _, err := rew.rewrites.replacer().WriteString(outFile, rew.source); err != nil {
log.Fatalf("failed to write file %v: %v", file, err)
}
}
}