blob: 1be47f7eb1cd603aef29a7bd5b57c673759c4267 [file] [log] [blame]
Lorenz Brunb60d9cb2021-02-18 17:34:00 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
Serge Bazanski216fe7b2021-05-21 18:36:16 +020017// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all
18// includes in the target workspace to be workspace-relative and additionally
19// supports rewriting includes via a prototxt-based spec file to for example
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010020// fix up includes for external libraries.
Serge Bazanski216fe7b2021-05-21 18:36:16 +020021// The rewritten code can then be used in Bazel intra- and inter-workspace
22// without dealing with any copts or include- related attributes.
23// To know where an include would resolve to it expects a compilation database
24// (see https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input.
25// It looks at all files in that database and their transitive dependencies and
26// rewrites all of them according to the include paths specified in the
27// compilation command from the database.
28// The compilation database itself is either generated by the original build
29// system or by using intercept-build, which intercepts calls to the compiler
30// and records them into a compilation database.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010031package main
32
import (
	"encoding/json"
	"flag"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/golang/protobuf/proto"
	"github.com/mattn/go-shellwords"

	"source.monogon.dev/build/bazel_cc_fix/ccfixspec"
)
49
// compilationDBEntry is a single entry from the compilation database which
// represents a single compiler invocation on a C/C++ source file. It contains
// the compiler working directory, arguments and input file path.
type compilationDBEntry struct {
	// Directory is the compiler working directory. Relative File and include
	// paths of this entry are joined onto it.
	Directory string `json:"directory"`

	// Command is the whole invocation as one shell-quoted string. It is only
	// parsed when Arguments is empty.
	Command string `json:"command"`

	// Arguments is the invocation as an already-split argument list.
	Arguments []string `json:"arguments"`

	// File is the source file being compiled; it may be relative to
	// Directory.
	File string `json:"file"`

	// Output is decoded from the database but not read anywhere in this file.
	Output string `json:"output"`
}
60
// compilationDB is a collection of compilationDBEntries, usually stored as
// one big JSON-serialized document. See
// https://clang.llvm.org/docs/JSONCompilationDatabase.html for the format.
type compilationDB []compilationDBEntry
65
// rewrites represents a set of include rewrites. The key is the original
// include directive (like "#include <xyz.h>", with whitespace trimmed on both
// sides) and the value is the directive that replaces it (like
// "#include \"some/dir/xyz.h\"").
type rewrites map[string]string

// replacer returns a strings.Replacer which performs all recorded rewrites in
// a single pass.
//
// The pairs are passed to strings.NewReplacer in a fixed order: longer
// directives first, ties broken lexicographically. strings.Replacer compares
// its old strings in argument order, so feeding it pairs in (random) map
// iteration order would make the result differ between runs whenever one
// directive is a prefix of another. Sorting longest-first makes the output
// deterministic and lets the most specific directive win.
func (r rewrites) replacer() *strings.Replacer {
	directives := make([]string, 0, len(r))
	for from := range r {
		directives = append(directives, from)
	}
	sort.Slice(directives, func(i, j int) bool {
		a, b := directives[i], directives[j]
		if len(a) != len(b) {
			return len(a) > len(b)
		}
		return a < b
	})

	replacerArgs := make([]string, 0, 2*len(directives))
	for _, from := range directives {
		replacerArgs = append(replacerArgs, from, r[from])
	}
	return strings.NewReplacer(replacerArgs...)
}

// addWorkspace adds a rewrite from a given directive to a workspace-relative
// path. The directive is whitespace-trimmed before being used as the key. The
// first rewrite recorded for a directive wins; a later, different rewrite for
// the same directive is only reported as a warning.
func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
	normalizedDirective := strings.TrimSpace(oldDirective)
	replacementDirective := fmt.Sprintf("#include \"%s\"", workspaceRelativePath)
	oldRewrite, ok := r[normalizedDirective]
	if !ok {
		r[normalizedDirective] = replacementDirective
		return
	}
	if oldRewrite != replacementDirective {
		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", normalizedDirective, oldRewrite, replacementDirective)
	}
}
92
// rewriteMetadata maps a file path to the rewrite metadata recorded for that
// file.
type rewriteMetadata map[string]rewriteMetadataFile
95
// rewriteMetadataFile holds everything recorded about a single file.
type rewriteMetadataFile struct {
	// rewrites are the include directive replacements recorded for the file.
	rewrites rewrites

	// source is the content of the file as it was read, before any rewrite.
	source string
}
100
// Command-line flags. compilationDBPath points at the JSON compilation
// database to analyze, workspacePath at the root that rewritten includes are
// made relative to, and specPath at the text-format ccfixspec.CCFixSpec.
var (
	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")
	workspacePath     = flag.String("workspace", "", "Path to the workspace root")
	specPath          = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)")
)
106
// Regular expressions recognizing the compiler flags which add include search
// directories. In each of them the capture group holds the directory when it
// is attached to the flag (like "-Ifoo"); it is empty when the directory is
// passed as the following argument (like "-I foo").
var (
	reGlobalInclude = regexp.MustCompile("^-I(.*)")
	reSystemInclude = regexp.MustCompile("^-isystem(.*)")
	reQuoteInclude  = regexp.MustCompile("^-iquote(.*)")
)
112
var (
	// reIncludeDirective matches a C/C++ include directive at the start of a
	// line. Capture group 1 is the opening delimiter (< or "), group 2 the
	// included path and group 3 the closing delimiter.
	//
	// The path group deliberately excludes >, " and newlines so that the match
	// stops at the first closing delimiter. A greedy ".*" would run up to the
	// last quote or angle bracket on the line and swallow trailing comments
	// such as `// for std::vector<int>` into the path.
	reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])([^>"\n]*)([>"]))`)
)
116
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200117// applyReplaceDirectives applies all directives of the given replaceType in
118// directives to originalPath and returns the resulting string. If
119// returnUnmodified is unset, it returns an empty string when no replacements
120// were performed, otherwise it returns the unmodified originalPath.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100121// The first rewrite wins, it does not do any recursive processing.
122func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
123 for _, d := range directives {
124 if d.Type != replaceType {
125 continue
126 }
127 if d.From == originalPath {
128 return d.To
129 } else if strings.HasSuffix(d.From, "/") && strings.HasPrefix(originalPath, d.From) {
130 return d.To + strings.TrimPrefix(originalPath, d.From)
131 }
132 }
133 if returnUnmodified {
134 return originalPath
135 }
136 return ""
137}
138
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200139// findFileInWorkspace takes a path from a C include directive and uses the
140// given search path to find its absolute path. If that absolute path is
141// outside the workspace, it returns an empty string, otherwise it returns the
142// path of the file relative to the workspace. It pretends that all files in
143// isGeneratedFile exist on the filesystem.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100144func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
145 var inclPath string
146 for _, path := range searchPath {
147 inclPathTry := filepath.Join(path, inclFile)
148 if isGeneratedFile[inclPathTry] {
149 inclPath = inclPathTry
150 break
151 }
152 if _, err := os.Stat(inclPathTry); err == nil {
153 inclPath = inclPathTry
154 break
155 }
156 }
157 if inclPath == "" {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200158 // We haven't found the included file. This can happen for system
159 // includes (<stdio.h>) or includes from other operating systems.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100160 return ""
161 }
162
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200163 // Ignore all include directives that don't resolve into our workspace
164 // after processing
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100165 if !filepath.HasPrefix(inclPath, *workspacePath) {
166 return ""
167 }
168
169 workspaceRelativeFilePath, err := filepath.Rel(*workspacePath, inclPath)
170 if err != nil {
171 panic(err)
172 }
173 return workspaceRelativeFilePath
174}
175
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200176// fixIncludesAndGetRefs opens a file, looks at all its includes, records
177// rewriting data into rewriteMetadata and returns all files included by the
178// file for further analysis.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100179func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
180 meta, ok := m[filePath]
181 if !ok {
182 cSourceRaw, err := ioutil.ReadFile(filePath)
183 if err != nil {
184 log.Printf("failed to open source file: %v", err)
185 return nil
186 }
187 cSource := string(cSourceRaw)
188 m[filePath] = rewriteMetadataFile{
189 rewrites: make(rewrites),
190 source: cSource,
191 }
192 meta = m[filePath]
193 }
194 var includeFiles []string
195 // Find all include directives
196 out := reIncludeDirective.FindAllStringSubmatch(meta.source, -1)
197 for _, incl := range out {
198 inclDirective := incl[0]
199 inclType := incl[1]
200 inclFile := incl[2]
201 var workspaceRelativeFilePath string
202 var searchPath []string
203 if inclType == "\"" {
204 searchPath = quoteIncludes
205 } else if inclType == "<" {
206 searchPath = systemIncludes
207 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_SYSTEM, inclFile, false)
208 }
209 if workspaceRelativeFilePath == "" {
210 workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
211 }
212 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_WORKSPACE, workspaceRelativeFilePath, true)
213
214 // Mark generated files as generated
215 foundGenerated := isGeneratedFile[filepath.Join(*workspacePath, workspaceRelativeFilePath)]
216
217 if !foundGenerated {
218 includeFiles = append(includeFiles, filepath.Join(*workspacePath, workspaceRelativeFilePath))
219 }
220
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200221 // Pretend that a generated file exists at the given path when
222 // stripping the BuildDir prefix. This is generally true for all
223 // out-of-tree build systems and saves the user from needing to
224 // manually specify lots of GeneratedFiles.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100225 if spec.BuildDir != "" && filepath.HasPrefix(workspaceRelativeFilePath, spec.BuildDir+"/") {
226 workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, spec.BuildDir+"/"))
227 foundGenerated = true
228 }
229
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200230 // Shorten include paths when both files are in the same directory
231 // except when a generated file is involved as these end up in
232 // physically different locations and need to be referenced using a
233 // full workspace- relative path
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100234 if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(filepath.Join(*workspacePath, workspaceRelativeFilePath)) {
235 workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
236 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200237 // Don't perform rewrites when both include directives are semantically
238 // equivalent
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100239 if workspaceRelativeFilePath == inclFile && inclType == "\"" {
240 continue
241 }
242 meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
243 }
244 return includeFiles
245}
246
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200247// getIncludeDirs takes a compilation database entry and returns the search
248// paths for both system and quote includes
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100249func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
250 // Normalize arguments
251 if len(entry.Arguments) == 0 {
252 commandArgs, err := shellwords.Parse(entry.Command)
253 if err != nil {
254 return []string{}, []string{}, fmt.Errorf("failed to parse command: %w", err)
255 }
256 entry.Arguments = commandArgs
257 }
258
259 // Parse out and generate include search paths
260 var preSystemIncludes []string
261 var systemIncludesRaw []string
262 var quoteIncludesRaw []string
263 filePath := entry.File
264 if !filepath.IsAbs(entry.File) {
265 filePath = filepath.Join(entry.Directory, entry.File)
266 }
267 quoteIncludesRaw = append(quoteIncludesRaw, filepath.Dir(filePath))
268 for i, arg := range entry.Arguments {
269 includeMatch := reGlobalInclude.FindStringSubmatch(arg)
270 if len(includeMatch) > 0 {
271 if len(includeMatch[1]) == 0 {
272 preSystemIncludes = append(preSystemIncludes, entry.Arguments[i+1])
273 } else {
274 preSystemIncludes = append(preSystemIncludes, includeMatch[1])
275 }
276 }
277 includeMatch = reSystemInclude.FindStringSubmatch(arg)
278 if len(includeMatch) > 0 {
279 if len(includeMatch[1]) == 0 {
280 systemIncludesRaw = append(systemIncludesRaw, entry.Arguments[i+1])
281 } else {
282 systemIncludesRaw = append(systemIncludesRaw, includeMatch[1])
283 }
284 }
285 includeMatch = reQuoteInclude.FindStringSubmatch(arg)
286 if len(includeMatch) > 0 {
287 if len(includeMatch[1]) == 0 {
288 quoteIncludesRaw = append(quoteIncludesRaw, entry.Arguments[i+1])
289 } else {
290 quoteIncludesRaw = append(quoteIncludesRaw, includeMatch[1])
291 }
292 }
293 }
294 systemIncludesRaw = append(preSystemIncludes, systemIncludesRaw...)
295 quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)
296
297 // Deduplicate and keep the first one
298 systemIncludeSeen := make(map[string]bool)
299 quoteIncludeSeen := make(map[string]bool)
300 for _, systemInclude := range systemIncludesRaw {
301 if !filepath.IsAbs(systemInclude) {
302 systemInclude = filepath.Join(entry.Directory, systemInclude)
303 }
304 if !systemIncludeSeen[systemInclude] {
305 systemIncludeSeen[systemInclude] = true
306 systemIncludes = append(systemIncludes, systemInclude)
307 }
308 }
309 for _, quoteInclude := range quoteIncludesRaw {
310 if !filepath.IsAbs(quoteInclude) {
311 quoteInclude = filepath.Join(entry.Directory, quoteInclude)
312 }
313 if !quoteIncludeSeen[quoteInclude] {
314 quoteIncludeSeen[quoteInclude] = true
315 quoteIncludes = append(quoteIncludes, quoteInclude)
316 }
317 }
318 return
319}
320
321func main() {
322 flag.Parse()
323 compilationDBFile, err := os.Open(*compilationDBPath)
324 if err != nil {
325 log.Fatalf("failed to open compilation db: %v", err)
326 }
327 var compilationDB compilationDB
328 if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
329 log.Fatalf("failed to read compilation db: %v", err)
330 }
331 specRaw, err := ioutil.ReadFile(*specPath)
332 var spec ccfixspec.CCFixSpec
333 if err := proto.UnmarshalText(string(specRaw), &spec); err != nil {
334 log.Fatalf("failed to load spec: %v", err)
335 }
336
337 isGeneratedFile := make(map[string]bool)
338 for _, entry := range spec.GeneratedFile {
339 isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
340 }
341
342 rewriteMetadata := make(rewriteMetadata)
343
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200344 // Iterate over all source files in the compilation database and analyze
345 // them one-by-one
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100346 for _, entry := range compilationDB {
347 quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
348 if err != nil {
349 log.Println(err)
350 continue
351 }
352 filePath := entry.File
353 if !filepath.IsAbs(entry.File) {
354 filePath = filepath.Join(entry.Directory, entry.File)
355 }
356 includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
357
358 // seen stores the path of already-visited files, similar to #pragma once
359 seen := make(map[string]bool)
360 // rec recursively resolves includes and records rewrites
361 var rec func([]string)
362 rec = func(files []string) {
363 for _, f := range files {
364 if seen[f] {
365 continue
366 }
367 seen[f] = true
368 icf2 := rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
369 rec(icf2)
370 }
371 }
372 rec(includedFiles)
373 }
374
375 // Perform all recorded rewrites on the actual files
376 for file, rew := range rewriteMetadata {
377 outFile, err := os.Create(file)
378 if err != nil {
379 log.Fatalf("failed to open file for writing output: %v", err)
380 }
381 defer outFile.Close()
382 if _, err := rew.rewrites.replacer().WriteString(outFile, rew.source); err != nil {
383 log.Fatalf("failed to write file %v: %v", file, err)
384 }
385 }
386}