blob: 2e6911018e991dd4596f027528537d9c9d0fdf38 [file] [log] [blame]
Lorenz Brunb60d9cb2021-02-18 17:34:00 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
Serge Bazanski216fe7b2021-05-21 18:36:16 +020017// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all
18// includes in the target workspace to be workspace-relative and additionally
19// supports rewriting includes via a prototxt-based spec file to for example
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010020// fix up includes for external libraries.
Serge Bazanski216fe7b2021-05-21 18:36:16 +020021// The rewritten code can then be used in Bazel intra- and inter-workspace
// without dealing with any copts or include-related attributes.
23// To know where an include would resolve to it expects a compilation database
24// (see https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input.
25// It looks at all files in that database and their transitive dependencies and
26// rewrites all of them according to the include paths specified in the
27// compilation command from the database.
28// The compilation database itself is either generated by the original build
29// system or by using intercept-build, which intercepts calls to the compiler
30// and records them into a compilation database.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010031package main
32
import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/mattn/go-shellwords"
	"google.golang.org/protobuf/encoding/prototext"

	"source.monogon.dev/build/bazel_cc_fix/ccfixspec"
)
48
type (
	// compilationDBEntry is one record of the compilation database and
	// describes a single compiler invocation on a C/C++ source file: the
	// working directory of the compiler, its arguments and the input file.
	compilationDBEntry struct {
		// Directory is the compiler working directory. Relative paths of
		// the entry are joined onto it.
		Directory string `json:"directory"`
		// Command is the invocation as a single shell-style string. It is
		// only parsed when Arguments is empty.
		Command string `json:"command"`
		// Arguments is the invocation as an already split argument list.
		Arguments []string `json:"arguments"`
		// File is the path of the compiled source file, absolute or
		// relative to Directory.
		File string `json:"file"`
		// Output is decoded from the database but not read by the code in
		// this file.
		Output string `json:"output"`
	}

	// compilationDB is the list of compilationDBEntry records that make up a
	// compilation database, usually stored as one big JSON document.
	// https://clang.llvm.org/docs/JSONCompilationDatabase.html
	compilationDB []compilationDBEntry
)
64
// rewrites maps an include directive as it appears in a source file (like
// `#include <xyz.h>`, with whitespace trimmed on both sides) to the directive
// that should replace it.
type rewrites map[string]string

// replacer returns a strings.Replacer which performs all recorded
// replacements in a single pass.
//
// strings.Replacer resolves competing matches by argument order, while Go
// randomizes map iteration order. The pairs are therefore sorted before they
// are handed over: longer directives come first so that a directive which is
// a prefix of another one cannot shadow it, and equally long directives are
// ordered lexically. This makes the output identical from run to run.
func (r rewrites) replacer() *strings.Replacer {
	directives := make([]string, 0, len(r))
	for from := range r {
		directives = append(directives, from)
	}
	sort.Slice(directives, func(i, j int) bool {
		if len(directives[i]) != len(directives[j]) {
			return len(directives[i]) > len(directives[j])
		}
		return directives[i] < directives[j]
	})

	replacerArgs := make([]string, 0, 2*len(directives))
	for _, from := range directives {
		replacerArgs = append(replacerArgs, from, r[from])
	}
	return strings.NewReplacer(replacerArgs...)
}

// addWorkspace records a rewrite of oldDirective (whitespace-trimmed) to a
// quote include of workspaceRelativePath. The first rewrite recorded for a
// directive wins; a later, different rewrite for the same directive is only
// reported with a warning and otherwise ignored.
func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
	normalizedDirective := strings.TrimSpace(oldDirective)
	replacementDirective := fmt.Sprintf("#include \"%s\"", workspaceRelativePath)

	oldRewrite, ok := r[normalizedDirective]
	if !ok {
		r[normalizedDirective] = replacementDirective
		return
	}
	if oldRewrite != replacementDirective {
		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", normalizedDirective, oldRewrite, replacementDirective)
	}
}
91
92// Type rewriteMetadata is a map of a file path to rewrite metadata for that file
93type rewriteMetadata map[string]rewriteMetadataFile
94
95type rewriteMetadataFile struct {
96 rewrites rewrites
97 source string
98}
99
// Command line flags. All of them default to the empty string.
var (
	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")
	workspacePath     = flag.String("workspace", "", "Path to the workspace root")
	specPath          = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)")
)
105
// Patterns for the compiler flags which add include search directories. The
// capture group holds the directory when it is attached to the flag
// ("-Ifoo") and is empty when the directory is passed as the following
// argument ("-I", "foo").
var (
	reGlobalInclude = regexp.MustCompile("^-I(.*)")
	reSystemInclude = regexp.MustCompile("^-isystem(.*)")
	reQuoteInclude  = regexp.MustCompile("^-iquote(.*)")
)

// reIncludeDirective matches a C preprocessor include directive at the start
// of a line. Group 1 is the opening delimiter (< or "), group 2 the included
// path and group 3 the closing delimiter (> or ").
//
// The path stops at the first closing delimiter, so trailing text on the same
// line (for example a comment like `// std::vector<int>`) is not part of the
// match.
var (
	reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])([^>"\n]*)([>"]))`)
)
115
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200116// applyReplaceDirectives applies all directives of the given replaceType in
117// directives to originalPath and returns the resulting string. If
118// returnUnmodified is unset, it returns an empty string when no replacements
119// were performed, otherwise it returns the unmodified originalPath.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100120// The first rewrite wins, it does not do any recursive processing.
121func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
122 for _, d := range directives {
123 if d.Type != replaceType {
124 continue
125 }
126 if d.From == originalPath {
127 return d.To
128 } else if strings.HasSuffix(d.From, "/") && strings.HasPrefix(originalPath, d.From) {
129 return d.To + strings.TrimPrefix(originalPath, d.From)
130 }
131 }
132 if returnUnmodified {
133 return originalPath
134 }
135 return ""
136}
137
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200138// findFileInWorkspace takes a path from a C include directive and uses the
139// given search path to find its absolute path. If that absolute path is
140// outside the workspace, it returns an empty string, otherwise it returns the
141// path of the file relative to the workspace. It pretends that all files in
142// isGeneratedFile exist on the filesystem.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100143func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
144 var inclPath string
145 for _, path := range searchPath {
146 inclPathTry := filepath.Join(path, inclFile)
147 if isGeneratedFile[inclPathTry] {
148 inclPath = inclPathTry
149 break
150 }
151 if _, err := os.Stat(inclPathTry); err == nil {
152 inclPath = inclPathTry
153 break
154 }
155 }
156 if inclPath == "" {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200157 // We haven't found the included file. This can happen for system
158 // includes (<stdio.h>) or includes from other operating systems.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100159 return ""
160 }
161
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200162 // Ignore all include directives that don't resolve into our workspace
163 // after processing
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100164 if !filepath.HasPrefix(inclPath, *workspacePath) {
165 return ""
166 }
167
168 workspaceRelativeFilePath, err := filepath.Rel(*workspacePath, inclPath)
169 if err != nil {
170 panic(err)
171 }
172 return workspaceRelativeFilePath
173}
174
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200175// fixIncludesAndGetRefs opens a file, looks at all its includes, records
176// rewriting data into rewriteMetadata and returns all files included by the
177// file for further analysis.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100178func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
179 meta, ok := m[filePath]
180 if !ok {
Lorenz Brun764a2de2021-11-22 16:26:36 +0100181 cSourceRaw, err := os.ReadFile(filePath)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100182 if err != nil {
183 log.Printf("failed to open source file: %v", err)
184 return nil
185 }
186 cSource := string(cSourceRaw)
187 m[filePath] = rewriteMetadataFile{
188 rewrites: make(rewrites),
189 source: cSource,
190 }
191 meta = m[filePath]
192 }
193 var includeFiles []string
194 // Find all include directives
195 out := reIncludeDirective.FindAllStringSubmatch(meta.source, -1)
196 for _, incl := range out {
197 inclDirective := incl[0]
198 inclType := incl[1]
199 inclFile := incl[2]
200 var workspaceRelativeFilePath string
201 var searchPath []string
202 if inclType == "\"" {
203 searchPath = quoteIncludes
204 } else if inclType == "<" {
205 searchPath = systemIncludes
206 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_SYSTEM, inclFile, false)
207 }
208 if workspaceRelativeFilePath == "" {
209 workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
210 }
211 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_WORKSPACE, workspaceRelativeFilePath, true)
212
213 // Mark generated files as generated
214 foundGenerated := isGeneratedFile[filepath.Join(*workspacePath, workspaceRelativeFilePath)]
215
216 if !foundGenerated {
217 includeFiles = append(includeFiles, filepath.Join(*workspacePath, workspaceRelativeFilePath))
218 }
219
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200220 // Pretend that a generated file exists at the given path when
221 // stripping the BuildDir prefix. This is generally true for all
222 // out-of-tree build systems and saves the user from needing to
223 // manually specify lots of GeneratedFiles.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100224 if spec.BuildDir != "" && filepath.HasPrefix(workspaceRelativeFilePath, spec.BuildDir+"/") {
225 workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, spec.BuildDir+"/"))
226 foundGenerated = true
227 }
228
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200229 // Shorten include paths when both files are in the same directory
230 // except when a generated file is involved as these end up in
231 // physically different locations and need to be referenced using a
232 // full workspace- relative path
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100233 if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(filepath.Join(*workspacePath, workspaceRelativeFilePath)) {
234 workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
235 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200236 // Don't perform rewrites when both include directives are semantically
237 // equivalent
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100238 if workspaceRelativeFilePath == inclFile && inclType == "\"" {
239 continue
240 }
241 meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
242 }
243 return includeFiles
244}
245
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200246// getIncludeDirs takes a compilation database entry and returns the search
247// paths for both system and quote includes
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100248func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
249 // Normalize arguments
250 if len(entry.Arguments) == 0 {
251 commandArgs, err := shellwords.Parse(entry.Command)
252 if err != nil {
253 return []string{}, []string{}, fmt.Errorf("failed to parse command: %w", err)
254 }
255 entry.Arguments = commandArgs
256 }
257
258 // Parse out and generate include search paths
259 var preSystemIncludes []string
260 var systemIncludesRaw []string
261 var quoteIncludesRaw []string
262 filePath := entry.File
263 if !filepath.IsAbs(entry.File) {
264 filePath = filepath.Join(entry.Directory, entry.File)
265 }
266 quoteIncludesRaw = append(quoteIncludesRaw, filepath.Dir(filePath))
267 for i, arg := range entry.Arguments {
268 includeMatch := reGlobalInclude.FindStringSubmatch(arg)
269 if len(includeMatch) > 0 {
270 if len(includeMatch[1]) == 0 {
271 preSystemIncludes = append(preSystemIncludes, entry.Arguments[i+1])
272 } else {
273 preSystemIncludes = append(preSystemIncludes, includeMatch[1])
274 }
275 }
276 includeMatch = reSystemInclude.FindStringSubmatch(arg)
277 if len(includeMatch) > 0 {
278 if len(includeMatch[1]) == 0 {
279 systemIncludesRaw = append(systemIncludesRaw, entry.Arguments[i+1])
280 } else {
281 systemIncludesRaw = append(systemIncludesRaw, includeMatch[1])
282 }
283 }
284 includeMatch = reQuoteInclude.FindStringSubmatch(arg)
285 if len(includeMatch) > 0 {
286 if len(includeMatch[1]) == 0 {
287 quoteIncludesRaw = append(quoteIncludesRaw, entry.Arguments[i+1])
288 } else {
289 quoteIncludesRaw = append(quoteIncludesRaw, includeMatch[1])
290 }
291 }
292 }
293 systemIncludesRaw = append(preSystemIncludes, systemIncludesRaw...)
294 quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)
295
296 // Deduplicate and keep the first one
297 systemIncludeSeen := make(map[string]bool)
298 quoteIncludeSeen := make(map[string]bool)
299 for _, systemInclude := range systemIncludesRaw {
300 if !filepath.IsAbs(systemInclude) {
301 systemInclude = filepath.Join(entry.Directory, systemInclude)
302 }
303 if !systemIncludeSeen[systemInclude] {
304 systemIncludeSeen[systemInclude] = true
305 systemIncludes = append(systemIncludes, systemInclude)
306 }
307 }
308 for _, quoteInclude := range quoteIncludesRaw {
309 if !filepath.IsAbs(quoteInclude) {
310 quoteInclude = filepath.Join(entry.Directory, quoteInclude)
311 }
312 if !quoteIncludeSeen[quoteInclude] {
313 quoteIncludeSeen[quoteInclude] = true
314 quoteIncludes = append(quoteIncludes, quoteInclude)
315 }
316 }
317 return
318}
319
320func main() {
321 flag.Parse()
322 compilationDBFile, err := os.Open(*compilationDBPath)
323 if err != nil {
324 log.Fatalf("failed to open compilation db: %v", err)
325 }
326 var compilationDB compilationDB
327 if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
328 log.Fatalf("failed to read compilation db: %v", err)
329 }
Lorenz Brun764a2de2021-11-22 16:26:36 +0100330 specRaw, err := os.ReadFile(*specPath)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100331 var spec ccfixspec.CCFixSpec
Lorenz Brun65702192023-08-31 16:27:38 +0200332 if err := prototext.Unmarshal(specRaw, &spec); err != nil {
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100333 log.Fatalf("failed to load spec: %v", err)
334 }
335
336 isGeneratedFile := make(map[string]bool)
337 for _, entry := range spec.GeneratedFile {
338 isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
339 }
340
341 rewriteMetadata := make(rewriteMetadata)
342
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200343 // Iterate over all source files in the compilation database and analyze
344 // them one-by-one
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100345 for _, entry := range compilationDB {
346 quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
347 if err != nil {
348 log.Println(err)
349 continue
350 }
351 filePath := entry.File
352 if !filepath.IsAbs(entry.File) {
353 filePath = filepath.Join(entry.Directory, entry.File)
354 }
355 includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
356
357 // seen stores the path of already-visited files, similar to #pragma once
358 seen := make(map[string]bool)
359 // rec recursively resolves includes and records rewrites
360 var rec func([]string)
361 rec = func(files []string) {
362 for _, f := range files {
363 if seen[f] {
364 continue
365 }
366 seen[f] = true
367 icf2 := rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
368 rec(icf2)
369 }
370 }
371 rec(includedFiles)
372 }
373
374 // Perform all recorded rewrites on the actual files
375 for file, rew := range rewriteMetadata {
376 outFile, err := os.Create(file)
377 if err != nil {
378 log.Fatalf("failed to open file for writing output: %v", err)
379 }
380 defer outFile.Close()
381 if _, err := rew.rewrites.replacer().WriteString(outFile, rew.source); err != nil {
382 log.Fatalf("failed to write file %v: %v", file, err)
383 }
384 }
385}