blob: b0d625b1b47ab6ebd49b7f7549b3070cfb3b30a4 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +01002// SPDX-License-Identifier: Apache-2.0
Lorenz Brunb60d9cb2021-02-18 17:34:00 +01003
// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all
// includes in the target workspace to be workspace-relative and additionally
// supports rewriting includes via a prototxt-based spec file to, for example,
// fix up includes for external libraries.
// The rewritten code can then be used in Bazel intra- and inter-workspace
// without dealing with any copts or include-related attributes.
// To know where an include would resolve to, it expects a compilation database
// (see https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input.
// It looks at all files in that database and their transitive dependencies and
// rewrites all of them according to the include paths specified in the
// compilation command from the database.
// The compilation database itself is either generated by the original build
// system or by using intercept-build, which intercepts calls to the compiler
// and records them into a compilation database.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010018package main
19
20import (
21 "encoding/json"
22 "flag"
23 "fmt"
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010024 "log"
25 "os"
26 "path/filepath"
27 "regexp"
28 "strings"
29
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010030 "github.com/mattn/go-shellwords"
Lorenz Brun65702192023-08-31 16:27:38 +020031 "google.golang.org/protobuf/encoding/prototext"
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010032
33 "source.monogon.dev/build/bazel_cc_fix/ccfixspec"
34)
35
// The types below mirror the JSON compilation database format described at
// https://clang.llvm.org/docs/JSONCompilationDatabase.html
type (
	// compilationDBEntry is one record of the compilation database. Each
	// record describes a single compiler invocation on a C/C++ source file:
	// where the compiler ran, how it was invoked and which file it compiled.
	compilationDBEntry struct {
		// Directory is the working directory of the compiler invocation.
		// Relative paths found in the entry are resolved against it.
		Directory string `json:"directory"`
		// Command is the invocation as one shell-style string. It is only
		// consulted when Arguments is empty.
		Command string `json:"command"`
		// Arguments is the invocation as an already-split argument list.
		Arguments []string `json:"arguments"`
		// File is the path of the source file being compiled.
		File string `json:"file"`
		// Output is decoded from the "output" key of the entry.
		Output string `json:"output"`
	}

	// compilationDB is the whole compilation database: a list of entries
	// which is usually stored as one large JSON document.
	compilationDB []compilationDBEntry
)
51
// rewrites collects include rewrites for one file. The key is the include
// directive exactly as it appears in the source (like "#include <xyz.h>",
// with whitespace trimmed on both sides) and the value is the directive that
// replaces it.
type rewrites map[string]string

// replacer builds a strings.Replacer from all recorded rewrites so that every
// replacement can be applied to a file in a single pass.
func (r rewrites) replacer() *strings.Replacer {
	pairs := make([]string, 0, 2*len(r))
	for oldDirective, newDirective := range r {
		pairs = append(pairs, oldDirective, newDirective)
	}
	return strings.NewReplacer(pairs...)
}

// addWorkspace records that oldDirective should become a quoted include of
// workspaceRelativePath. The first rewrite recorded for a directive is kept;
// a later, different rewrite for the same directive is only logged as a
// warning.
func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
	key := strings.TrimSpace(oldDirective)
	wanted := fmt.Sprintf("#include \"%s\"", workspaceRelativePath)
	existing, recorded := r[key]
	switch {
	case !recorded:
		r[key] = wanted
	case existing != wanted:
		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", key, existing, wanted)
	}
}
78
79// Type rewriteMetadata is a map of a file path to rewrite metadata for that file
80type rewriteMetadata map[string]rewriteMetadataFile
81
82type rewriteMetadataFile struct {
83 rewrites rewrites
84 source string
85}
86
// Command-line flags.
var (
	// compilationDBPath is the path of the JSON compilation database to read.
	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")
	// workspacePath is the root of the workspace; include paths are rewritten
	// to be relative to it.
	workspacePath = flag.String("workspace", "", "Path to the workspace root")
	// specPath is the path of the text-format ccfixspec.CCFixSpec file.
	specPath = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)")
)
92
// Patterns recognizing the compiler flags which add include search
// directories. The capture group holds the directory when it is attached to
// the flag and is empty when the directory is passed as the next argument.
var (
	// reGlobalInclude matches -I.
	reGlobalInclude = regexp.MustCompile(`^-I(.*)`)
	// reSystemInclude matches -isystem.
	reSystemInclude = regexp.MustCompile(`^-isystem(.*)`)
	// reQuoteInclude matches -iquote.
	reQuoteInclude = regexp.MustCompile(`^-iquote(.*)`)
)
98
var (
	// reIncludeDirective matches a C preprocessor include directive at the
	// start of a line. Submatch 1 is the opening delimiter (< or "),
	// submatch 2 the included path and submatch 3 the closing delimiter.
	// The path is matched non-greedily so that it ends at the first closing
	// delimiter; a trailing comment containing quotes or angle brackets must
	// not become part of the path or of the matched directive.
	reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])(.*?)([>"]))`)
)
102
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200103// applyReplaceDirectives applies all directives of the given replaceType in
104// directives to originalPath and returns the resulting string. If
105// returnUnmodified is unset, it returns an empty string when no replacements
106// were performed, otherwise it returns the unmodified originalPath.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100107// The first rewrite wins, it does not do any recursive processing.
108func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
109 for _, d := range directives {
110 if d.Type != replaceType {
111 continue
112 }
113 if d.From == originalPath {
114 return d.To
115 } else if strings.HasSuffix(d.From, "/") && strings.HasPrefix(originalPath, d.From) {
116 return d.To + strings.TrimPrefix(originalPath, d.From)
117 }
118 }
119 if returnUnmodified {
120 return originalPath
121 }
122 return ""
123}
124
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200125// findFileInWorkspace takes a path from a C include directive and uses the
126// given search path to find its absolute path. If that absolute path is
127// outside the workspace, it returns an empty string, otherwise it returns the
128// path of the file relative to the workspace. It pretends that all files in
129// isGeneratedFile exist on the filesystem.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100130func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
131 var inclPath string
132 for _, path := range searchPath {
133 inclPathTry := filepath.Join(path, inclFile)
134 if isGeneratedFile[inclPathTry] {
135 inclPath = inclPathTry
136 break
137 }
138 if _, err := os.Stat(inclPathTry); err == nil {
139 inclPath = inclPathTry
140 break
141 }
142 }
143 if inclPath == "" {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200144 // We haven't found the included file. This can happen for system
145 // includes (<stdio.h>) or includes from other operating systems.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100146 return ""
147 }
148
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200149 // Ignore all include directives that don't resolve into our workspace
150 // after processing
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100151 if !filepath.HasPrefix(inclPath, *workspacePath) {
152 return ""
153 }
154
155 workspaceRelativeFilePath, err := filepath.Rel(*workspacePath, inclPath)
156 if err != nil {
157 panic(err)
158 }
159 return workspaceRelativeFilePath
160}
161
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200162// fixIncludesAndGetRefs opens a file, looks at all its includes, records
163// rewriting data into rewriteMetadata and returns all files included by the
164// file for further analysis.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100165func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
166 meta, ok := m[filePath]
167 if !ok {
Lorenz Brun764a2de2021-11-22 16:26:36 +0100168 cSourceRaw, err := os.ReadFile(filePath)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100169 if err != nil {
170 log.Printf("failed to open source file: %v", err)
171 return nil
172 }
173 cSource := string(cSourceRaw)
174 m[filePath] = rewriteMetadataFile{
175 rewrites: make(rewrites),
176 source: cSource,
177 }
178 meta = m[filePath]
179 }
180 var includeFiles []string
181 // Find all include directives
182 out := reIncludeDirective.FindAllStringSubmatch(meta.source, -1)
183 for _, incl := range out {
184 inclDirective := incl[0]
185 inclType := incl[1]
186 inclFile := incl[2]
187 var workspaceRelativeFilePath string
188 var searchPath []string
189 if inclType == "\"" {
190 searchPath = quoteIncludes
191 } else if inclType == "<" {
192 searchPath = systemIncludes
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100193 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_TYPE_SYSTEM, inclFile, false)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100194 }
195 if workspaceRelativeFilePath == "" {
196 workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
197 }
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100198 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_TYPE_WORKSPACE, workspaceRelativeFilePath, true)
Lorenz Brun1b1d95d2024-08-21 17:35:23 +0200199 if workspaceRelativeFilePath == "" {
200 continue
201 }
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100202
203 // Mark generated files as generated
204 foundGenerated := isGeneratedFile[filepath.Join(*workspacePath, workspaceRelativeFilePath)]
205
206 if !foundGenerated {
207 includeFiles = append(includeFiles, filepath.Join(*workspacePath, workspaceRelativeFilePath))
208 }
209
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200210 // Pretend that a generated file exists at the given path when
211 // stripping the BuildDir prefix. This is generally true for all
212 // out-of-tree build systems and saves the user from needing to
213 // manually specify lots of GeneratedFiles.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100214 if spec.BuildDir != "" && filepath.HasPrefix(workspaceRelativeFilePath, spec.BuildDir+"/") {
215 workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, spec.BuildDir+"/"))
216 foundGenerated = true
217 }
218
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200219 // Shorten include paths when both files are in the same directory
220 // except when a generated file is involved as these end up in
221 // physically different locations and need to be referenced using a
222 // full workspace- relative path
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100223 if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(filepath.Join(*workspacePath, workspaceRelativeFilePath)) {
224 workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
225 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200226 // Don't perform rewrites when both include directives are semantically
227 // equivalent
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100228 if workspaceRelativeFilePath == inclFile && inclType == "\"" {
229 continue
230 }
231 meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
232 }
233 return includeFiles
234}
235
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200236// getIncludeDirs takes a compilation database entry and returns the search
237// paths for both system and quote includes
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100238func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
239 // Normalize arguments
240 if len(entry.Arguments) == 0 {
241 commandArgs, err := shellwords.Parse(entry.Command)
242 if err != nil {
Tim Windelschmidt3074ec62024-04-23 15:08:05 +0200243 return nil, nil, fmt.Errorf("failed to parse command: %w", err)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100244 }
245 entry.Arguments = commandArgs
246 }
247
248 // Parse out and generate include search paths
249 var preSystemIncludes []string
250 var systemIncludesRaw []string
251 var quoteIncludesRaw []string
252 filePath := entry.File
253 if !filepath.IsAbs(entry.File) {
254 filePath = filepath.Join(entry.Directory, entry.File)
255 }
256 quoteIncludesRaw = append(quoteIncludesRaw, filepath.Dir(filePath))
257 for i, arg := range entry.Arguments {
258 includeMatch := reGlobalInclude.FindStringSubmatch(arg)
259 if len(includeMatch) > 0 {
260 if len(includeMatch[1]) == 0 {
261 preSystemIncludes = append(preSystemIncludes, entry.Arguments[i+1])
262 } else {
263 preSystemIncludes = append(preSystemIncludes, includeMatch[1])
264 }
265 }
266 includeMatch = reSystemInclude.FindStringSubmatch(arg)
267 if len(includeMatch) > 0 {
268 if len(includeMatch[1]) == 0 {
269 systemIncludesRaw = append(systemIncludesRaw, entry.Arguments[i+1])
270 } else {
271 systemIncludesRaw = append(systemIncludesRaw, includeMatch[1])
272 }
273 }
274 includeMatch = reQuoteInclude.FindStringSubmatch(arg)
275 if len(includeMatch) > 0 {
276 if len(includeMatch[1]) == 0 {
277 quoteIncludesRaw = append(quoteIncludesRaw, entry.Arguments[i+1])
278 } else {
279 quoteIncludesRaw = append(quoteIncludesRaw, includeMatch[1])
280 }
281 }
282 }
283 systemIncludesRaw = append(preSystemIncludes, systemIncludesRaw...)
284 quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)
285
286 // Deduplicate and keep the first one
287 systemIncludeSeen := make(map[string]bool)
288 quoteIncludeSeen := make(map[string]bool)
289 for _, systemInclude := range systemIncludesRaw {
290 if !filepath.IsAbs(systemInclude) {
291 systemInclude = filepath.Join(entry.Directory, systemInclude)
292 }
293 if !systemIncludeSeen[systemInclude] {
294 systemIncludeSeen[systemInclude] = true
295 systemIncludes = append(systemIncludes, systemInclude)
296 }
297 }
298 for _, quoteInclude := range quoteIncludesRaw {
299 if !filepath.IsAbs(quoteInclude) {
300 quoteInclude = filepath.Join(entry.Directory, quoteInclude)
301 }
302 if !quoteIncludeSeen[quoteInclude] {
303 quoteIncludeSeen[quoteInclude] = true
304 quoteIncludes = append(quoteIncludes, quoteInclude)
305 }
306 }
307 return
308}
309
310func main() {
311 flag.Parse()
312 compilationDBFile, err := os.Open(*compilationDBPath)
313 if err != nil {
314 log.Fatalf("failed to open compilation db: %v", err)
315 }
316 var compilationDB compilationDB
317 if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
318 log.Fatalf("failed to read compilation db: %v", err)
319 }
Lorenz Brun764a2de2021-11-22 16:26:36 +0100320 specRaw, err := os.ReadFile(*specPath)
Tim Windelschmidt096654a2024-04-18 23:10:19 +0200321 if err != nil {
322 log.Fatalf("failed to read spec file: %v", err)
323 }
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100324 var spec ccfixspec.CCFixSpec
Lorenz Brun65702192023-08-31 16:27:38 +0200325 if err := prototext.Unmarshal(specRaw, &spec); err != nil {
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100326 log.Fatalf("failed to load spec: %v", err)
327 }
328
329 isGeneratedFile := make(map[string]bool)
330 for _, entry := range spec.GeneratedFile {
331 isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
332 }
333
334 rewriteMetadata := make(rewriteMetadata)
335
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200336 // Iterate over all source files in the compilation database and analyze
337 // them one-by-one
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100338 for _, entry := range compilationDB {
339 quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
340 if err != nil {
341 log.Println(err)
342 continue
343 }
344 filePath := entry.File
345 if !filepath.IsAbs(entry.File) {
346 filePath = filepath.Join(entry.Directory, entry.File)
347 }
348 includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
349
350 // seen stores the path of already-visited files, similar to #pragma once
351 seen := make(map[string]bool)
352 // rec recursively resolves includes and records rewrites
353 var rec func([]string)
354 rec = func(files []string) {
355 for _, f := range files {
356 if seen[f] {
357 continue
358 }
359 seen[f] = true
360 icf2 := rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
361 rec(icf2)
362 }
363 }
364 rec(includedFiles)
365 }
366
367 // Perform all recorded rewrites on the actual files
368 for file, rew := range rewriteMetadata {
369 outFile, err := os.Create(file)
370 if err != nil {
371 log.Fatalf("failed to open file for writing output: %v", err)
372 }
373 defer outFile.Close()
374 if _, err := rew.rewrites.replacer().WriteString(outFile, rew.source); err != nil {
375 log.Fatalf("failed to write file %v: %v", file, err)
376 }
377 }
378}