// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all
// includes in the target workspace to be workspace-relative and additionally
// supports rewriting includes via a prototxt-based spec file, for example to
// fix up includes for external libraries.
// The rewritten code can then be used in Bazel intra- and inter-workspace
// without dealing with any copts or include-related attributes.
// To know where an include would resolve to, it expects a compilation database
// (see https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input.
// It looks at all files in that database and their transitive dependencies and
// rewrites all of them according to the include paths specified in the
// compilation command from the database.
// The compilation database itself is either generated by the original build
// system or by using intercept-build, which intercepts calls to the compiler
// and records them into a compilation database.
package main
32
import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/mattn/go-shellwords"
	"google.golang.org/protobuf/encoding/prototext"

	"source.monogon.dev/build/bazel_cc_fix/ccfixspec"
)
48
// compilationDBEntry is a single entry from the compilation database which
// represents a single compiler invocation on a C/C++ source file. It contains
// the compiler working directory, arguments and input file path.
type compilationDBEntry struct {
	// Directory is the working directory of the compiler invocation. Relative
	// File and include paths are resolved against it.
	Directory string `json:"directory"`
	// Command is the compiler invocation as a single shell-quoted string. It
	// is only parsed when Arguments is empty.
	Command string `json:"command"`
	// Arguments is the compiler invocation as an argument vector. When
	// non-empty it is used instead of Command.
	Arguments []string `json:"arguments"`
	// File is the path of the source file being compiled, either absolute or
	// relative to Directory.
	File string `json:"file"`
	// Output is decoded from the database but not read by the code in this
	// file.
	Output string `json:"output"`
}
59
Serge Bazanski216fe7b2021-05-21 18:36:16 +020060// compilationDB is a collection of compilationDBEntries usually stored in a
61// big JSON-serialized document.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +010062// https://clang.llvm.org/docs/JSONCompilationDatabase.html
63type compilationDB []compilationDBEntry
64
// rewrites represents a set of include rewrites. The key is the original
// include directive (like "#include <xyz.h>", with whitespace trimmed on both
// sides) and the value is the directive that replaces it.
type rewrites map[string]string

// replacer returns a strings.Replacer which efficiently performs all
// replacements in a single pass.
//
// strings.Replacer gives priority to earlier argument pairs, and Go map
// iteration order is random. To keep the output deterministic, directives are
// passed longest-first (ties broken lexicographically), so a directive that is
// a prefix of a longer one can never shadow it.
func (r rewrites) replacer() *strings.Replacer {
	directives := make([]string, 0, len(r))
	for from := range r {
		directives = append(directives, from)
	}
	sort.Slice(directives, func(i, j int) bool {
		if len(directives[i]) != len(directives[j]) {
			return len(directives[i]) > len(directives[j])
		}
		return directives[i] < directives[j]
	})

	replacerArgs := make([]string, 0, 2*len(directives))
	for _, from := range directives {
		replacerArgs = append(replacerArgs, from, r[from])
	}
	return strings.NewReplacer(replacerArgs...)
}

// addWorkspace adds a rewrite from a given directive to a quote-style include
// of workspaceRelativePath. The directive is whitespace-trimmed before being
// used as key. The first rewrite recorded for a directive wins; a later,
// conflicting rewrite is only logged as a warning.
func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
	normalizedDirective := strings.TrimSpace(oldDirective)
	replacementDirective := fmt.Sprintf("#include \"%s\"", workspaceRelativePath)
	oldRewrite, ok := r[normalizedDirective]
	switch {
	case !ok:
		r[normalizedDirective] = replacementDirective
	case oldRewrite != replacementDirective:
		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", normalizedDirective, oldRewrite, replacementDirective)
	}
}
91
92// Type rewriteMetadata is a map of a file path to rewrite metadata for that file
93type rewriteMetadata map[string]rewriteMetadataFile
94
95type rewriteMetadataFile struct {
96 rewrites rewrites
97 source string
98}
99
// Command line flags. All of them default to the empty string.
var (
	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")
	workspacePath     = flag.String("workspace", "", "Path to the workspace root")
	specPath          = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)")
)
105
// Matchers for the compiler flags which add include search directories. The
// single capture group holds the directory when it is attached to the flag
// (-Ifoo) and is empty when the directory is passed as the following argument
// (-I foo).
var (
	reGlobalInclude = regexp.MustCompile("^-I(.*)")
	reSystemInclude = regexp.MustCompile("^-isystem(.*)")
	reQuoteInclude  = regexp.MustCompile("^-iquote(.*)")
)

// reIncludeDirective matches an #include directive at the start of a line.
// Group 1 is the opening delimiter (< or "), group 2 the included path and
// group 3 the closing delimiter. The path stops at the first closing
// delimiter, so a trailing comment which itself contains > or " (for example
// `#include <vector> // std::vector<int>`) does not become part of the path.
var (
	reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])([^>"\n]*)([>"]))`)
)
115
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200116// applyReplaceDirectives applies all directives of the given replaceType in
117// directives to originalPath and returns the resulting string. If
118// returnUnmodified is unset, it returns an empty string when no replacements
119// were performed, otherwise it returns the unmodified originalPath.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100120// The first rewrite wins, it does not do any recursive processing.
121func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
122 for _, d := range directives {
123 if d.Type != replaceType {
124 continue
125 }
126 if d.From == originalPath {
127 return d.To
128 } else if strings.HasSuffix(d.From, "/") && strings.HasPrefix(originalPath, d.From) {
129 return d.To + strings.TrimPrefix(originalPath, d.From)
130 }
131 }
132 if returnUnmodified {
133 return originalPath
134 }
135 return ""
136}
137
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200138// findFileInWorkspace takes a path from a C include directive and uses the
139// given search path to find its absolute path. If that absolute path is
140// outside the workspace, it returns an empty string, otherwise it returns the
141// path of the file relative to the workspace. It pretends that all files in
142// isGeneratedFile exist on the filesystem.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100143func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
144 var inclPath string
145 for _, path := range searchPath {
146 inclPathTry := filepath.Join(path, inclFile)
147 if isGeneratedFile[inclPathTry] {
148 inclPath = inclPathTry
149 break
150 }
151 if _, err := os.Stat(inclPathTry); err == nil {
152 inclPath = inclPathTry
153 break
154 }
155 }
156 if inclPath == "" {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200157 // We haven't found the included file. This can happen for system
158 // includes (<stdio.h>) or includes from other operating systems.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100159 return ""
160 }
161
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200162 // Ignore all include directives that don't resolve into our workspace
163 // after processing
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100164 if !filepath.HasPrefix(inclPath, *workspacePath) {
165 return ""
166 }
167
168 workspaceRelativeFilePath, err := filepath.Rel(*workspacePath, inclPath)
169 if err != nil {
170 panic(err)
171 }
172 return workspaceRelativeFilePath
173}
174
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200175// fixIncludesAndGetRefs opens a file, looks at all its includes, records
176// rewriting data into rewriteMetadata and returns all files included by the
177// file for further analysis.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100178func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
179 meta, ok := m[filePath]
180 if !ok {
Lorenz Brun764a2de2021-11-22 16:26:36 +0100181 cSourceRaw, err := os.ReadFile(filePath)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100182 if err != nil {
183 log.Printf("failed to open source file: %v", err)
184 return nil
185 }
186 cSource := string(cSourceRaw)
187 m[filePath] = rewriteMetadataFile{
188 rewrites: make(rewrites),
189 source: cSource,
190 }
191 meta = m[filePath]
192 }
193 var includeFiles []string
194 // Find all include directives
195 out := reIncludeDirective.FindAllStringSubmatch(meta.source, -1)
196 for _, incl := range out {
197 inclDirective := incl[0]
198 inclType := incl[1]
199 inclFile := incl[2]
200 var workspaceRelativeFilePath string
201 var searchPath []string
202 if inclType == "\"" {
203 searchPath = quoteIncludes
204 } else if inclType == "<" {
205 searchPath = systemIncludes
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100206 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_TYPE_SYSTEM, inclFile, false)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100207 }
208 if workspaceRelativeFilePath == "" {
209 workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
210 }
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100211 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_TYPE_WORKSPACE, workspaceRelativeFilePath, true)
Lorenz Brun1b1d95d2024-08-21 17:35:23 +0200212 if workspaceRelativeFilePath == "" {
213 continue
214 }
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100215
216 // Mark generated files as generated
217 foundGenerated := isGeneratedFile[filepath.Join(*workspacePath, workspaceRelativeFilePath)]
218
219 if !foundGenerated {
220 includeFiles = append(includeFiles, filepath.Join(*workspacePath, workspaceRelativeFilePath))
221 }
222
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200223 // Pretend that a generated file exists at the given path when
224 // stripping the BuildDir prefix. This is generally true for all
225 // out-of-tree build systems and saves the user from needing to
226 // manually specify lots of GeneratedFiles.
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100227 if spec.BuildDir != "" && filepath.HasPrefix(workspaceRelativeFilePath, spec.BuildDir+"/") {
228 workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, spec.BuildDir+"/"))
229 foundGenerated = true
230 }
231
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200232 // Shorten include paths when both files are in the same directory
233 // except when a generated file is involved as these end up in
234 // physically different locations and need to be referenced using a
235 // full workspace- relative path
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100236 if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(filepath.Join(*workspacePath, workspaceRelativeFilePath)) {
237 workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
238 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200239 // Don't perform rewrites when both include directives are semantically
240 // equivalent
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100241 if workspaceRelativeFilePath == inclFile && inclType == "\"" {
242 continue
243 }
244 meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
245 }
246 return includeFiles
247}
248
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200249// getIncludeDirs takes a compilation database entry and returns the search
250// paths for both system and quote includes
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100251func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
252 // Normalize arguments
253 if len(entry.Arguments) == 0 {
254 commandArgs, err := shellwords.Parse(entry.Command)
255 if err != nil {
Tim Windelschmidt3074ec62024-04-23 15:08:05 +0200256 return nil, nil, fmt.Errorf("failed to parse command: %w", err)
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100257 }
258 entry.Arguments = commandArgs
259 }
260
261 // Parse out and generate include search paths
262 var preSystemIncludes []string
263 var systemIncludesRaw []string
264 var quoteIncludesRaw []string
265 filePath := entry.File
266 if !filepath.IsAbs(entry.File) {
267 filePath = filepath.Join(entry.Directory, entry.File)
268 }
269 quoteIncludesRaw = append(quoteIncludesRaw, filepath.Dir(filePath))
270 for i, arg := range entry.Arguments {
271 includeMatch := reGlobalInclude.FindStringSubmatch(arg)
272 if len(includeMatch) > 0 {
273 if len(includeMatch[1]) == 0 {
274 preSystemIncludes = append(preSystemIncludes, entry.Arguments[i+1])
275 } else {
276 preSystemIncludes = append(preSystemIncludes, includeMatch[1])
277 }
278 }
279 includeMatch = reSystemInclude.FindStringSubmatch(arg)
280 if len(includeMatch) > 0 {
281 if len(includeMatch[1]) == 0 {
282 systemIncludesRaw = append(systemIncludesRaw, entry.Arguments[i+1])
283 } else {
284 systemIncludesRaw = append(systemIncludesRaw, includeMatch[1])
285 }
286 }
287 includeMatch = reQuoteInclude.FindStringSubmatch(arg)
288 if len(includeMatch) > 0 {
289 if len(includeMatch[1]) == 0 {
290 quoteIncludesRaw = append(quoteIncludesRaw, entry.Arguments[i+1])
291 } else {
292 quoteIncludesRaw = append(quoteIncludesRaw, includeMatch[1])
293 }
294 }
295 }
296 systemIncludesRaw = append(preSystemIncludes, systemIncludesRaw...)
297 quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)
298
299 // Deduplicate and keep the first one
300 systemIncludeSeen := make(map[string]bool)
301 quoteIncludeSeen := make(map[string]bool)
302 for _, systemInclude := range systemIncludesRaw {
303 if !filepath.IsAbs(systemInclude) {
304 systemInclude = filepath.Join(entry.Directory, systemInclude)
305 }
306 if !systemIncludeSeen[systemInclude] {
307 systemIncludeSeen[systemInclude] = true
308 systemIncludes = append(systemIncludes, systemInclude)
309 }
310 }
311 for _, quoteInclude := range quoteIncludesRaw {
312 if !filepath.IsAbs(quoteInclude) {
313 quoteInclude = filepath.Join(entry.Directory, quoteInclude)
314 }
315 if !quoteIncludeSeen[quoteInclude] {
316 quoteIncludeSeen[quoteInclude] = true
317 quoteIncludes = append(quoteIncludes, quoteInclude)
318 }
319 }
320 return
321}
322
323func main() {
324 flag.Parse()
325 compilationDBFile, err := os.Open(*compilationDBPath)
326 if err != nil {
327 log.Fatalf("failed to open compilation db: %v", err)
328 }
329 var compilationDB compilationDB
330 if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
331 log.Fatalf("failed to read compilation db: %v", err)
332 }
Lorenz Brun764a2de2021-11-22 16:26:36 +0100333 specRaw, err := os.ReadFile(*specPath)
Tim Windelschmidt096654a2024-04-18 23:10:19 +0200334 if err != nil {
335 log.Fatalf("failed to read spec file: %v", err)
336 }
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100337 var spec ccfixspec.CCFixSpec
Lorenz Brun65702192023-08-31 16:27:38 +0200338 if err := prototext.Unmarshal(specRaw, &spec); err != nil {
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100339 log.Fatalf("failed to load spec: %v", err)
340 }
341
342 isGeneratedFile := make(map[string]bool)
343 for _, entry := range spec.GeneratedFile {
344 isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
345 }
346
347 rewriteMetadata := make(rewriteMetadata)
348
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200349 // Iterate over all source files in the compilation database and analyze
350 // them one-by-one
Lorenz Brunb60d9cb2021-02-18 17:34:00 +0100351 for _, entry := range compilationDB {
352 quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
353 if err != nil {
354 log.Println(err)
355 continue
356 }
357 filePath := entry.File
358 if !filepath.IsAbs(entry.File) {
359 filePath = filepath.Join(entry.Directory, entry.File)
360 }
361 includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
362
363 // seen stores the path of already-visited files, similar to #pragma once
364 seen := make(map[string]bool)
365 // rec recursively resolves includes and records rewrites
366 var rec func([]string)
367 rec = func(files []string) {
368 for _, f := range files {
369 if seen[f] {
370 continue
371 }
372 seen[f] = true
373 icf2 := rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
374 rec(icf2)
375 }
376 }
377 rec(includedFiles)
378 }
379
380 // Perform all recorded rewrites on the actual files
381 for file, rew := range rewriteMetadata {
382 outFile, err := os.Create(file)
383 if err != nil {
384 log.Fatalf("failed to open file for writing output: %v", err)
385 }
386 defer outFile.Close()
387 if _, err := rew.rewrites.replacer().WriteString(outFile, rew.source); err != nil {
388 log.Fatalf("failed to write file %v: %v", file, err)
389 }
390 }
391}