You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

205 lines
5.4 KiB

  1. // Program linkpatch rewrites absolute URLs pointing to targets in GitHub in
  2. // Markdown link tags to target a different branch.
  3. //
  4. // This is used to update documentation links for backport branches.
  5. // See https://github.com/tendermint/tendermint/issues/7675 for context.
  6. package main
  7. import (
  8. "bytes"
  9. "flag"
  10. "fmt"
  11. "io/fs"
  12. "log"
  13. "os"
  14. "path/filepath"
  15. "regexp"
  16. "strings"
  17. "github.com/creachadair/atomicfile"
  18. )
  19. var (
  20. repoName = flag.String("repo", "tendermint/tendermint", "Repository name to match")
  21. sourceBranch = flag.String("source", "master", "Source branch name (required)")
  22. targetBranch = flag.String("target", "", "Target branch name (required)")
  23. doRecur = flag.Bool("recur", false, "Recur into subdirectories")
  24. skipPath stringList
  25. skipMatch regexpFlag
  26. // Match markdown links pointing to absolute URLs.
  27. // This only works for "inline" links, not referenced links.
  28. // The submatch selects the URL.
  29. linkRE = regexp.MustCompile(`(?m)\[.*?\]\((https?://.*?)\)`)
  30. )
  31. func init() {
  32. flag.Var(&skipPath, "skip-path", "Skip these paths (comma-separated)")
  33. flag.Var(&skipMatch, "skip-match", "Skip URLs matching this regexp (RE2)")
  34. flag.Usage = func() {
  35. fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] <file/dir>...
  36. Rewrite absolute Markdown links targeting the specified GitHub repository
  37. and source branch name to point to the target branch instead. Matching
  38. files are updated in-place.
  39. Each path names either a directory to list, or a single file path to
  40. rewrite. By default, only the top level of a directory is scanned; use -recur
  41. to recur into subdirectories.
  42. Options:
  43. `, filepath.Base(os.Args[0]))
  44. flag.PrintDefaults()
  45. }
  46. }
  47. func main() {
  48. flag.Parse()
  49. switch {
  50. case *repoName == "":
  51. log.Fatal("You must specify a non-empty -repo name (org/repo)")
  52. case *targetBranch == "":
  53. log.Fatal("You must specify a non-empty -target branch")
  54. case *sourceBranch == "":
  55. log.Fatal("You must specify a non-empty -source branch")
  56. case *sourceBranch == *targetBranch:
  57. log.Fatalf("Source and target branch are the same (%q)", *sourceBranch)
  58. case flag.NArg() == 0:
  59. log.Fatal("You must specify at least one file/directory to rewrite")
  60. }
  61. r, err := regexp.Compile(fmt.Sprintf(`^https?://github.com/%s/(?:blob|tree)/%s`,
  62. *repoName, *sourceBranch))
  63. if err != nil {
  64. log.Fatalf("Compiling regexp: %v", err)
  65. }
  66. for _, path := range flag.Args() {
  67. if err := processPath(r, path); err != nil {
  68. log.Fatalf("Processing %q failed: %v", path, err)
  69. }
  70. }
  71. }
  72. func processPath(r *regexp.Regexp, path string) error {
  73. fi, err := os.Lstat(path)
  74. if err != nil {
  75. return err
  76. }
  77. if fi.Mode().IsDir() {
  78. return processDir(r, path)
  79. } else if fi.Mode().IsRegular() {
  80. return processFile(r, path)
  81. }
  82. return nil // nothing to do with links, device files, sockets, etc.
  83. }
  84. func processDir(r *regexp.Regexp, root string) error {
  85. return filepath.Walk(root, func(path string, fi fs.FileInfo, err error) error {
  86. if err != nil {
  87. return err
  88. }
  89. if fi.IsDir() {
  90. if skipPath.Contains(path) {
  91. log.Printf("Skipping %q (per -skip-path)", path)
  92. return filepath.SkipDir // explicitly skipped
  93. } else if !*doRecur && path != root {
  94. return filepath.SkipDir // skipped because we aren't recurring
  95. }
  96. return nil // nothing else to do for directories
  97. } else if skipPath.Contains(path) {
  98. log.Printf("Skipping %q (per -skip-path)", path)
  99. return nil // explicitly skipped
  100. } else if filepath.Ext(path) != ".md" {
  101. return nil // nothing to do for non-Markdown files
  102. }
  103. return processFile(r, path)
  104. })
  105. }
  106. func processFile(r *regexp.Regexp, path string) error {
  107. log.Printf("Processing file %q", path)
  108. input, err := os.ReadFile(path)
  109. if err != nil {
  110. return err
  111. }
  112. pos := 0
  113. var output bytes.Buffer
  114. for _, m := range linkRE.FindAllSubmatchIndex(input, -1) {
  115. href := string(input[m[2]:m[3]])
  116. u := r.FindStringIndex(href)
  117. if u == nil || skipMatch.MatchString(href) {
  118. if u != nil {
  119. log.Printf("Skipped URL %q (by -skip-match)", href)
  120. }
  121. output.Write(input[pos:m[1]]) // copy the existing data as-is
  122. pos = m[1]
  123. continue
  124. }
  125. // Copy everything before the URL as-is, then write the replacement.
  126. output.Write(input[pos:m[2]]) // everything up to the URL
  127. fmt.Fprintf(&output, `https://github.com/%s/blob/%s%s`, *repoName, *targetBranch, href[u[1]:])
  128. // Write out the tail of the match, everything after the URL.
  129. output.Write(input[m[3]:m[1]])
  130. pos = m[1]
  131. }
  132. output.Write(input[pos:]) // the rest of the file
  133. _, err = atomicfile.WriteAll(path, &output, 0644)
  134. return err
  135. }
  136. // stringList implements the flag.Value interface for a comma-separated list of strings.
  137. type stringList []string
  138. func (lst *stringList) Set(s string) error {
  139. if s == "" {
  140. *lst = nil
  141. } else {
  142. *lst = strings.Split(s, ",")
  143. }
  144. return nil
  145. }
  146. // Contains reports whether lst contains s.
  147. func (lst stringList) Contains(s string) bool {
  148. for _, elt := range lst {
  149. if s == elt {
  150. return true
  151. }
  152. }
  153. return false
  154. }
  155. func (lst stringList) String() string { return strings.Join([]string(lst), ",") }
  156. // regexpFlag implements the flag.Value interface for a regular expression.
  157. type regexpFlag struct{ *regexp.Regexp }
  158. func (r regexpFlag) MatchString(s string) bool {
  159. if r.Regexp == nil {
  160. return false
  161. }
  162. return r.Regexp.MatchString(s)
  163. }
  164. func (r *regexpFlag) Set(s string) error {
  165. c, err := regexp.Compile(s)
  166. if err != nil {
  167. return err
  168. }
  169. r.Regexp = c
  170. return nil
  171. }
  172. func (r regexpFlag) String() string {
  173. if r.Regexp == nil {
  174. return ""
  175. }
  176. return r.Regexp.String()
  177. }