|
|
- // Program linkpatch rewrites absolute URLs pointing to targets in GitHub in
- // Markdown link tags to target a different branch.
- //
- // This is used to update documentation links for backport branches.
- // See https://github.com/tendermint/tendermint/issues/7675 for context.
- package main
-
- import (
- "bytes"
- "flag"
- "fmt"
- "io/fs"
- "log"
- "os"
- "path/filepath"
- "regexp"
- "strings"
-
- "github.com/creachadair/atomicfile"
- )
-
- var (
- repoName = flag.String("repo", "tendermint/tendermint", "Repository name to match")
- sourceBranch = flag.String("source", "master", "Source branch name (required)")
- targetBranch = flag.String("target", "", "Target branch name (required)")
- doRecur = flag.Bool("recur", false, "Recur into subdirectories")
-
- skipPath stringList
- skipMatch regexpFlag
-
- // Match markdown links pointing to absolute URLs.
- // This only works for "inline" links, not referenced links.
- // The submetch selects the URL.
- linkRE = regexp.MustCompile(`(?m)\[.*?\]\((https?://.*?)\)`)
- )
-
- func init() {
- flag.Var(&skipPath, "skip-path", "Skip these paths (comma-separated)")
- flag.Var(&skipMatch, "skip-match", "Skip URLs matching this regexp (RE2)")
-
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] <file/dir>...
-
- Rewrite absolute Markdown links targeting the specified GitHub repository
- and source branch name to point to the target branch instead. Matching
- files are updated in-place.
-
- Each path names either a directory to list, or a single file path to
- rewrite. By default, only the top level of a directory is scanned; use -recur
- to recur into subdirectories.
-
- Options:
- `, filepath.Base(os.Args[0]))
- flag.PrintDefaults()
- }
- }
-
- func main() {
- flag.Parse()
- switch {
- case *repoName == "":
- log.Fatal("You must specify a non-empty -repo name (org/repo)")
- case *targetBranch == "":
- log.Fatal("You must specify a non-empty -target branch")
- case *sourceBranch == "":
- log.Fatal("You must specify a non-empty -source branch")
- case *sourceBranch == *targetBranch:
- log.Fatalf("Source and target branch are the same (%q)", *sourceBranch)
- case flag.NArg() == 0:
- log.Fatal("You must specify at least one file/directory to rewrite")
- }
-
- r, err := regexp.Compile(fmt.Sprintf(`^https?://github.com/%s/(?:blob|tree)/%s`,
- *repoName, *sourceBranch))
- if err != nil {
- log.Fatalf("Compiling regexp: %v", err)
- }
- for _, path := range flag.Args() {
- if err := processPath(r, path); err != nil {
- log.Fatalf("Processing %q failed: %v", path, err)
- }
- }
- }
-
- func processPath(r *regexp.Regexp, path string) error {
- fi, err := os.Lstat(path)
- if err != nil {
- return err
- }
- if fi.Mode().IsDir() {
- return processDir(r, path)
- } else if fi.Mode().IsRegular() {
- return processFile(r, path)
- }
- return nil // nothing to do with links, device files, sockets, etc.
- }
-
- func processDir(r *regexp.Regexp, root string) error {
- return filepath.Walk(root, func(path string, fi fs.FileInfo, err error) error {
- if err != nil {
- return err
- }
- if fi.IsDir() {
- if skipPath.Contains(path) {
- log.Printf("Skipping %q (per -skip-path)", path)
- return filepath.SkipDir // explicitly skipped
- } else if !*doRecur && path != root {
- return filepath.SkipDir // skipped because we aren't recurring
- }
- return nil // nothing else to do for directories
- } else if skipPath.Contains(path) {
- log.Printf("Skipping %q (per -skip-path)", path)
- return nil // explicitly skipped
- } else if filepath.Ext(path) != ".md" {
- return nil // nothing to do for non-Markdown files
- }
-
- return processFile(r, path)
- })
- }
-
- func processFile(r *regexp.Regexp, path string) error {
- log.Printf("Processing file %q", path)
- input, err := os.ReadFile(path)
- if err != nil {
- return err
- }
-
- pos := 0
- var output bytes.Buffer
- for _, m := range linkRE.FindAllSubmatchIndex(input, -1) {
- href := string(input[m[2]:m[3]])
- u := r.FindStringIndex(href)
- if u == nil || skipMatch.MatchString(href) {
- if u != nil {
- log.Printf("Skipped URL %q (by -skip-match)", href)
- }
- output.Write(input[pos:m[1]]) // copy the existing data as-is
- pos = m[1]
- continue
- }
-
- // Copy everything before the URL as-is, then write the replacement.
- output.Write(input[pos:m[2]]) // everything up to the URL
- fmt.Fprintf(&output, `https://github.com/%s/blob/%s%s`, *repoName, *targetBranch, href[u[1]:])
-
- // Write out the tail of the match, everything after the URL.
- output.Write(input[m[3]:m[1]])
- pos = m[1]
- }
- output.Write(input[pos:]) // the rest of the file
-
- _, err = atomicfile.WriteAll(path, &output, 0644)
- return err
- }
-
// stringList is a flag.Value whose argument is a comma-separated list of
// strings.
type stringList []string

// Set replaces the contents of lst with the comma-separated elements of s.
// An empty s resets the list to nil. Set always returns nil.
func (lst *stringList) Set(s string) error {
	var parts []string // stays nil for an empty argument
	if s != "" {
		parts = strings.Split(s, ",")
	}
	*lst = parts
	return nil
}

// Contains reports whether s is exactly equal to some element of lst.
func (lst stringList) Contains(s string) bool {
	for i := range lst {
		if lst[i] == s {
			return true
		}
	}
	return false
}

// String renders the list in the same comma-separated form accepted by Set.
func (lst stringList) String() string {
	return strings.Join(lst, ",")
}
-
// regexpFlag is a flag.Value whose argument is a regular expression. The
// zero value holds no expression and matches nothing.
type regexpFlag struct{ *regexp.Regexp }

// MatchString reports whether s matches the expression held by r. It reports
// false when no expression has been set.
func (r regexpFlag) MatchString(s string) bool {
	return r.Regexp != nil && r.Regexp.MatchString(s)
}

// Set compiles s and stores the result in r. If s does not compile, the
// error is returned and r is left unchanged.
func (r *regexpFlag) Set(s string) error {
	compiled, err := regexp.Compile(s)
	if err == nil {
		r.Regexp = compiled
	}
	return err
}

// String returns the source text of the expression held by r, or the empty
// string when no expression has been set.
func (r regexpFlag) String() string {
	if re := r.Regexp; re != nil {
		return re.String()
	}
	return ""
}
|