// Program linkpatch rewrites absolute URLs pointing to targets in GitHub in // Markdown link tags to target a different branch. // // This is used to update documentation links for backport branches. // See https://github.com/tendermint/tendermint/issues/7675 for context. package main import ( "bytes" "flag" "fmt" "io/fs" "log" "os" "path/filepath" "regexp" "strings" "github.com/creachadair/atomicfile" ) var ( repoName = flag.String("repo", "tendermint/tendermint", "Repository name to match") sourceBranch = flag.String("source", "master", "Source branch name (required)") targetBranch = flag.String("target", "", "Target branch name (required)") doRecur = flag.Bool("recur", false, "Recur into subdirectories") skipPath stringList skipMatch regexpFlag // Match markdown links pointing to absolute URLs. // This only works for "inline" links, not referenced links. // The submetch selects the URL. linkRE = regexp.MustCompile(`(?m)\[.*?\]\((https?://.*?)\)`) ) func init() { flag.Var(&skipPath, "skip-path", "Skip these paths (comma-separated)") flag.Var(&skipMatch, "skip-match", "Skip URLs matching this regexp (RE2)") flag.Usage = func() { fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] ... Rewrite absolute Markdown links targeting the specified GitHub repository and source branch name to point to the target branch instead. Matching files are updated in-place. Each path names either a directory to list, or a single file path to rewrite. By default, only the top level of a directory is scanned; use -recur to recur into subdirectories. Options: `, filepath.Base(os.Args[0])) flag.PrintDefaults() } } func main() { flag.Parse() switch { case *repoName == "": log.Fatal("You must specify a non-empty -repo name (org/repo)") case *targetBranch == "": log.Fatal("You must specify a non-empty -target branch") case *sourceBranch == "": log.Fatal("You must specify a non-empty -source branch") case *sourceBranch == *targetBranch: log.Fatalf("Source and target branch are the same (%q)", *sourceBranch) case flag.NArg() == 0: log.Fatal("You must specify at least one file/directory to rewrite") } r, err := regexp.Compile(fmt.Sprintf(`^https?://github.com/%s/(?:blob|tree)/%s`, *repoName, *sourceBranch)) if err != nil { log.Fatalf("Compiling regexp: %v", err) } for _, path := range flag.Args() { if err := processPath(r, path); err != nil { log.Fatalf("Processing %q failed: %v", path, err) } } } func processPath(r *regexp.Regexp, path string) error { fi, err := os.Lstat(path) if err != nil { return err } if fi.Mode().IsDir() { return processDir(r, path) } else if fi.Mode().IsRegular() { return processFile(r, path) } return nil // nothing to do with links, device files, sockets, etc. } func processDir(r *regexp.Regexp, root string) error { return filepath.Walk(root, func(path string, fi fs.FileInfo, err error) error { if err != nil { return err } if fi.IsDir() { if skipPath.Contains(path) { log.Printf("Skipping %q (per -skip-path)", path) return filepath.SkipDir // explicitly skipped } else if !*doRecur && path != root { return filepath.SkipDir // skipped because we aren't recurring } return nil // nothing else to do for directories } else if skipPath.Contains(path) { log.Printf("Skipping %q (per -skip-path)", path) return nil // explicitly skipped } else if filepath.Ext(path) != ".md" { return nil // nothing to do for non-Markdown files } return processFile(r, path) }) } func processFile(r *regexp.Regexp, path string) error { log.Printf("Processing file %q", path) input, err := os.ReadFile(path) if err != nil { return err } pos := 0 var output bytes.Buffer for _, m := range linkRE.FindAllSubmatchIndex(input, -1) { href := string(input[m[2]:m[3]]) u := r.FindStringIndex(href) if u == nil || skipMatch.MatchString(href) { if u != nil { log.Printf("Skipped URL %q (by -skip-match)", href) } output.Write(input[pos:m[1]]) // copy the existing data as-is pos = m[1] continue } // Copy everything before the URL as-is, then write the replacement. output.Write(input[pos:m[2]]) // everything up to the URL fmt.Fprintf(&output, `https://github.com/%s/blob/%s%s`, *repoName, *targetBranch, href[u[1]:]) // Write out the tail of the match, everything after the URL. output.Write(input[m[3]:m[1]]) pos = m[1] } output.Write(input[pos:]) // the rest of the file _, err = atomicfile.WriteAll(path, &output, 0644) return err } // stringList implements the flag.Value interface for a comma-separated list of strings. type stringList []string func (lst *stringList) Set(s string) error { if s == "" { *lst = nil } else { *lst = strings.Split(s, ",") } return nil } // Contains reports whether lst contains s. func (lst stringList) Contains(s string) bool { for _, elt := range lst { if s == elt { return true } } return false } func (lst stringList) String() string { return strings.Join([]string(lst), ",") } // regexpFlag implements the flag.Value interface for a regular expression. type regexpFlag struct{ *regexp.Regexp } func (r regexpFlag) MatchString(s string) bool { if r.Regexp == nil { return false } return r.Regexp.MatchString(s) } func (r *regexpFlag) Set(s string) error { c, err := regexp.Compile(s) if err != nil { return err } r.Regexp = c return nil } func (r regexpFlag) String() string { if r.Regexp == nil { return "" } return r.Regexp.String() }