You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

205 lines
5.4 KiB

// Program linkpatch rewrites absolute URLs pointing to targets in GitHub in
// Markdown link tags to target a different branch.
//
// This is used to update documentation links for backport branches.
// See https://github.com/tendermint/tendermint/issues/7675 for context.
package main
import (
"bytes"
"flag"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/creachadair/atomicfile"
)
// Command-line flags and the package-level regexp used to locate links.
var (
// repoName is the "org/repo" name of the GitHub repository whose links are
// rewritten. main rejects an empty value.
repoName = flag.String("repo", "tendermint/tendermint", "Repository name to match")
// sourceBranch is the branch name that existing links point to. It defaults
// to "master"; main rejects an empty value.
sourceBranch = flag.String("source", "master", "Source branch name (required)")
// targetBranch is the branch name that rewritten links will point to. It has
// no default; main rejects an empty value or one equal to sourceBranch.
targetBranch = flag.String("target", "", "Target branch name (required)")
// doRecur controls whether directory arguments are scanned recursively.
doRecur = flag.Bool("recur", false, "Recur into subdirectories")
// skipPath holds the paths excluded from processing (-skip-path).
skipPath stringList
// skipMatch, if set, excludes URLs matching the pattern (-skip-match).
skipMatch regexpFlag
// Match markdown links pointing to absolute URLs.
// This only works for "inline" links, not referenced links.
// The submatch selects the URL.
//
// Both wildcards are non-greedy, so the link text ends at the first "]" and
// the URL at the first ")". The pattern contains no ^ or $ anchors, so the
// (?m) flag has no effect on what it matches.
linkRE = regexp.MustCompile(`(?m)\[.*?\]\((https?://.*?)\)`)
)
// init registers the flags that need custom flag.Value implementations and
// installs a usage function that prints a short description of the program
// ahead of the generated list of options.
func init() {
	// The %[1]s verb is filled in with the base name of the running program.
	const usageFormat = `Usage: %[1]s [options] <file/dir>...
Rewrite absolute Markdown links targeting the specified GitHub repository
and source branch name to point to the target branch instead. Matching
files are updated in-place.
Each path names either a directory to list, or a single file path to
rewrite. By default, only the top level of a directory is scanned; use -recur
to recur into subdirectories.
Options:
`

	flag.Var(&skipPath, "skip-path", "Skip these paths (comma-separated)")
	flag.Var(&skipMatch, "skip-match", "Skip URLs matching this regexp (RE2)")

	flag.Usage = func() {
		program := filepath.Base(os.Args[0])
		fmt.Fprintf(os.Stderr, usageFormat, program)
		flag.PrintDefaults()
	}
}
// main parses and validates the command-line flags, compiles the regexp that
// recognizes URLs pointing into the source branch of the repository, and then
// processes each file or directory named on the command line. Any validation
// or processing failure terminates the program via log.Fatal.
func main() {
	flag.Parse()
	switch {
	case *repoName == "":
		log.Fatal("You must specify a non-empty -repo name (org/repo)")
	case *targetBranch == "":
		log.Fatal("You must specify a non-empty -target branch")
	case *sourceBranch == "":
		log.Fatal("You must specify a non-empty -source branch")
	case *sourceBranch == *targetBranch:
		log.Fatalf("Source and target branch are the same (%q)", *sourceBranch)
	case flag.NArg() == 0:
		log.Fatal("You must specify at least one file/directory to rewrite")
	}

	// The repository and branch names are literal text, not patterns. Quote
	// them so that regexp metacharacters they contain (notably the "." in
	// branch names such as "v0.34.x") match only themselves; the "." in the
	// host name is escaped for the same reason.
	r, err := regexp.Compile(fmt.Sprintf(`^https?://github\.com/%s/(?:blob|tree)/%s`,
		regexp.QuoteMeta(*repoName), regexp.QuoteMeta(*sourceBranch)))
	if err != nil {
		log.Fatalf("Compiling regexp: %v", err)
	}

	for _, path := range flag.Args() {
		if err := processPath(r, path); err != nil {
			log.Fatalf("Processing %q failed: %v", path, err)
		}
	}
}
// processPath dispatches on the kind of filesystem object found at path:
// directories are handed to processDir and regular files to processFile.
// Anything else (symbolic links, devices, sockets, and so on) is ignored and
// reported as success. The path is examined with Lstat, so a symbolic link is
// not followed.
func processPath(r *regexp.Regexp, path string) error {
	info, err := os.Lstat(path)
	if err != nil {
		return err
	}

	switch mode := info.Mode(); {
	case mode.IsDir():
		return processDir(r, path)
	case mode.IsRegular():
		return processFile(r, path)
	default:
		return nil // not a directory or a regular file; nothing to rewrite
	}
}
// processDir walks the directory tree rooted at root and calls processFile on
// each regular file whose name ends in ".md".
//
// Paths listed in -skip-path are skipped (a skipped directory is not entered).
// Unless -recur is set, only the files directly inside root are considered.
// Entries that are not regular files, such as symbolic links, are ignored,
// matching what processPath does for paths named on the command line. The
// first error from the walk or from processFile stops the walk and is
// returned.
func processDir(r *regexp.Regexp, root string) error {
	return filepath.Walk(root, func(path string, fi fs.FileInfo, err error) error {
		if err != nil {
			return err
		}

		if fi.IsDir() {
			if skipPath.Contains(path) {
				log.Printf("Skipping %q (per -skip-path)", path)
				return filepath.SkipDir // explicitly skipped
			}
			if !*doRecur && path != root {
				return filepath.SkipDir // skipped because we aren't recurring
			}
			return nil // nothing else to do for directories
		}

		if skipPath.Contains(path) {
			log.Printf("Skipping %q (per -skip-path)", path)
			return nil // explicitly skipped
		}

		// Walk reports symbolic links, devices, and the like without
		// following them. Rewriting through such an entry would read the
		// link's target and then replace the entry itself with a regular
		// file, so leave anything that is not a regular file alone.
		if !fi.Mode().IsRegular() {
			return nil
		}
		if filepath.Ext(path) != ".md" {
			return nil // nothing to do for non-Markdown files
		}
		return processFile(r, path)
	})
}
// processFile rewrites the matching Markdown links in the file at path.
//
// Every inline link found by linkRE is examined. If its URL begins with a
// match of r (the prefix identifying the source branch of the repository) and
// is not excluded by -skip-match, that prefix is replaced with
// "https://github.com/<repo>/blob/<target branch>" and the remainder of the
// URL is kept. All other content is copied through unchanged.
//
// The file is replaced atomically, and only if at least one link was actually
// rewritten; a file with nothing to change is left untouched.
func processFile(r *regexp.Regexp, path string) error {
	log.Printf("Processing file %q", path)

	input, err := os.ReadFile(path)
	if err != nil {
		return err
	}

	var output bytes.Buffer
	pos := 0 // offset in input of the first byte not yet copied to output
	changed := false
	for _, m := range linkRE.FindAllSubmatchIndex(input, -1) {
		// m[2]:m[3] delimits the URL captured by the submatch of linkRE.
		href := string(input[m[2]:m[3]])
		u := r.FindStringIndex(href)
		if u == nil {
			continue // not a link into the source branch
		}

		// The matched prefix must end exactly at the end of the branch name.
		// Otherwise a source branch of "v0.34" would also capture links to
		// "v0.34.x" and rewrite them to a branch that does not exist.
		rest := href[u[1]:]
		if rest != "" && !strings.ContainsRune("/#?", rune(rest[0])) {
			continue
		}

		if skipMatch.MatchString(href) {
			log.Printf("Skipped URL %q (by -skip-match)", href)
			continue
		}

		// Copy everything before the URL as-is, then write the replacement.
		// The text following the URL is picked up by the next copy.
		output.Write(input[pos:m[2]])
		fmt.Fprintf(&output, `https://github.com/%s/blob/%s%s`, *repoName, *targetBranch, rest)
		pos = m[3]
		changed = true
	}
	if !changed {
		return nil // nothing was rewritten; do not touch the file
	}
	output.Write(input[pos:]) // the rest of the file

	_, err = atomicfile.WriteAll(path, &output, 0644)
	return err
}
// stringList is a flag.Value holding a list of strings, given on the command
// line as a single comma-separated argument.
type stringList []string

// Set replaces the contents of lst with the comma-separated fields of s.
// An empty s clears the list. Set never reports an error.
func (lst *stringList) Set(s string) error {
	var fields []string
	if s != "" {
		fields = strings.Split(s, ",")
	}
	*lst = fields
	return nil
}

// Contains reports whether s is exactly equal to one of the elements of lst.
func (lst stringList) Contains(s string) bool {
	for i := range lst {
		if lst[i] == s {
			return true
		}
	}
	return false
}

// String renders lst in the same comma-separated form that Set accepts.
func (lst stringList) String() string {
	return strings.Join(lst, ",")
}
// regexpFlag is a flag.Value wrapping a compiled regular expression. The zero
// value holds no expression and matches nothing.
type regexpFlag struct{ *regexp.Regexp }

// MatchString reports whether s matches the stored expression. It reports
// false if no expression has been set.
func (r regexpFlag) MatchString(s string) bool {
	return r.Regexp != nil && r.Regexp.MatchString(s)
}

// Set compiles s and stores the result. If s does not compile, the error is
// returned and any previously stored expression is kept.
func (r *regexpFlag) Set(s string) error {
	compiled, err := regexp.Compile(s)
	if err == nil {
		r.Regexp = compiled
	}
	return err
}

// String returns the source text of the stored expression, or the empty
// string if no expression has been set.
func (r regexpFlag) String() string {
	if re := r.Regexp; re != nil {
		return re.String()
	}
	return ""
}