You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

782 lines
18 KiB

package autofile
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
cmn "github.com/tendermint/tendermint/libs/common"
)
// Defaults used by OpenGroup when no option overrides them, plus the cap on
// how many files a single checkTotalSizeLimit pass may remove.
const (
// Period of the ticker that triggers the head/total size checks.
defaultGroupCheckDuration = 5000 * time.Millisecond
defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB
defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB
// Maximum number of loop iterations (and therefore file removals) per
// checkTotalSizeLimit call.
maxFilesToRemove = 4 // needs to be greater than 1
)
/*
Group manages a head AutoFile together with the files that were previously
rotated out of it.

You can open a Group to keep restrictions on an AutoFile, like
the maximum size of each chunk, and/or the total amount of bytes
stored in the group.

The first file to be written in the Group.Dir is the head file.

	Dir/
	- <HeadPath>

Once the Head file reaches the size limit, it will be rotated.

	Dir/
	- <HeadPath>.000 // First rolled file
	- <HeadPath> // New head path, starts empty.
	// The implicit index is 001.

As more files are written, the index numbers grow...

	Dir/
	- <HeadPath>.000 // First rolled file
	- <HeadPath>.001 // Second rolled file
	- ...
	- <HeadPath> // New head path

The Group can also be used to binary-search for some line,
assuming that marker lines are written occasionally.
*/
type Group struct {
cmn.BaseService
ID string // "group:" followed by the head AutoFile's ID (set in OpenGroup)
Head *AutoFile // The head AutoFile to write to
headBuf *bufio.Writer // buffered writer in front of Head; all writes go through it
Dir string // Directory that contains .Head
ticker *time.Ticker // created in OnStart; drives processTicks
mtx sync.Mutex // locked by the accessors, writers, RotateFile and GroupReader.openFile
headSizeLimit int64 // head is rotated once its size reaches this; 0 disables the check
totalSizeLimit int64 // old files are removed while the group size is at or above this; 0 disables the check
groupCheckDuration time.Duration // period of ticker
minIndex int // Includes head
maxIndex int // Includes head, where Head will move to
// close this when the processTicks routine is done.
// this ensures we can cleanup the dir after calling Stop
// and the routine won't be trying to access it anymore
doneProcessTicks chan struct{}
// TODO: When we start deleting files, we need to start tracking GroupReaders
// and their dependencies.
}
// OpenGroup opens (or creates) the head file at headPath and wraps it in a new
// Group. An error is returned only when the head file cannot be opened.
//
// The group starts out with the package defaults; every entry of groupOptions
// is then applied in order. Finally the head's directory is scanned to
// initialise minIndex and maxIndex (that scan panics if the directory cannot
// be read).
//
// NOTE(review): Dir is derived with path.Dir while readGroupInfo uses
// filepath.Dir on the same path; these differ on platforms whose separator is
// not '/'. Confirm whether that is intended.
func OpenGroup(headPath string, groupOptions ...func(*Group)) (g *Group, err error) {
	head, err := OpenAutoFile(headPath)
	if err != nil {
		return nil, err
	}

	// minIndex and maxIndex start at zero and are filled in below.
	g = &Group{
		ID:                 "group:" + head.ID,
		Head:               head,
		headBuf:            bufio.NewWriterSize(head, 4096*10),
		Dir:                path.Dir(headPath),
		headSizeLimit:      defaultHeadSizeLimit,
		totalSizeLimit:     defaultTotalSizeLimit,
		groupCheckDuration: defaultGroupCheckDuration,
		doneProcessTicks:   make(chan struct{}),
	}
	for i := range groupOptions {
		groupOptions[i](g)
	}
	g.BaseService = *cmn.NewBaseService(nil, "Group", g)

	info := g.readGroupInfo()
	g.minIndex, g.maxIndex = info.MinIndex, info.MaxIndex
	return g, nil
}
// GroupCheckDuration returns an OpenGroup option that replaces the default
// groupCheckDuration with duration.
func GroupCheckDuration(duration time.Duration) func(*Group) {
	apply := func(group *Group) {
		group.groupCheckDuration = duration
	}
	return apply
}
// GroupHeadSizeLimit returns an OpenGroup option that replaces the default
// head size limit (10MB) with limit.
func GroupHeadSizeLimit(limit int64) func(*Group) {
	apply := func(group *Group) {
		group.headSizeLimit = limit
	}
	return apply
}
// GroupTotalSizeLimit returns an OpenGroup option that replaces the default
// total size limit of the group (1GB) with limit.
func GroupTotalSizeLimit(limit int64) func(*Group) {
	apply := func(group *Group) {
		group.totalSizeLimit = limit
	}
	return apply
}
// OnStart implements Service. It creates the ticker that fires every
// groupCheckDuration and launches the goroutine that checks the file and
// group limits on each tick. It always returns nil.
func (g *Group) OnStart() error {
	ticker := time.NewTicker(g.groupCheckDuration)
	g.ticker = ticker
	go g.processTicks()
	return nil
}
// OnStop implements Service by stopping the ticker that drives the limit
// checks and flushing any uncommitted data.
//
// OnStop has no way to return an error, so a failed flush is reported through
// the group's logger instead of being silently dropped.
// NOTE: g.Head must be closed separately using Close.
func (g *Group) OnStop() {
	g.ticker.Stop()
	if err := g.Flush(); err != nil {
		g.Logger.Error("Error flushing to disk", "err", err)
	}
}
// Wait blocks until the processTicks goroutine has finished, i.e. until
// doneProcessTicks is closed.
func (g *Group) Wait() {
	done := g.doneProcessTicks
	<-done
}
// Close flushes any uncommitted data and closes the head file. The group must
// be stopped by this moment.
//
// Close has no error return, so failures of the flush or of closing the head
// file are reported through the group's logger rather than being discarded.
func (g *Group) Close() {
	// Flush takes g.mtx itself, so it must run before we lock.
	if err := g.Flush(); err != nil {
		g.Logger.Error("Error flushing to disk", "err", err)
	}

	g.mtx.Lock()
	defer g.mtx.Unlock()
	if err := g.Head.closeFile(); err != nil {
		g.Logger.Error("Error closing head file", "head", g.Head.Path, "err", err)
	}
}
// HeadSizeLimit reports the head size limit currently configured for the
// group, read under the group's mutex.
func (g *Group) HeadSizeLimit() int64 {
	g.mtx.Lock()
	limit := g.headSizeLimit
	g.mtx.Unlock()
	return limit
}
// TotalSizeLimit reports the total size limit currently configured for the
// group, read under the group's mutex.
func (g *Group) TotalSizeLimit() int64 {
	g.mtx.Lock()
	limit := g.totalSizeLimit
	g.mtx.Unlock()
	return limit
}
// MaxIndex reports the index of the last file in the group (the head), read
// under the group's mutex.
func (g *Group) MaxIndex() int {
	g.mtx.Lock()
	index := g.maxIndex
	g.mtx.Unlock()
	return index
}
// MinIndex reports the index of the first file in the group, read under the
// group's mutex.
func (g *Group) MinIndex() int {
	g.mtx.Lock()
	index := g.minIndex
	g.mtx.Unlock()
	return index
}
// Write appends p to the group's buffered head writer and returns the number
// of bytes accepted. If nn < len(p), the returned error explains why the
// write was short.
// NOTE: Writes are buffered so they don't write synchronously
// TODO: Make it halt if space is unavailable
func (g *Group) Write(p []byte) (nn int, err error) {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	nn, err = g.headBuf.Write(p)
	return nn, err
}
// WriteLine appends line followed by "\n" to the group's buffered head writer
// and returns any error reported by that write.
// NOTE: Writes are buffered so they don't write synchronously
// TODO: Make it halt if space is unavailable
func (g *Group) WriteLine(line string) error {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	payload := []byte(line + "\n")
	if _, err := g.headBuf.Write(payload); err != nil {
		return err
	}
	return nil
}
// Buffered reports how many bytes are currently held in the head's write
// buffer.
func (g *Group) Buffered() int {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	pending := g.headBuf.Buffered()
	return pending
}
// Flush pushes any buffered data to the underlying head file and then syncs
// the file to stable storage. The sync is skipped when flushing the buffer
// fails; the first error encountered is returned.
func (g *Group) Flush() error {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	if err := g.headBuf.Flush(); err != nil {
		return err
	}
	return g.Head.Sync()
}
// processTicks is the background loop started by OnStart. On every tick it
// checks the head size limit and then the total size limit; it exits when the
// service's quit channel fires.
func (g *Group) processTicks() {
	// Closing doneProcessTicks is what unblocks Wait.
	defer close(g.doneProcessTicks)

	for {
		select {
		case <-g.Quit():
			return
		case <-g.ticker.C:
			g.checkHeadSizeLimit()
			g.checkTotalSizeLimit()
		}
	}
}
// checkHeadSizeLimit rotates the head file once its size has reached the
// configured head size limit. A limit of 0 disables the check. If the head's
// size cannot be determined, the problem is logged and nothing is rotated.
// NOTE: this function is called manually in tests.
func (g *Group) checkHeadSizeLimit() {
	limit := g.HeadSizeLimit()
	if limit == 0 {
		return
	}

	size, err := g.Head.Size()
	switch {
	case err != nil:
		g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err)
	case size >= limit:
		g.RotateFile()
	}
}
// checkTotalSizeLimit removes the oldest rotated files while the group's
// total size is at or above the configured total size limit. A limit of 0
// disables the check.
//
// At most maxFilesToRemove candidates are considered per call, starting at
// the group's minimum index. The head file itself is never removed: if only
// the head is left and the group is still over the limit, that is logged and
// the function returns.
func (g *Group) checkTotalSizeLimit() {
	limit := g.TotalSizeLimit()
	if limit == 0 {
		return
	}

	// readGroupInfo requires g.mtx to be held, so take the snapshot through
	// the locking wrapper rather than calling it directly.
	gInfo := g.ReadGroupInfo()
	totalSize := gInfo.TotalSize
	for i := 0; i < maxFilesToRemove; i++ {
		index := gInfo.MinIndex + i
		if totalSize < limit {
			return
		}
		if index == gInfo.MaxIndex {
			// Special degenerate case, just do nothing.
			g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path)
			return
		}

		pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex)
		fInfo, err := os.Stat(pathToRemove)
		if err != nil {
			// The file may already be gone; try the next index.
			g.Logger.Error("Failed to fetch info for file", "file", pathToRemove, "err", err)
			continue
		}
		if err = os.Remove(pathToRemove); err != nil {
			g.Logger.Error("Failed to remove path", "path", pathToRemove, "err", err)
			return
		}
		totalSize -= fInfo.Size()
	}
}
// RotateFile flushes, syncs and closes the current head, then renames it to
// the indexed path for the current maxIndex and increments maxIndex.
// Any failure along the way panics.
// Note it does not create a new head.
func (g *Group) RotateFile() {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	// Every step is mandatory; abort hard on the first failure.
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	headPath := g.Head.Path
	must(g.headBuf.Flush())
	must(g.Head.Sync())
	must(g.Head.closeFile())

	// Passing maxIndex+1 as the maximum makes filePathForIndex produce the
	// indexed name rather than the head path.
	rotatedPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1)
	must(os.Rename(headPath, rotatedPath))

	g.maxIndex++
}
// NewReader creates a group reader positioned at the file with the given
// index. It returns an error if that file cannot be opened.
// CONTRACT: Caller must close the returned GroupReader.
func (g *Group) NewReader(index int) (*GroupReader, error) {
	reader := newGroupReader(g)
	if err := reader.SetIndex(index); err != nil {
		return nil, err
	}
	return reader, nil
}
// SearchFunc compares a marker line against the line being searched for.
// It returns -1 if the searched-for line comes after the given line, 0 if the
// given line is the one being searched for, and 1 if the searched-for line
// comes before the given line. A non-nil error aborts the search.
type SearchFunc func(line string) (int, error)
// Search binary-searches the files of the Group for the line selected by cmp,
// considering only lines that start with prefix, and returns a GroupReader
// positioned so that the next ReadLine yields the located line.
//
// The returned bool is true if an exact match was found; otherwise the reader
// is positioned at the next greater line that starts with prefix.
// Errors from opening files, reading lines or from cmp are returned as-is and
// no reader is returned in that case.
// CONTRACT: Caller must close the returned GroupReader
func (g *Group) Search(prefix string, cmp SearchFunc) (*GroupReader, bool, error) {
	g.mtx.Lock()
	minIndex, maxIndex := g.minIndex, g.maxIndex
	g.mtx.Unlock()
	// Now minIndex/maxIndex may change meanwhile,
	// but it shouldn't be a big deal
	// (maybe we'll want to limit scanUntil though)
	for {
		curIndex := (minIndex + maxIndex + 1) / 2

		// Base case, when there's only 1 choice left.
		if minIndex == maxIndex {
			r, err := g.NewReader(maxIndex)
			if err != nil {
				return nil, false, err
			}
			match, err := scanUntil(r, prefix, cmp)
			if err != nil {
				r.Close()
				return nil, false, err
			}
			return r, match, nil
		}

		// Read starting roughly at the middle file,
		// until we find line that has prefix.
		r, err := g.NewReader(curIndex)
		if err != nil {
			return nil, false, err
		}
		foundIndex, line, err := scanNext(r, prefix)
		r.Close()
		if err != nil {
			return nil, false, err
		}

		// Compare this line to our search query.
		val, err := cmp(line)
		if err != nil {
			return nil, false, err
		}
		switch {
		case val < 0:
			// Line will come later
			minIndex = foundIndex
		case val == 0:
			// Stroke of luck, found the line
			r, err := g.NewReader(foundIndex)
			if err != nil {
				return nil, false, err
			}
			match, err := scanUntil(r, prefix, cmp)
			// Check the error first: a failed scan reports match == false,
			// which must surface as an error rather than trip the invariant
			// check below.
			if err != nil {
				r.Close()
				return nil, false, err
			}
			if !match {
				r.Close()
				panic("Expected match to be true")
			}
			return r, true, nil
		default:
			// We passed it
			maxIndex = curIndex - 1
		}
	}
}
// scanNext reads lines from r until one starts with prefix, consumes that
// line, and returns it together with the reader's file index at that point.
// Any read error ends the scan and is returned with zero values.
func scanNext(r *GroupReader, prefix string) (int, string, error) {
	for {
		line, err := r.ReadLine()
		if err != nil {
			return 0, "", err
		}
		if strings.HasPrefix(line, prefix) {
			return r.CurIndex(), line, nil
		}
	}
}
// scanUntil reads lines from r, skipping those without prefix and those for
// which cmp reports a negative value, until cmp reports zero or a positive
// value. That line is pushed back onto r (not consumed).
// It returns true iff cmp reported zero, i.e. an exact match was found.
// Read errors and cmp errors end the scan and are returned with false.
func scanUntil(r *GroupReader, prefix string, cmp SearchFunc) (bool, error) {
	for {
		line, err := r.ReadLine()
		if err != nil {
			return false, err
		}
		if !strings.HasPrefix(line, prefix) {
			continue
		}

		order, err := cmp(line)
		switch {
		case err != nil:
			return false, err
		case order < 0:
			// Still before the target; keep scanning.
		default:
			r.PushLine(line)
			return order == 0, nil
		}
	}
}
// FindLast searches backwards for the last line in Group with prefix.
// Scans each file forward until the end to find the last match.
//
// It returns the matching line with found == true. If no file between the
// group's minIndex and maxIndex yields a matching line, it returns
// found == false with a nil error. Errors from opening or reading files are
// returned with an empty match.
func (g *Group) FindLast(prefix string) (match string, found bool, err error) {
// Snapshot the index range under the lock; the scan itself runs unlocked.
g.mtx.Lock()
minIndex, maxIndex := g.minIndex, g.maxIndex
g.mtx.Unlock()
r, err := g.NewReader(maxIndex)
if err != nil {
return "", false, err
}
defer r.Close()
// Open files from the back and read
GROUP_LOOP:
for i := maxIndex; i >= minIndex; i-- {
// Reposition the shared reader at the start of file i.
err := r.SetIndex(i)
if err != nil {
return "", false, err
}
// Scan each line and test whether line matches
for {
line, err := r.ReadLine()
if err == io.EOF {
// Ran off the end of the group: report the last match seen in this
// pass, or move on to the previous file.
if found {
return match, found, nil
}
continue GROUP_LOOP
} else if err != nil {
return "", false, err
}
// Keep overwriting match so that the last matching line wins.
if strings.HasPrefix(line, prefix) {
match = line
found = true
}
// ReadLine advances to the next file on its own; once the reader's index
// has moved past i, file i has been fully scanned.
// NOTE(review): the prefix test above runs before this check, so the line
// that triggered the move to the next file is also considered — confirm
// this is intended.
if r.CurIndex() > i {
if found {
return match, found, nil
}
continue GROUP_LOOP
}
}
}
return
}
// GroupInfo holds information about the group, as gathered by scanning the
// directory that contains the head file.
type GroupInfo struct {
MinIndex int // index of the first file in the group, including head
MaxIndex int // index of the last file in the group, including head
TotalSize int64 // total size of the group
HeadSize int64 // size of the head
}
// ReadGroupInfo scans all files in g.Head's directory while holding the
// group's mutex and returns the resulting GroupInfo.
func (g *Group) ReadGroupInfo() GroupInfo {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	info := g.readGroupInfo()
	return info
}
// indexedFilePattern matches the names of rotated files, i.e. names ending in
// a dot followed by at least three digits; the digits are captured as the
// file index. Compiled once instead of once per directory entry.
var indexedFilePattern = regexp.MustCompile(`^.+\.([0-9]{3,})$`)

// readGroupInfo scans the directory containing the head file and returns the
// group's index range and sizes.
//
// Every file whose name starts with the head's base name contributes to
// TotalSize; those that additionally end in a numeric index determine
// MinIndex and MaxIndex. The head counts as one index past the highest
// numbered file, or as index 0 when there are no numbered files.
// Panics if the directory cannot be opened or listed.
// Index includes the head.
// CONTRACT: caller should have called g.mtx.Lock
func (g *Group) readGroupInfo() GroupInfo {
	groupDir := filepath.Dir(g.Head.Path)
	headBase := filepath.Base(g.Head.Path)
	minIndex, maxIndex := -1, -1
	var totalSize, headSize int64

	dir, err := os.Open(groupDir)
	if err != nil {
		panic(err)
	}
	defer dir.Close()
	fiz, err := dir.Readdir(0)
	if err != nil {
		panic(err)
	}

	// For each file in the directory, filter by pattern
	for _, fileInfo := range fiz {
		name := fileInfo.Name()
		if name == headBase {
			headSize = fileInfo.Size()
			totalSize += headSize
			continue
		}
		if !strings.HasPrefix(name, headBase) {
			continue
		}
		totalSize += fileInfo.Size()

		submatch := indexedFilePattern.FindStringSubmatch(name)
		if len(submatch) == 0 {
			// Shares the prefix but carries no index.
			continue
		}
		fileIndex, err := strconv.Atoi(submatch[1])
		if err != nil {
			panic(err)
		}
		if maxIndex < fileIndex {
			maxIndex = fileIndex
		}
		if minIndex == -1 || fileIndex < minIndex {
			minIndex = fileIndex
		}
	}

	// Now account for the head.
	if minIndex == -1 {
		// If there were no numbered files,
		// then the head is index 0.
		minIndex, maxIndex = 0, 0
	} else {
		// Otherwise, the head file is 1 greater
		maxIndex++
	}
	return GroupInfo{minIndex, maxIndex, totalSize, headSize}
}
// filePathForIndex returns the on-disk path of the file with the given index.
// The file at maxIndex is the head itself and lives at headPath; every other
// index maps to headPath followed by a dot and the index zero-padded to at
// least three digits.
func filePathForIndex(headPath string, index int, maxIndex int) string {
	if index != maxIndex {
		return fmt.Sprintf("%v.%03d", headPath, index)
	}
	return headPath
}
//--------------------------------------------------------------------------------
// GroupReader provides an interface for reading from a Group.
// It keeps a cursor consisting of a file index and an open file, and moves on
// to the next index when the current file is exhausted.
type GroupReader struct {
*Group
mtx sync.Mutex // locked by the reader's exported methods
curIndex int // index of the file the cursor is in
curFile *os.File // currently open file, nil until a file has been opened
curReader *bufio.Reader // buffered reader over curFile
curLine []byte // line handed back via PushLine, returned by the next ReadLine
}
// newGroupReader returns a reader bound to g with its cursor at index 0 and
// no file opened yet.
func newGroupReader(g *Group) *GroupReader {
	gr := new(GroupReader)
	gr.Group = g
	return gr
}
// Close closes the cursor file, if one is open, and resets the cursor to its
// initial state. It returns the error from closing the file; calling it on a
// reader without an open file is a no-op that returns nil.
func (gr *GroupReader) Close() error {
	gr.mtx.Lock()
	defer gr.mtx.Unlock()

	if gr.curReader == nil {
		return nil
	}

	closeErr := gr.curFile.Close()
	gr.curIndex, gr.curLine = 0, nil
	gr.curReader, gr.curFile = nil, nil
	return closeErr
}
// Read implements io.Reader, reading bytes from the current Reader
// incrementing index until enough bytes are read.
//
// An empty p is rejected with an error. When the current file is exhausted
// before p is full, the next file is opened; if that fails (openFile returns
// io.EOF once the index exceeds the group's maxIndex), the bytes read so far
// are returned together with that error.
func (gr *GroupReader) Read(p []byte) (n int, err error) {
lenP := len(p)
if lenP == 0 {
return 0, errors.New("given empty slice")
}
gr.mtx.Lock()
defer gr.mtx.Unlock()
// Open file if not open yet
if gr.curReader == nil {
if err = gr.openFile(gr.curIndex); err != nil {
return 0, err
}
}
// Iterate over files until enough bytes are read
var nn int
for {
// Read into the still-unfilled tail of p.
nn, err = gr.curReader.Read(p[n:])
n += nn
if err == io.EOF {
// End of the current file: done if p is full, otherwise continue with
// the next file.
if n >= lenP {
return n, nil
}
// Open the next file
if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
return n, err1
}
} else if err != nil {
return n, err
} else if nn == 0 { // empty file
// A read that made no progress and reported no error ends the call; this
// is also how the loop terminates once p has been filled.
return n, err
}
}
}
// ReadLine reads a line (without delimiter).
//
// A line previously handed back via PushLine is returned first. Otherwise the
// next '\n'-terminated line is read; when the current file ends before a
// delimiter is seen, reading continues in the next file and the partial data
// is prepended to the result. If there is no next file, the error from
// opening it is returned (io.EOF when no new lines are found).
func (gr *GroupReader) ReadLine() (string, error) {
	gr.mtx.Lock()
	defer gr.mtx.Unlock()

	// From PushLine
	if gr.curLine != nil {
		line := string(gr.curLine)
		gr.curLine = nil
		return line, nil
	}

	// Open file if not open yet
	if gr.curReader == nil {
		if err := gr.openFile(gr.curIndex); err != nil {
			return "", err
		}
	}

	// Iterate over files until line is found
	var linePrefix string
	for {
		bytesRead, err := gr.curReader.ReadBytes('\n')
		if err == io.EOF {
			// Open the next file
			if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
				return "", err1
			}
			// ReadBytes only reports an error when the data does not end in
			// the delimiter, so bytesRead is always an unterminated fragment
			// here; carry it over to the next file.
			linePrefix += string(bytesRead)
			continue
		} else if err != nil {
			return "", err
		}
		// Strip the trailing '\n'.
		return linePrefix + string(bytesRead[:len(bytesRead)-1]), nil
	}
}
// openFile points the cursor at the file with the given index, closing the
// previously open file (if any) and discarding any pushed-back line.
// IF index > gr.Group.maxIndex, returns io.EOF
// CONTRACT: caller should hold gr.mtx
func (gr *GroupReader) openFile(index int) error {
	// Lock on Group to ensure that head doesn't move in the meanwhile.
	group := gr.Group
	group.mtx.Lock()
	defer group.mtx.Unlock()

	maxIndex := group.maxIndex
	if index > maxIndex {
		return io.EOF
	}

	target := filePathForIndex(gr.Head.Path, index, maxIndex)
	file, err := os.OpenFile(target, os.O_RDONLY|os.O_CREATE, autoFilePerms)
	if err != nil {
		return err
	}

	// Update gr.cur*
	if previous := gr.curFile; previous != nil {
		previous.Close() // TODO return error?
	}
	gr.curIndex, gr.curFile = index, file
	gr.curReader = bufio.NewReader(file)
	gr.curLine = nil
	return nil
}
// PushLine hands line back to the reader so that the next ReadLine call
// returns it. Only one line can be pending at a time: it panics if called
// twice without an intervening ReadLine.
func (gr *GroupReader) PushLine(line string) {
	gr.mtx.Lock()
	defer gr.mtx.Unlock()

	if gr.curLine != nil {
		panic("PushLine failed, already have line")
	}
	gr.curLine = []byte(line)
}
// CurIndex reports the index of the file the cursor is currently in, read
// under the reader's mutex.
func (gr *GroupReader) CurIndex() int {
	gr.mtx.Lock()
	index := gr.curIndex
	gr.mtx.Unlock()
	return index
}
// SetIndex moves the cursor to the file with the given index by opening that
// file, and returns any error from doing so.
func (gr *GroupReader) SetIndex(index int) error {
	gr.mtx.Lock()
	defer gr.mtx.Unlock()

	err := gr.openFile(index)
	return err
}
//--------------------------------------------------------------------------------
// MakeSimpleSearchFunc builds a SearchFunc for markers of the form
// <prefix><number>.
// For example, if prefix is '#HEIGHT:', the markers are expected to be of the
// form:
//
// #HEIGHT:1
// ...
// #HEIGHT:2
// ...
//
// The returned function reports 1 when target is smaller than the marker's
// number, 0 when they are equal and -1 when target is larger. A line without
// the prefix, or whose remainder is not an integer, yields -1 and an error.
func MakeSimpleSearchFunc(prefix string, target int) SearchFunc {
	return func(line string) (int, error) {
		if !strings.HasPrefix(line, prefix) {
			return -1, fmt.Errorf("Marker line did not have prefix: %v", prefix)
		}

		marker, err := strconv.Atoi(strings.TrimPrefix(line, prefix))
		if err != nil {
			return -1, fmt.Errorf("Failed to parse marker line: %v", err.Error())
		}

		switch {
		case target < marker:
			return 1, nil
		case target == marker:
			return 0, nil
		default:
			return -1, nil
		}
	}
}