Browse Source

libs/cmn/writefileatomic: Handle file already exists gracefully (#2113)

This uses the stdlib's method of creating a tempfile in our write
file atomimc method, with a few modifications. We use a 64 bit number
rather than 32 bit, and therefore a corresponding LCG. This is to
reduce collision probability. (Note we currently used 32 bytes previously,
so this is likely a concern)

We handle reseeding the LCG in such a way that multiple threads are
even less likely to reuse the same seed.
pull/2120/head
ValarDragon 6 years ago
committed by Alexander Simmerl
parent
commit
be642754f5
4 changed files with 238 additions and 52 deletions
  1. +1
    -0
      CHANGELOG_PENDING.md
  2. +0
    -42
      libs/common/os_test.go
  3. +99
    -10
      libs/common/tempfile.go
  4. +138
    -0
      libs/common/tempfile_test.go

+ 1
- 0
CHANGELOG_PENDING.md View File

@ -22,5 +22,6 @@ IMPROVEMENTS:
- [common] bit array functions which take in another parameter are now thread safe
BUG FIXES:
- [common] \#2109 Safely handle cases where atomic write files already exist
- [privval] fix a deadline for accepting new connections in socket private
validator.

+ 0
- 42
libs/common/os_test.go View File

@ -1,52 +1,10 @@
package common
import (
"bytes"
"io/ioutil"
"os"
"testing"
)
func TestWriteFileAtomic(t *testing.T) {
var (
data = []byte(RandStr(RandIntn(2048)))
old = RandBytes(RandIntn(2048))
perm os.FileMode = 0600
)
f, err := ioutil.TempFile("/tmp", "write-atomic-test-")
if err != nil {
t.Fatal(err)
}
defer os.Remove(f.Name())
if err = ioutil.WriteFile(f.Name(), old, 0664); err != nil {
t.Fatal(err)
}
if err = WriteFileAtomic(f.Name(), data, perm); err != nil {
t.Fatal(err)
}
rData, err := ioutil.ReadFile(f.Name())
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(data, rData) {
t.Fatalf("data mismatch: %v != %v", data, rData)
}
stat, err := os.Stat(f.Name())
if err != nil {
t.Fatal(err)
}
if have, want := stat.Mode().Perm(), perm; have != want {
t.Errorf("have %v, want %v", have, want)
}
}
func TestGoPath(t *testing.T) {
// restore original gopath upon exit
path := os.Getenv("GOPATH")


+ 99
- 10
libs/common/tempfile.go View File

@ -1,28 +1,117 @@
package common
import (
fmt "fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
)
// WriteFileAtomic creates a temporary file with data and the perm given and
const (
atomicWriteFilePrefix = "write-file-atomic-"
// Maximum number of atomic write file conflicts before we start reseeding
// (reduced from golang's default 10 due to using an increased randomness space)
atomicWriteFileMaxNumConflicts = 5
// Maximum number of attempts to make at writing the write file before giving up
// (reduced from golang's default 10000 due to using an increased randomness space)
atomicWriteFileMaxNumWriteAttempts = 1000
// LCG constants from Donald Knuth MMIX
// This LCG's has a period equal to 2**64
lcgA = 6364136223846793005
lcgC = 1442695040888963407
// Create in case it doesn't exist and force kernel
// flush, which still leaves the potential of lingering disk cache.
// Never overwrites files
atomicWriteFileFlag = os.O_WRONLY | os.O_CREATE | os.O_SYNC | os.O_TRUNC | os.O_EXCL
)
var (
atomicWriteFileRand uint64
atomicWriteFileRandMu sync.Mutex
)
func writeFileRandReseed() uint64 {
// Scale the PID, to minimize the chance that two processes seeded at similar times
// don't get the same seed. Note that PID typically ranges in [0, 2**15), but can be
// up to 2**22 under certain configurations. We left bit-shift the PID by 20, so that
// a PID difference of one corresponds to a time difference of 2048 seconds.
// The important thing here is that now for a seed conflict, they would both have to be on
// the correct nanosecond offset, and second-based offset, which is much less likely than
// just a conflict with the correct nanosecond offset.
return uint64(time.Now().UnixNano() + int64(os.Getpid()<<20))
}
// Use a fast thread safe LCG for atomic write file names.
// Returns a string corresponding to a 64 bit int.
// If it was a negative int, the leading number is a 0.
func randWriteFileSuffix() string {
atomicWriteFileRandMu.Lock()
r := atomicWriteFileRand
if r == 0 {
r = writeFileRandReseed()
}
// Update randomness according to lcg
r = r*lcgA + lcgC
atomicWriteFileRand = r
atomicWriteFileRandMu.Unlock()
// Can have a negative name, replace this in the following
suffix := strconv.Itoa(int(r))
if string(suffix[0]) == "-" {
// Replace first "-" with "0". This is purely for UI clarity,
// as otherwhise there would be two `-` in a row.
suffix = strings.Replace(suffix, "-", "0", 1)
}
return suffix
}
// WriteFileAtomic creates a temporary file with data and provided perm and
// swaps it atomically with filename if successful.
func WriteFileAtomic(filename string, data []byte, perm os.FileMode) error {
func WriteFileAtomic(filename string, data []byte, perm os.FileMode) (err error) {
// This implementation is inspired by the golang stdlibs method of creating
// tempfiles. Notable differences are that we use different flags, a 64 bit LCG
// and handle negatives differently.
// The core reason we can't use golang's TempFile is that we must write
// to the file synchronously, as we need this to persist to disk.
// We also open it in write-only mode, to avoid concerns that arise with read.
var (
dir = filepath.Dir(filename)
// Create in case it doesn't exist and force kernel
// flush, which still leaves the potential of lingering disk cache.
// Never overwrites files
flag = os.O_WRONLY | os.O_CREATE | os.O_SYNC | os.O_TRUNC | os.O_EXCL
f *os.File
)
tempFile := filepath.Join(dir, "write-file-atomic-"+RandStr(32))
f, err := os.OpenFile(tempFile, flag, perm)
if err != nil {
return err
nconflict := 0
// Limit the number of attempts to create a file. Something is seriously
// wrong if it didn't get created after 1000 attempts, and we don't want
// an infinite loop
i := 0
for ; i < atomicWriteFileMaxNumWriteAttempts; i++ {
name := filepath.Join(dir, atomicWriteFilePrefix+randWriteFileSuffix())
f, err = os.OpenFile(name, atomicWriteFileFlag, perm)
// If the file already exists, try a new file
if os.IsExist(err) {
// If the files exists too many times, start reseeding as we've
// likely hit another instances seed.
if nconflict++; nconflict > atomicWriteFileMaxNumConflicts {
atomicWriteFileRandMu.Lock()
atomicWriteFileRand = writeFileRandReseed()
atomicWriteFileRandMu.Unlock()
}
continue
} else if err != nil {
return err
}
break
}
if i == atomicWriteFileMaxNumWriteAttempts {
return fmt.Errorf("Could not create atomic write file after %d attempts", i)
}
// Clean up in any case. Defer stacking order is last-in-first-out.
defer os.Remove(f.Name())
defer f.Close()


+ 138
- 0
libs/common/tempfile_test.go View File

@ -0,0 +1,138 @@
package common
// Need access to internal variables, so can't use _test package
import (
"bytes"
fmt "fmt"
"io/ioutil"
"os"
testing "testing"
"github.com/stretchr/testify/require"
)
func TestWriteFileAtomic(t *testing.T) {
var (
data = []byte(RandStr(RandIntn(2048)))
old = RandBytes(RandIntn(2048))
perm os.FileMode = 0600
)
f, err := ioutil.TempFile("/tmp", "write-atomic-test-")
if err != nil {
t.Fatal(err)
}
defer os.Remove(f.Name())
if err = ioutil.WriteFile(f.Name(), old, 0664); err != nil {
t.Fatal(err)
}
if err = WriteFileAtomic(f.Name(), data, perm); err != nil {
t.Fatal(err)
}
rData, err := ioutil.ReadFile(f.Name())
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(data, rData) {
t.Fatalf("data mismatch: %v != %v", data, rData)
}
stat, err := os.Stat(f.Name())
if err != nil {
t.Fatal(err)
}
if have, want := stat.Mode().Perm(), perm; have != want {
t.Errorf("have %v, want %v", have, want)
}
}
// This tests atomic write file when there is a single duplicate file.
// Expected behavior is for a new file to be created, and the original write file to be unaltered.
func TestWriteFileAtomicDuplicateFile(t *testing.T) {
var (
defaultSeed uint64 = 1
testString = "This is a glorious test string"
expectedString = "Did the test file's string appear here?"
fileToWrite = "/tmp/TestWriteFileAtomicDuplicateFile-test.txt"
)
// Create a file at the seed, and reset the seed.
atomicWriteFileRand = defaultSeed
firstFileRand := randWriteFileSuffix()
atomicWriteFileRand = defaultSeed
fname := "/tmp/" + atomicWriteFilePrefix + firstFileRand
f, err := os.OpenFile(fname, atomicWriteFileFlag, 0777)
defer os.Remove(fname)
// Defer here, in case there is a panic in WriteFileAtomic.
defer os.Remove(fileToWrite)
require.Nil(t, err)
f.WriteString(testString)
WriteFileAtomic(fileToWrite, []byte(expectedString), 0777)
// Check that the first atomic file was untouched
firstAtomicFileBytes, err := ioutil.ReadFile(fname)
require.Nil(t, err, "Error reading first atomic file")
require.Equal(t, []byte(testString), firstAtomicFileBytes, "First atomic file was overwritten")
// Check that the resultant file is correct
resultantFileBytes, err := ioutil.ReadFile(fileToWrite)
require.Nil(t, err, "Error reading resultant file")
require.Equal(t, []byte(expectedString), resultantFileBytes, "Written file had incorrect bytes")
// Check that the intermediate write file was deleted
// Get the second write files' randomness
atomicWriteFileRand = defaultSeed
_ = randWriteFileSuffix()
secondFileRand := randWriteFileSuffix()
_, err = os.Stat("/tmp/" + atomicWriteFilePrefix + secondFileRand)
require.True(t, os.IsNotExist(err), "Intermittent atomic write file not deleted")
}
// This tests atomic write file when there are many duplicate files.
// Expected behavior is for a new file to be created under a completely new seed,
// and the original write files to be unaltered.
func TestWriteFileAtomicManyDuplicates(t *testing.T) {
var (
defaultSeed uint64 = 2
testString = "This is a glorious test string, from file %d"
expectedString = "Did any of the test file's string appear here?"
fileToWrite = "/tmp/TestWriteFileAtomicDuplicateFile-test.txt"
)
// Initialize all of the atomic write files
atomicWriteFileRand = defaultSeed
for i := 0; i < atomicWriteFileMaxNumConflicts+2; i++ {
fileRand := randWriteFileSuffix()
fname := "/tmp/" + atomicWriteFilePrefix + fileRand
f, err := os.OpenFile(fname, atomicWriteFileFlag, 0777)
require.Nil(t, err)
f.WriteString(fmt.Sprintf(testString, i))
defer os.Remove(fname)
}
atomicWriteFileRand = defaultSeed
// Defer here, in case there is a panic in WriteFileAtomic.
defer os.Remove(fileToWrite)
WriteFileAtomic(fileToWrite, []byte(expectedString), 0777)
// Check that all intermittent atomic file were untouched
atomicWriteFileRand = defaultSeed
for i := 0; i < atomicWriteFileMaxNumConflicts+2; i++ {
fileRand := randWriteFileSuffix()
fname := "/tmp/" + atomicWriteFilePrefix + fileRand
firstAtomicFileBytes, err := ioutil.ReadFile(fname)
require.Nil(t, err, "Error reading first atomic file")
require.Equal(t, []byte(fmt.Sprintf(testString, i)), firstAtomicFileBytes,
"atomic write file %d was overwritten", i)
}
// Check that the resultant file is correct
resultantFileBytes, err := ioutil.ReadFile(fileToWrite)
require.Nil(t, err, "Error reading resultant file")
require.Equal(t, []byte(expectedString), resultantFileBytes, "Written file had incorrect bytes")
}

Loading…
Cancel
Save