From f260ee1c9f71eee4e0b22a37e3eed3d084b0a01e Mon Sep 17 00:00:00 2001
From: Mason Payne
Date: Tue, 18 Jul 2023 18:21:12 -0600
Subject: [PATCH] add zip and parity stuff

---
 db/db.go                     | 42 ++++++++++++++++++
 fileUtilities/hash.go        | 23 ++++++++++
 fileUtilities/parity.go      | 55 +++++++++++++++++++++++
 fileUtilities/parity_test.go | 70 +++++++++++++++++++++++++++++
 fileUtilities/zip.go         | 86 ++++++++++++++++++++++++++++++++++++
 fileUtilities/zip_test.go    | 36 +++++++++++++++
 go.mod                       |  1 +
 go.sum                       |  2 +
 main.go                      | 34 +++++++++++++-
 partitioner/partitioner.go   | 53 ++++++++++++++++++++++
 source/source.go             | 22 ++-------
 11 files changed, 403 insertions(+), 21 deletions(-)
 create mode 100644 fileUtilities/hash.go
 create mode 100644 fileUtilities/parity.go
 create mode 100644 fileUtilities/parity_test.go
 create mode 100644 fileUtilities/zip.go
 create mode 100644 fileUtilities/zip_test.go
 create mode 100644 partitioner/partitioner.go

diff --git a/db/db.go b/db/db.go
index a476074..4d38882 100644
--- a/db/db.go
+++ b/db/db.go
@@ -17,6 +17,9 @@ type DB interface {
 	Migrate() error
 	StoreFile(fileMetadata types.FileMetadata) error
 	RemoveFile(fileMetadata types.FileMetadata) error
+	GetTotalSize() (int64, error)
+	GetFileCount() (int64, error)
+	GetFiles() ([]types.FileMetadata, error)
 }
 
 type store struct {
@@ -90,6 +93,46 @@ func (d *store) RemoveFile(fileMetadata types.FileMetadata) error {
 	return nil
 }
 
+func (d *store) GetTotalSize() (int64, error) {
+	var size int64
+	// COALESCE: SUM() yields NULL on an empty table, which would fail the int64 Scan
+	query := `SELECT COALESCE(SUM(size), 0) FROM files`
+	err := d.db.QueryRow(query).Scan(&size)
+	if err != nil {
+		return 0, fmt.Errorf("error getting size | %w", err)
+	}
+	return size, nil
+}
+
+func (d *store) GetFileCount() (int64, error) {
+	var count int64
+	query := `SELECT COUNT(*) FROM files`
+	err := d.db.QueryRow(query).Scan(&count)
+	if err != nil {
+		return 0, fmt.Errorf("error getting count | %w", err)
+	}
+	return count, nil
+}
+
+func (d *store) GetFiles() ([]types.FileMetadata, error) {
+	var files []types.FileMetadata
+	query := `SELECT name, path, size, hash, modifiedDate, backedUp FROM files order by path, name`
+	rows, err := d.db.Query(query)
+	if err != nil {
+		return nil, fmt.Errorf("error getting files | %w", err)
+	}
+	defer rows.Close()
+	for rows.Next() {
+		var file types.FileMetadata
+		err := rows.Scan(&file.Name, &file.Path, &file.Size, &file.Hash, &file.ModifiedDate, &file.BackedUp)
+		if err != nil {
+			return nil, fmt.Errorf("error scanning file | %w", err)
+		}
+		files = append(files, file)
+	}
+	return files, nil
+}
+
 func (d *store) Close() error {
 	return d.db.Close()
 }
diff --git a/fileUtilities/hash.go b/fileUtilities/hash.go
new file mode 100644
index 0000000..394c725
--- /dev/null
+++ b/fileUtilities/hash.go
@@ -0,0 +1,23 @@
+package fileUtilities
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"io"
+	"os"
+)
+
+func HashFile(filePath string) ([]byte, error) {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return []byte{}, fmt.Errorf("error opening file for hashing: %w", err)
+	}
+	defer file.Close()
+
+	h := sha256.New()
+	if _, err := io.Copy(h, file); err != nil {
+		return []byte{}, fmt.Errorf("error hashing file: %w", err)
+	}
+
+	return h.Sum(nil), nil
+}
diff --git a/fileUtilities/parity.go b/fileUtilities/parity.go
new file mode 100644
index 0000000..afeb75f
--- /dev/null
+++ b/fileUtilities/parity.go
@@ -0,0 +1,57 @@
+package fileUtilities
+
+import (
+	"fmt"
+	"io"
+)
+
+func parityStream(in1, in2 io.Reader, out io.Writer) error {
+	var err error
+	byteSize := 1024
+	done1 := false
+	done2 := false
+	// loop until BOTH streams are exhausted; with && the tail of the
+	// longer stream would be dropped once the shorter one hit EOF
+	for !done1 || !done2 {
+		// get bytes from in1 and in2 and write the parity to buf
+		// if either in1 or in2 is done, write the remaining bytes from the other to buf
+		in1Bytes := make([]byte, byteSize)
+		in2Bytes := make([]byte, byteSize)
+		read1 := 0
+		read2 := 0
+		if !done1 {
+			read1, err = in1.Read(in1Bytes)
+			if err != nil {
+				if err == io.EOF {
+					done1 = true
+				} else {
+					return err
+				}
+			}
+		}
+		if !done2 {
+			read2, err = in2.Read(in2Bytes)
+			if err != nil {
+				if err == io.EOF {
+					done2 = true
+				} else {
+					return err
+				}
+			}
+		}
+		maxRead := read1
+		if read2 > maxRead {
+			maxRead = read2
+		}
+
+		parityBytes := make([]byte, maxRead)
+		for i := 0; i < maxRead; i++ {
+			parityBytes[i] = in1Bytes[i] ^ in2Bytes[i]
+		}
+		_, err := out.Write(parityBytes)
+		if err != nil {
+			return fmt.Errorf("error writing to buffer: %w", err)
+		}
+	}
+	return nil
+}
diff --git a/fileUtilities/parity_test.go b/fileUtilities/parity_test.go
new file mode 100644
index 0000000..8e8cd45
--- /dev/null
+++ b/fileUtilities/parity_test.go
@@ -0,0 +1,70 @@
+package fileUtilities
+
+import (
+	"bytes"
+	"io"
+	"testing"
+)
+
+func Test_createParityFile(t *testing.T) {
+	type args struct {
+		in1 io.Reader
+		in2 io.Reader
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantOut string
+		wantErr bool
+	}{
+		{
+			name: "create parity file",
+			args: args{
+				in1: bytes.NewBuffer([]byte{0, 1, 2, 3, 4, 5, 6, 7}),
+				in2: bytes.NewBuffer([]byte{7, 6, 5, 4, 3, 2, 1, 0}),
+			},
+			wantOut: string([]byte{7, 7, 7, 7, 7, 7, 7, 7}),
+			wantErr: false,
+		},
+		{
+			name: "in1 is longer than in2",
+			args: args{
+				in1: bytes.NewBuffer([]byte{0, 1, 2, 3, 4, 5, 6, 7, 8}),
+				in2: bytes.NewBuffer([]byte{7, 6, 5, 4, 3, 2, 1, 0}),
+			},
+			wantOut: string([]byte{7, 7, 7, 7, 7, 7, 7, 7, 8}),
+			wantErr: false,
+		},
+		{
+			name: "in2 is longer than in1",
+			args: args{
+				in1: bytes.NewBuffer([]byte{0, 1, 2, 3, 4, 5, 6, 7}),
+				in2: bytes.NewBuffer([]byte{7, 6, 5, 4, 3, 2, 1, 0, 54}),
+			},
+			wantOut: string([]byte{7, 7, 7, 7, 7, 7, 7, 7, 54}),
+			wantErr: false,
+		},
+		{
+			name: "parity recreates original file",
+			args: args{
+				in1: bytes.NewBuffer([]byte{0, 1, 2, 3, 4, 5, 6, 7}),
+				in2: bytes.NewBuffer([]byte{7, 7, 7, 7, 7, 7, 7, 7, 54}),
+			},
+			wantOut: string([]byte{7, 6, 5, 4, 3, 2, 1, 0, 54}),
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out := &bytes.Buffer{}
+			err := parityStream(tt.args.in1, tt.args.in2, out)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("parityStream() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if gotOut := out.String(); gotOut != tt.wantOut {
+				t.Errorf("parityStream() gotOut = %v, want %v", gotOut, tt.wantOut)
+			}
+		})
+	}
+}
diff --git a/fileUtilities/zip.go b/fileUtilities/zip.go
new file mode 100644
index 0000000..2797afa
--- /dev/null
+++ b/fileUtilities/zip.go
@@ -0,0 +1,88 @@
+package fileUtilities
+
+import (
+	"archive/zip"
+	"fmt"
+	"forever-files/types"
+	"io"
+	"os"
+	"path"
+	"strings"
+)
+
+func CreateZip(fileName, outDir, baseDir string, partition []types.FileMetadata) error {
+	// Create a buffer to write our archive to.
+	outFile := path.Join(outDir, fileName+".zip")
+	zipFile, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return fmt.Errorf("error opening/creating zip file: %w", err)
+	}
+	// release the descriptor on every early error return below
+	defer zipFile.Close()
+
+	// Create a new zip archive.
+	w := zip.NewWriter(zipFile)
+
+	files := prepFiles(baseDir, partition)
+
+	for _, file := range files {
+		zf, err := w.Create(file.Name)
+		if err != nil {
+			return fmt.Errorf("error creating zip file: %w", err)
+		}
+		f, err := os.Open(file.Path)
+		if err != nil {
+			fmt.Printf("error opening file: %v\n", err)
+			fmt.Printf("skipping file: %v\n", file.Path)
+			continue
+		}
+		if _, err := io.Copy(zf, f); err != nil {
+			return fmt.Errorf("error copying file to zip file: %w", err)
+		}
+		err = f.Close()
+		if err != nil {
+			return fmt.Errorf("error closing file: %w", err)
+		}
+	}
+
+	// Make sure to check the error on Close.
+	err = w.Close()
+	if err != nil {
+		return fmt.Errorf("error closing zip file: %w", err)
+	}
+
+	err = zipFile.Close()
+	if err != nil {
+		return fmt.Errorf("error closing zip file: %w", err)
+	}
+
+	return nil
+}
+
+func prepFiles(baseDir string, partition []types.FileMetadata) []struct {
+	Name, Path string
+} {
+	var files []struct {
+		Name, Path string
+	}
+	for _, file := range partition {
+		filePath := path.Join(file.Path, file.Name)
+
+		// from zip.Create documentation:
+		// ...The name must be a relative path: it must not start with a
+		// drive letter (e.g. C:) or leading slash, and only forward slashes
+		// are allowed...
+		fileName := strings.Replace(replaceBackslashes(strings.Replace(filePath, baseDir, "", 1)), "/", "", 1)
+		files = append(files, struct {
+			Name, Path string
+		}{
+			Name: fileName,
+			Path: filePath,
+		})
+	}
+	return files
+}
+
+func replaceBackslashes(input string) string {
+	return strings.ReplaceAll(input, "\\", "/")
+}
diff --git a/fileUtilities/zip_test.go b/fileUtilities/zip_test.go
new file mode 100644
index 0000000..e68b77f
--- /dev/null
+++ b/fileUtilities/zip_test.go
@@ -0,0 +1,36 @@
+package fileUtilities
+
+import "testing"
+
+func Test_replaceBackslashes(t *testing.T) {
+	type args struct {
+		input string
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{
+			name: "replace backslashes",
+			args: args{
+				input: "C:\\Users\\james\\Documents\\test",
+			},
+			want: "C:/Users/james/Documents/test",
+		},
+		{
+			name: "no backslashes",
+			args: args{
+				input: "C:/Users/james/Documents/test",
+			},
+			want: "C:/Users/james/Documents/test",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := replaceBackslashes(tt.args.input); got != tt.want {
+				t.Errorf("replaceBackslashes() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/go.mod b/go.mod
index 1de410b..d959163 100644
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,7 @@ require (
 
 require (
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
diff --git a/go.sum b/go.sum
index d3c184c..24c1c91 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
 github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
diff --git a/main.go b/main.go
index ff83c9a..314bc6b 100644
--- a/main.go
+++ b/main.go
@@ -3,23 +3,53 @@
 import (
 	"fmt"
 	"forever-files/db"
-	"forever-files/source"
+	"forever-files/fileUtilities"
+	"forever-files/partitioner"
 	"forever-files/types"
 )
 
 func main() {
 	fmt.Printf("%v\n", types.AppName)
+	baseDir := "C:\\Users\\gomas\\Nextcloud"
 
 	store, err := db.NewDB(types.AppName)
 	if err != nil {
 		panic(fmt.Errorf("error creating db: %w", err))
 	}
+	defer store.Close()
 	err = store.Migrate()
 	if err != nil {
 		panic(fmt.Errorf("error migrating db: %w", err))
 	}
 
-	source.GatherInfo("C:\\Users\\gomas\\Nextcloud", store)
+	//source.GatherInfo(baseDir, store)
+	oneDVDSize := int64(4600000000)
+	partitions, err := partitioner.CalculatePartitions(store, oneDVDSize)
+	if err != nil {
+		panic(fmt.Errorf("error calculating partitions: %w", err))
+	}
+
+	// zip up the files in each partition
+	partitionCount := len(partitions)
+	for i, partition := range partitions {
+		fileName := fmt.Sprintf("partition%0*d", getZeroPadAmount(partitionCount), i)
+		fmt.Printf("Creating zip file: %v\n", fileName)
+		err = fileUtilities.CreateZip(fileName, "C:\\tmp\\", baseDir, partition)
+		if err != nil {
+			panic(fmt.Errorf("error creating zip: %w", err))
+		}
+	}
+
+	// create parities for each zip file pair, figure out how to store the length of each zip file with the parity
+
+	// create a folder for each DVD add the scripts and zip files
+
+	// copy the zip files to the DVD
+}
+
+func getZeroPadAmount(n int) int {
+	str := fmt.Sprintf("%d", n)
+	return len(str)
 }
 
diff --git a/partitioner/partitioner.go b/partitioner/partitioner.go
new file mode 100644
index 0000000..bcd3fa7
--- /dev/null
+++ b/partitioner/partitioner.go
@@ -0,0 +1,59 @@
+package partitioner
+
+import (
+	"fmt"
+	"forever-files/db"
+	"forever-files/types"
+	"github.com/dustin/go-humanize"
+)
+
+func CalculatePartitions(store db.DB, targetSize int64) (partitions [][]types.FileMetadata, err error) {
+	totalSize, err := store.GetTotalSize()
+	if err != nil {
+		return nil, fmt.Errorf("error getting total size: %w", err)
+	}
+	if targetSize <= 0 {
+		targetSize = totalSize / 2
+	}
+	fmt.Printf("Total Size: %v\n", totalSize)
+	fmt.Printf("Target Size: %v\n", targetSize)
+
+	files, err := store.GetFiles()
+	if err != nil {
+		return nil, fmt.Errorf("error getting files: %w", err)
+	}
+	partitions = make([][]types.FileMetadata, 0)
+	partitionSize := int64(0)
+	partitionFiles := make([]types.FileMetadata, 0)
+	leftOverFiles := make([]types.FileMetadata, 0)
+	leftOverSize := int64(0)
+	for _, file := range files {
+		if partitionSize+file.Size > targetSize {
+			fmt.Printf("Partition Size: %v\n", humanize.Bytes(uint64(partitionSize)))
+			partitions = append(partitions, partitionFiles)
+			partitionFiles = make([]types.FileMetadata, 0)
+			partitionSize = 0
+		}
+		if partitionSize < targetSize && partitionSize+file.Size < targetSize {
+			partitionFiles = append(partitionFiles, file)
+			partitionSize += file.Size
+		} else {
+			leftOverFiles = append(leftOverFiles, file)
+			leftOverSize += file.Size
+		}
+	}
+	// flush the final, partially-filled partition; without this the last
+	// batch of files would be silently dropped and never zipped
+	if len(partitionFiles) > 0 {
+		fmt.Printf("Partition Size: %v\n", humanize.Bytes(uint64(partitionSize)))
+		partitions = append(partitions, partitionFiles)
+	}
+
+	for _, partition := range partitions {
+		fmt.Printf("Partition File Count: %v\n", len(partition))
+	}
+	fmt.Printf("Left Over File Count: %v\n", len(leftOverFiles))
+	fmt.Printf("Left Over Size: %v\n", humanize.Bytes(uint64(leftOverSize)))
+
+	return partitions, nil
+}
diff --git a/source/source.go b/source/source.go
index 0bb5bbb..4e33ed8 100644
--- a/source/source.go
+++ b/source/source.go
@@ -1,11 +1,10 @@
 package source
 
 import (
-	"crypto/sha256"
 	"fmt"
 	"forever-files/db"
+	"forever-files/fileUtilities"
 	"forever-files/types"
-	"io"
 	"log"
 	"os"
 	"path"
@@ -46,13 +45,13 @@ func walkDir(dirPath string, db db.DB) error {
 			log.Default().Printf("error getting file info: %v", err)
 			continue
 		}
-		hash, err := hashFile(path.Join(dirPath, entry.Name()))
+		hash, err := fileUtilities.HashFile(path.Join(dirPath, entry.Name()))
 		if err != nil {
 			log.Default().Printf("error hashing file: %v", err)
 			continue
 		}
 		// store info
-		fmt.Printf("Name: %v, Size: %v, Modified Date: %v, Hash: %v\n", fileInfo.Name(), fileInfo.Size(), fileInfo.ModTime(), hash)
+		//fmt.Printf("Name: %v, Size: %v, Modified Date: %v, Hash: %v\n", fileInfo.Name(), fileInfo.Size(), fileInfo.ModTime(), hash)
 		err = db.StoreFile(types.FileMetadata{
 			Name: fileInfo.Name(),
 			Path: dirPath,
@@ -69,18 +68,3 @@ func walkDir(dirPath string, db db.DB) error {
 	}
 	return nil
 }
-
-func hashFile(filePath string) ([]byte, error) {
-	file, err := os.Open(filePath)
-	if err != nil {
-		return []byte{}, fmt.Errorf("error opening file for hashing: %w", err)
-	}
-	defer file.Close()
-
-	h := sha256.New()
-	if _, err := io.Copy(h, file); err != nil {
-		return []byte{}, fmt.Errorf("error hashing file: %w", err)
-	}
-
-	return h.Sum(nil), nil
-}