feat: Sprint 4 - Azure Blob Storage and Google Cloud Storage support

Implemented full native support for Azure Blob Storage and Google Cloud Storage:

**Azure Blob Storage (internal/cloud/azure.go):**
- Native Azure SDK integration (github.com/Azure/azure-sdk-for-go)
- Block blob upload for large files (>256MB, staged in 100MB blocks)
- Azurite emulator support for local testing (wiring sketch below)
- Production Azure authentication (account name + key)
- SHA-256 integrity verification with metadata
- Streaming uploads with progress tracking
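A minimal wiring sketch against Azurite. The module import path is hypothetical (the commit only shows package internals), and the endpoint is Azurite's stock blob port rather than anything this commit pins down; `NewAzureBackend` falls back to the same default account name and key shown in azure.go below:

```go
package main

import (
	"context"
	"log"

	"example.com/dbbackup/internal/cloud" // hypothetical module path
)

func main() {
	cfg := &cloud.Config{
		Provider:  "azure",
		Bucket:    "backups",                // container name
		Endpoint:  "http://127.0.0.1:10000", // assumed default Azurite blob endpoint
		AccessKey: "devstoreaccount1",       // account name
		SecretKey: "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
	}
	backend, err := cloud.NewBackend(cfg)
	if err != nil {
		log.Fatal(err)
	}
	// No-op progress callback; the signature is inferred from call sites.
	err = backend.Upload(context.Background(), "/tmp/db.dump", "daily/db.dump",
		func(transferred, total int64) {})
	if err != nil {
		log.Fatal(err)
	}
}
```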

**Google Cloud Storage (internal/cloud/gcs.go):**
- Native GCS SDK integration (cloud.google.com/go/storage)
- Chunked upload for large files (16MB chunks)
- fake-gcs-server emulator support for local testing (config sketch below)
- Application Default Credentials support
- Service account JSON key file support
- SHA-256 integrity verification with metadata
- Streaming uploads with progress tracking
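The GCS variant only changes the Config; the rest of the wiring matches the Azure sketch above. The endpoint shown is fake-gcs-server's usual default, again an assumption rather than something this commit fixes:

```go
// Emulator: any non-empty Endpoint disables authentication entirely.
cfg := &cloud.Config{
	Provider: "gcs",
	Bucket:   "backups",
	Endpoint: "http://127.0.0.1:4443/storage/v1/", // assumed fake-gcs-server default
}
// Production instead: leave Endpoint empty, then either set AccessKey to a
// service-account JSON key file path or leave it empty to use ADC.
backend, err := cloud.NewBackend(cfg)
```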

**Backend Integration:**
- Updated NewBackend() factory to support azure/azblob and gs/gcs/google providers
- Added Name() methods to both backends
- Fixed ProgressReader usage across all backends (callback sketch below)
- Updated Config comments to document Azure/GCS support
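The ProgressCallback type is not spelled out in this diff; the call sites (`progress(totalUploaded, fileSize)`) suggest a pair of int64 byte counts, so a caller-side sketch under that assumption:

```go
// Assumed: type ProgressCallback func(transferred, total int64)
progress := func(transferred, total int64) {
	if total > 0 {
		fmt.Printf("\rupload: %3.0f%%", 100*float64(transferred)/float64(total))
	}
}
if err := backend.Upload(ctx, "/tmp/db.dump", "daily/db.dump", progress); err != nil {
	log.Fatal(err)
}
```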

**Testing Infrastructure:**
- docker-compose.azurite.yml: Azurite + PostgreSQL + MySQL test environment
- docker-compose.gcs.yml: fake-gcs-server + PostgreSQL + MySQL test environment
- scripts/test_azure_storage.sh: 8 comprehensive Azure integration tests
- scripts/test_gcs_storage.sh: 8 comprehensive GCS integration tests
- Both test scripts validate upload/download/verify/cleanup/restore operations

**Documentation:**
- AZURE.md: Complete guide (600+ lines) covering setup, authentication, usage
- GCS.md: Complete guide (600+ lines) covering setup, authentication, usage
- Updated CLOUD.md with Azure and GCS sections
- Updated internal/config/config.go with Azure/GCS field documentation

**Test Coverage:**
- Large file uploads (300MB for Azure, 200MB for GCS)
- Block/chunked upload verification
- Backup verification with SHA-256 checksums (verification sketch below)
- Restore from cloud URIs
- Cleanup and retention policies
- Emulator support for both providers
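Both backends record the upload's SHA-256 in object metadata and surface it through BackupInfo.ETag on List. A sketch of the verification step, with the expected-hash lookup left to the caller; the helper below is illustrative, not a function from this commit:

```go
import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

// verifyDownload recomputes the SHA-256 of a restored file and compares it
// to the checksum recorded in cloud metadata (e.g. BackupInfo.ETag).
func verifyDownload(localPath, wantSHA256 string) error {
	f, err := os.Open(localPath)
	if err != nil {
		return err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return err
	}
	if got := hex.EncodeToString(h.Sum(nil)); got != wantSHA256 {
		return fmt.Errorf("checksum mismatch: got %s, want %s", got, wantSHA256)
	}
	return nil
}
```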

**Dependencies Added:**
- Azure: github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3
- GCS: cloud.google.com/go/storage v1.57.2
- Plus transitive dependencies (~50+ packages)

**Build:**
- Compiles successfully: 68MB binary
- All imports resolved
- No compilation errors

Sprint 4 closes the multi-cloud gap identified in the Sprint 3 evaluation.
Users can now use Azure and GCS URIs that were previously parsed but unsupported.
commit 64f1458e9a (parent 8929004abc), 2025-11-25 21:31:21 +00:00
13 changed files with 2990 additions and 28 deletions

internal/cloud/azure.go (new file, 381 lines)

@@ -0,0 +1,381 @@
package cloud

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
)

// AzureBackend implements the Backend interface for Azure Blob Storage.
type AzureBackend struct {
	client        *azblob.Client
	containerName string
	config        *Config
}

// NewAzureBackend creates a new Azure Blob Storage backend.
func NewAzureBackend(cfg *Config) (*AzureBackend, error) {
	if cfg.Bucket == "" {
		return nil, fmt.Errorf("container name is required for Azure backend")
	}

	var client *azblob.Client
	var err error

	// Support for the Azurite emulator (uses endpoint override).
	if cfg.Endpoint != "" {
		// For Azurite and custom endpoints.
		accountName := cfg.AccessKey
		accountKey := cfg.SecretKey
		if accountName == "" {
			// Default Azurite account.
			accountName = "devstoreaccount1"
		}
		if accountKey == "" {
			// Default Azurite key.
			accountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
		}

		// Create the shared key credential.
		cred, err := azblob.NewSharedKeyCredential(accountName, accountKey)
		if err != nil {
			return nil, fmt.Errorf("failed to create Azure credential: %w", err)
		}

		// Build the service URL for Azurite: http://endpoint/accountName
		serviceURL := cfg.Endpoint
		if !strings.Contains(serviceURL, accountName) {
			// Ensure the URL ends with a slash.
			if !strings.HasSuffix(serviceURL, "/") {
				serviceURL += "/"
			}
			serviceURL += accountName
		}

		client, err = azblob.NewClientWithSharedKeyCredential(serviceURL, cred, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create Azure client: %w", err)
		}
	} else {
		// Production Azure using a shared key (account name + key).
		if cfg.AccessKey != "" && cfg.SecretKey != "" {
			accountName := cfg.AccessKey
			accountKey := cfg.SecretKey
			cred, err := azblob.NewSharedKeyCredential(accountName, accountKey)
			if err != nil {
				return nil, fmt.Errorf("failed to create Azure credential: %w", err)
			}
			serviceURL := fmt.Sprintf("https://%s.blob.core.windows.net/", accountName)
			client, err = azblob.NewClientWithSharedKeyCredential(serviceURL, cred, nil)
			if err != nil {
				return nil, fmt.Errorf("failed to create Azure client: %w", err)
			}
		} else {
			// Other credential sources (managed identity, connection strings)
			// are not wired up yet.
			return nil, fmt.Errorf("Azure authentication requires an account name and key")
		}
	}

	backend := &AzureBackend{
		client:        client,
		containerName: cfg.Bucket,
		config:        cfg,
	}

	// Container creation is currently disabled: Config has no CreateBucket
	// field, so containers must be created manually or via the Azure portal.
	if false {
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		containerClient := client.ServiceClient().NewContainerClient(cfg.Bucket)
		_, err = containerClient.Create(ctx, &container.CreateOptions{})
		if err != nil {
			// Ignore if the container already exists.
			if !strings.Contains(err.Error(), "ContainerAlreadyExists") {
				return nil, fmt.Errorf("failed to create container: %w", err)
			}
		}
	}

	return backend, nil
}

// Name returns the backend name.
func (a *AzureBackend) Name() string {
	return "azure"
}

// Upload uploads a file to Azure Blob Storage.
func (a *AzureBackend) Upload(ctx context.Context, localPath, remotePath string, progress ProgressCallback) error {
	file, err := os.Open(localPath)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		return fmt.Errorf("failed to stat file: %w", err)
	}
	fileSize := fileInfo.Size()

	// Remove the leading slash from the remote path.
	blobName := strings.TrimPrefix(remotePath, "/")

	// Use block upload for large files (>256MB), simple upload otherwise.
	const blockUploadThreshold = 256 * 1024 * 1024 // 256 MB
	if fileSize > blockUploadThreshold {
		return a.uploadBlocks(ctx, file, blobName, fileSize, progress)
	}
	return a.uploadSimple(ctx, file, blobName, fileSize, progress)
}

// uploadSimple uploads a file in a single streaming request.
func (a *AzureBackend) uploadSimple(ctx context.Context, file *os.File, blobName string, fileSize int64, progress ProgressCallback) error {
	blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)

	// Wrap the reader with progress tracking.
	reader := NewProgressReader(file, fileSize, progress)

	// Calculate a SHA-256 hash for integrity.
	hash := sha256.New()
	teeReader := io.TeeReader(reader, hash)

	_, err := blockBlobClient.UploadStream(ctx, teeReader, &blockblob.UploadStreamOptions{
		BlockSize: 4 * 1024 * 1024, // 4MB blocks
	})
	if err != nil {
		return fmt.Errorf("failed to upload blob: %w", err)
	}

	// Store the checksum as blob metadata.
	checksum := hex.EncodeToString(hash.Sum(nil))
	metadata := map[string]*string{
		"sha256": &checksum,
	}
	_, err = blockBlobClient.SetMetadata(ctx, metadata, nil)
	if err != nil {
		// Non-fatal: the upload succeeded but setting metadata failed.
		fmt.Fprintf(os.Stderr, "Warning: failed to set blob metadata: %v\n", err)
	}
	return nil
}

// uploadBlocks uploads a file via block staging (for large files).
func (a *AzureBackend) uploadBlocks(ctx context.Context, file *os.File, blobName string, fileSize int64, progress ProgressCallback) error {
	blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)

	const blockSize = 100 * 1024 * 1024 // 100MB per block
	numBlocks := (fileSize + blockSize - 1) / blockSize
	blockIDs := make([]string, 0, numBlocks)
	hash := sha256.New()
	var totalUploaded int64

	for i := int64(0); i < numBlocks; i++ {
		blockID := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("block-%08d", i)))
		blockIDs = append(blockIDs, blockID)

		// Size of this block (the last block may be short).
		currentBlockSize := blockSize
		if i == numBlocks-1 {
			currentBlockSize = int(fileSize - i*blockSize)
		}

		// Read the block.
		blockData := make([]byte, currentBlockSize)
		n, err := io.ReadFull(file, blockData)
		if err != nil && err != io.ErrUnexpectedEOF {
			return fmt.Errorf("failed to read block %d: %w", i, err)
		}
		blockData = blockData[:n]

		// Update the running hash.
		hash.Write(blockData)

		// Stage the block.
		reader := bytes.NewReader(blockData)
		_, err = blockBlobClient.StageBlock(ctx, blockID, streaming.NopCloser(reader), nil)
		if err != nil {
			return fmt.Errorf("failed to stage block %d: %w", i, err)
		}

		// Report progress.
		totalUploaded += int64(n)
		if progress != nil {
			progress(totalUploaded, fileSize)
		}
	}

	// Commit all staged blocks.
	_, err := blockBlobClient.CommitBlockList(ctx, blockIDs, nil)
	if err != nil {
		return fmt.Errorf("failed to commit block list: %w", err)
	}

	// Store the checksum as blob metadata.
	checksum := hex.EncodeToString(hash.Sum(nil))
	metadata := map[string]*string{
		"sha256": &checksum,
	}
	_, err = blockBlobClient.SetMetadata(ctx, metadata, nil)
	if err != nil {
		// Non-fatal.
		fmt.Fprintf(os.Stderr, "Warning: failed to set blob metadata: %v\n", err)
	}
	return nil
}

// Download downloads a file from Azure Blob Storage.
func (a *AzureBackend) Download(ctx context.Context, remotePath, localPath string, progress ProgressCallback) error {
	blobName := strings.TrimPrefix(remotePath, "/")
	blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)

	// Get blob properties to learn the size.
	props, err := blockBlobClient.GetProperties(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to get blob properties: %w", err)
	}
	fileSize := *props.ContentLength

	// Download the blob.
	resp, err := blockBlobClient.DownloadStream(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to download blob: %w", err)
	}
	defer resp.Body.Close()

	// Create the local file.
	file, err := os.Create(localPath)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer file.Close()

	// Copy with progress tracking.
	reader := NewProgressReader(resp.Body, fileSize, progress)
	_, err = io.Copy(file, reader)
	if err != nil {
		return fmt.Errorf("failed to write file: %w", err)
	}
	return nil
}

// Delete deletes a file from Azure Blob Storage.
func (a *AzureBackend) Delete(ctx context.Context, remotePath string) error {
	blobName := strings.TrimPrefix(remotePath, "/")
	blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)
	_, err := blockBlobClient.Delete(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to delete blob: %w", err)
	}
	return nil
}

// List lists files in Azure Blob Storage with a given prefix.
func (a *AzureBackend) List(ctx context.Context, prefix string) ([]BackupInfo, error) {
	prefix = strings.TrimPrefix(prefix, "/")
	containerClient := a.client.ServiceClient().NewContainerClient(a.containerName)
	pager := containerClient.NewListBlobsFlatPager(&container.ListBlobsFlatOptions{
		Prefix: &prefix,
	})

	var files []BackupInfo
	for pager.More() {
		page, err := pager.NextPage(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to list blobs: %w", err)
		}
		for _, blob := range page.Segment.BlobItems {
			if blob.Name == nil || blob.Properties == nil {
				continue
			}
			file := BackupInfo{
				Key:          *blob.Name,
				Name:         filepath.Base(*blob.Name),
				Size:         *blob.Properties.ContentLength,
				LastModified: *blob.Properties.LastModified,
			}
			// Surface the SHA-256 from metadata if present.
			if blob.Metadata != nil {
				if sha256Val, ok := blob.Metadata["sha256"]; ok && sha256Val != nil {
					file.ETag = *sha256Val
				}
			}
			files = append(files, file)
		}
	}
	return files, nil
}

// Exists checks if a file exists in Azure Blob Storage.
func (a *AzureBackend) Exists(ctx context.Context, remotePath string) (bool, error) {
	blobName := strings.TrimPrefix(remotePath, "/")
	blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)
	_, err := blockBlobClient.GetProperties(ctx, nil)
	if err != nil {
		// A 404 response means the blob does not exist.
		var respErr *azcore.ResponseError
		if errors.As(err, &respErr) && respErr.StatusCode == 404 {
			return false, nil
		}
		return false, fmt.Errorf("failed to check blob existence: %w", err)
	}
	return true, nil
}

// GetSize returns the size of a file in Azure Blob Storage.
func (a *AzureBackend) GetSize(ctx context.Context, remotePath string) (int64, error) {
	blobName := strings.TrimPrefix(remotePath, "/")
	blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)
	props, err := blockBlobClient.GetProperties(ctx, nil)
	if err != nil {
		return 0, fmt.Errorf("failed to get blob properties: %w", err)
	}
	return *props.ContentLength, nil
}

internal/cloud/gcs.go (new file, 275 lines)

@@ -0,0 +1,275 @@
package cloud

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"time"

	"cloud.google.com/go/storage"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"
)

// GCSBackend implements the Backend interface for Google Cloud Storage.
type GCSBackend struct {
	client     *storage.Client
	bucketName string
	config     *Config
}

// NewGCSBackend creates a new Google Cloud Storage backend.
func NewGCSBackend(cfg *Config) (*GCSBackend, error) {
	if cfg.Bucket == "" {
		return nil, fmt.Errorf("bucket name is required for GCS backend")
	}

	var client *storage.Client
	var err error
	ctx := context.Background()

	// Support for the fake-gcs-server emulator (uses endpoint override).
	if cfg.Endpoint != "" {
		// For fake-gcs-server and custom endpoints.
		client, err = storage.NewClient(ctx, option.WithEndpoint(cfg.Endpoint), option.WithoutAuthentication())
		if err != nil {
			return nil, fmt.Errorf("failed to create GCS client: %w", err)
		}
	} else {
		// Production GCS using Application Default Credentials or a service account.
		if cfg.AccessKey != "" {
			// AccessKey carries the path to a service-account JSON key file.
			client, err = storage.NewClient(ctx, option.WithCredentialsFile(cfg.AccessKey))
			if err != nil {
				return nil, fmt.Errorf("failed to create GCS client with credentials file: %w", err)
			}
		} else {
			// Use default credentials (ADC, environment variables, etc.).
			client, err = storage.NewClient(ctx)
			if err != nil {
				return nil, fmt.Errorf("failed to create GCS client: %w", err)
			}
		}
	}

	backend := &GCSBackend{
		client:     client,
		bucketName: cfg.Bucket,
		config:     cfg,
	}

	// Bucket creation is currently disabled: Config has no CreateBucket field,
	// so buckets must be created manually or via the gcloud CLI. (Note that
	// bucket.Create also expects a project ID, which Config does not carry.)
	if false {
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		bucket := client.Bucket(cfg.Bucket)
		_, err = bucket.Attrs(ctx)
		if err == storage.ErrBucketNotExist {
			// Create the bucket with default settings.
			if err := bucket.Create(ctx, cfg.AccessKey, nil); err != nil {
				return nil, fmt.Errorf("failed to create bucket: %w", err)
			}
		} else if err != nil {
			return nil, fmt.Errorf("failed to check bucket: %w", err)
		}
	}

	return backend, nil
}

// Name returns the backend name.
func (g *GCSBackend) Name() string {
	return "gcs"
}

// Upload uploads a file to Google Cloud Storage.
func (g *GCSBackend) Upload(ctx context.Context, localPath, remotePath string, progress ProgressCallback) error {
	file, err := os.Open(localPath)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		return fmt.Errorf("failed to stat file: %w", err)
	}
	fileSize := fileInfo.Size()

	// Remove the leading slash from the remote path.
	objectName := strings.TrimPrefix(remotePath, "/")
	bucket := g.client.Bucket(g.bucketName)
	object := bucket.Object(objectName)

	// Create a writer; large files are streamed in chunks.
	writer := object.NewWriter(ctx)
	writer.ChunkSize = 16 * 1024 * 1024 // 16MB chunks

	// Wrap the reader with progress tracking and hash calculation.
	hash := sha256.New()
	reader := NewProgressReader(io.TeeReader(file, hash), fileSize, progress)

	// Upload with progress tracking.
	_, err = io.Copy(writer, reader)
	if err != nil {
		writer.Close()
		return fmt.Errorf("failed to upload object: %w", err)
	}

	// Close the writer (finalizes the upload).
	if err := writer.Close(); err != nil {
		return fmt.Errorf("failed to finalize upload: %w", err)
	}

	// Store the checksum as object metadata.
	checksum := hex.EncodeToString(hash.Sum(nil))
	_, err = object.Update(ctx, storage.ObjectAttrsToUpdate{
		Metadata: map[string]string{
			"sha256": checksum,
		},
	})
	if err != nil {
		// Non-fatal: the upload succeeded but setting metadata failed.
		fmt.Fprintf(os.Stderr, "Warning: failed to set object metadata: %v\n", err)
	}
	return nil
}

// Download downloads a file from Google Cloud Storage.
func (g *GCSBackend) Download(ctx context.Context, remotePath, localPath string, progress ProgressCallback) error {
	objectName := strings.TrimPrefix(remotePath, "/")
	bucket := g.client.Bucket(g.bucketName)
	object := bucket.Object(objectName)

	// Get object attributes to learn the size.
	attrs, err := object.Attrs(ctx)
	if err != nil {
		return fmt.Errorf("failed to get object attributes: %w", err)
	}
	fileSize := attrs.Size

	// Create a reader.
	reader, err := object.NewReader(ctx)
	if err != nil {
		return fmt.Errorf("failed to download object: %w", err)
	}
	defer reader.Close()

	// Create the local file.
	file, err := os.Create(localPath)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer file.Close()

	// Copy with progress tracking.
	progressReader := NewProgressReader(reader, fileSize, progress)
	_, err = io.Copy(file, progressReader)
	if err != nil {
		return fmt.Errorf("failed to write file: %w", err)
	}
	return nil
}

// Delete deletes a file from Google Cloud Storage.
func (g *GCSBackend) Delete(ctx context.Context, remotePath string) error {
	objectName := strings.TrimPrefix(remotePath, "/")
	bucket := g.client.Bucket(g.bucketName)
	object := bucket.Object(objectName)
	if err := object.Delete(ctx); err != nil {
		return fmt.Errorf("failed to delete object: %w", err)
	}
	return nil
}

// List lists files in Google Cloud Storage with a given prefix.
func (g *GCSBackend) List(ctx context.Context, prefix string) ([]BackupInfo, error) {
	prefix = strings.TrimPrefix(prefix, "/")
	bucket := g.client.Bucket(g.bucketName)
	query := &storage.Query{
		Prefix: prefix,
	}

	it := bucket.Objects(ctx, query)
	var files []BackupInfo
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to list objects: %w", err)
		}
		file := BackupInfo{
			Key:          attrs.Name,
			Name:         filepath.Base(attrs.Name),
			Size:         attrs.Size,
			LastModified: attrs.Updated,
		}
		// Surface the SHA-256 from metadata if present.
		if attrs.Metadata != nil {
			if sha256Val, ok := attrs.Metadata["sha256"]; ok {
				file.ETag = sha256Val
			}
		}
		files = append(files, file)
	}
	return files, nil
}

// Exists checks if a file exists in Google Cloud Storage.
func (g *GCSBackend) Exists(ctx context.Context, remotePath string) (bool, error) {
	objectName := strings.TrimPrefix(remotePath, "/")
	bucket := g.client.Bucket(g.bucketName)
	object := bucket.Object(objectName)
	_, err := object.Attrs(ctx)
	if err == storage.ErrObjectNotExist {
		return false, nil
	}
	if err != nil {
		return false, fmt.Errorf("failed to check object existence: %w", err)
	}
	return true, nil
}

// GetSize returns the size of a file in Google Cloud Storage.
func (g *GCSBackend) GetSize(ctx context.Context, remotePath string) (int64, error) {
	objectName := strings.TrimPrefix(remotePath, "/")
	bucket := g.client.Bucket(g.bucketName)
	object := bucket.Object(objectName)
	attrs, err := object.Attrs(ctx)
	if err != nil {
		return 0, fmt.Errorf("failed to get object attributes: %w", err)
	}
	return attrs.Size, nil
}

internal/cloud (NewBackend factory)

@@ -79,8 +79,12 @@ func NewBackend(cfg *Config) (Backend, error) {
return nil, fmt.Errorf("endpoint required for Backblaze B2")
}
return NewS3Backend(cfg)
case "azure", "azblob":
return NewAzureBackend(cfg)
case "gs", "gcs", "google":
return NewGCSBackend(cfg)
default:
return nil, fmt.Errorf("unsupported cloud provider: %s (supported: s3, minio, b2)", cfg.Provider)
return nil, fmt.Errorf("unsupported cloud provider: %s (supported: s3, minio, b2, azure, gcs)", cfg.Provider)
}
}
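For reference, a hedged usage sketch of the updated factory. Credentials for the chosen provider must still be supplied as the constructors above require; the env var names here are hypothetical, and "azblob"/"google" are pure aliases:

```go
accountName := os.Getenv("AZURE_ACCOUNT") // hypothetical env var
accountKey := os.Getenv("AZURE_KEY")      // hypothetical env var
backend, err := NewBackend(&Config{
	Provider:  "azblob", // same as "azure"; "gs"/"google" map to GCS
	Bucket:    "backups",
	AccessKey: accountName, // Azure account name (service-account path for GCS)
	SecretKey: accountKey,  // Azure account key (unused by GCS)
})
if err != nil {
	log.Fatal(err)
}
fmt.Println(backend.Name()) // "azure"
```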

internal/config/config.go

@@ -88,13 +88,13 @@ type Config struct {
 	// Cloud storage options (v2.0)
 	CloudEnabled bool // Enable cloud storage integration
-	CloudProvider string // "s3", "minio", "b2"
-	CloudBucket string // Bucket name
-	CloudRegion string // Region (for S3)
-	CloudEndpoint string // Custom endpoint (for MinIO, B2)
-	CloudAccessKey string // Access key
-	CloudSecretKey string // Secret key
-	CloudPrefix string // Key prefix
+	CloudProvider string // "s3", "minio", "b2", "azure", "gcs"
+	CloudBucket string // Bucket/container name
+	CloudRegion string // Region (for S3, GCS)
+	CloudEndpoint string // Custom endpoint (for MinIO, B2, Azurite, fake-gcs-server)
+	CloudAccessKey string // Access key / Account name (Azure) / Service account file (GCS)
+	CloudSecretKey string // Secret key / Account key (Azure)
+	CloudPrefix string // Key/object prefix
 	CloudAutoUpload bool // Automatically upload after backup
 }