Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
231
libs/storage/client.py
Normal file
231
libs/storage/client.py
Normal file
@@ -0,0 +1,231 @@
|
||||
"""MinIO/S3 storage client wrapper."""
|
||||
|
||||
from datetime import timedelta
|
||||
from typing import Any, BinaryIO
|
||||
|
||||
import structlog
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class StorageClient:
    """Thin convenience wrapper around a ``Minio`` client.

    Every method catches ``S3Error``, logs it through the module logger and
    returns a sentinel (``False``, ``None`` or ``[]``) instead of raising.
    Exceptions of any other type are not caught and propagate to the caller.

    Note: the methods are declared ``async`` but invoke the wrapped client
    without ``await``, so each client call runs to completion before control
    returns to the event loop.
    """

    def __init__(self, minio_client: Minio):
        """Store the client that all operations are delegated to.

        Args:
            minio_client: Already-configured ``Minio`` instance.
        """
        self.client = minio_client

    async def ensure_bucket(self, bucket_name: str, region: str = "us-east-1") -> bool:
        """Ensure a bucket exists, creating it when it does not.

        Args:
            bucket_name: Name of the bucket to check or create.
            region: Location passed to ``make_bucket`` when creating.

        Returns:
            ``True`` if the bucket exists when the call finishes, ``False`` if
            checking or creating it failed with an ``S3Error``.
        """
        try:
            if self.client.bucket_exists(bucket_name):
                logger.debug("Bucket already exists", bucket=bucket_name)
                return True

            self.client.make_bucket(bucket_name, location=region)
        except S3Error as e:
            # The existence check and the creation are two separate requests;
            # if another writer created the bucket in between, the bucket is
            # there and owned by us, which is exactly what was asked for.
            if e.code == "BucketAlreadyOwnedByYou":
                logger.debug("Bucket already exists", bucket=bucket_name)
                return True

            logger.error("Failed to ensure bucket", bucket=bucket_name, error=str(e))
            return False

        logger.info("Created bucket", bucket=bucket_name, region=region)
        return True

    async def put_object(  # pylint: disable=too-many-arguments,too-many-positional-arguments
        self,
        bucket_name: str,
        object_name: str,
        data: BinaryIO,
        length: int,
        content_type: str = "application/octet-stream",
        metadata: dict[str, str] | None = None,
    ) -> bool:
        """Upload an object, creating the bucket first if necessary.

        Args:
            bucket_name: Destination bucket.
            object_name: Key to store the object under.
            data: Binary stream to read the payload from.
            length: Value forwarded as the ``length`` of the upload.
            content_type: Content type recorded for the object.
            metadata: Optional metadata; ``None`` is sent as an empty dict.

        Returns:
            ``True`` on success, ``False`` if the upload raised ``S3Error``.
        """
        try:
            # The result is intentionally not checked: if the bucket could not
            # be ensured, the upload below reports the failure.
            await self.ensure_bucket(bucket_name)

            result = self.client.put_object(
                bucket_name=bucket_name,
                object_name=object_name,
                data=data,
                length=length,
                content_type=content_type,
                metadata=metadata or {},  # fmt: skip # pyright: ignore[reportArgumentType]
            )

            logger.info(
                "Object uploaded",
                bucket=bucket_name,
                object=object_name,
                etag=result.etag,
                size=length,
            )
            return True

        except S3Error as e:
            logger.error(
                "Failed to upload object",
                bucket=bucket_name,
                object=object_name,
                error=str(e),
            )
            return False

    async def get_object(self, bucket_name: str, object_name: str) -> bytes | None:
        """Download an object and return its full content.

        Args:
            bucket_name: Bucket holding the object.
            object_name: Key of the object to read.

        Returns:
            The object's bytes, or ``None`` if the request raised ``S3Error``.
        """
        try:
            response = self.client.get_object(bucket_name, object_name)
            try:
                data = response.read()
            finally:
                # Always hand the connection back, even when reading fails
                # part-way; otherwise the response and its pooled connection
                # stay open.
                response.close()
                response.release_conn()

            logger.debug(
                "Object downloaded",
                bucket=bucket_name,
                object=object_name,
                size=len(data),
            )
            return data  # type: ignore

        except S3Error as e:
            logger.error(
                "Failed to download object",
                bucket=bucket_name,
                object=object_name,
                error=str(e),
            )
            return None

    async def get_object_stream(self, bucket_name: str, object_name: str) -> Any:
        """Return the raw response for an object without reading it.

        Args:
            bucket_name: Bucket holding the object.
            object_name: Key of the object to open.

        Returns:
            The response object returned by the client's ``get_object``, or
            ``None`` if the request raised ``S3Error``. Nothing here closes
            the response; presumably the caller must call ``close()`` and
            ``release_conn()`` on it when done, as ``get_object`` does.
        """
        try:
            response = self.client.get_object(bucket_name, object_name)
            return response

        except S3Error as e:
            logger.error(
                "Failed to get object stream",
                bucket=bucket_name,
                object=object_name,
                error=str(e),
            )
            return None

    async def object_exists(self, bucket_name: str, object_name: str) -> bool:
        """Check whether an object exists.

        Args:
            bucket_name: Bucket to look in.
            object_name: Key to look for.

        Returns:
            ``True`` if ``stat_object`` succeeds, ``False`` if it raises
            ``S3Error``. Errors other than "not found" are logged, because in
            that case ``False`` means "could not determine" rather than
            "absent".
        """
        try:
            self.client.stat_object(bucket_name, object_name)
            return True
        except S3Error as e:
            if e.code not in ("NoSuchKey", "NoSuchBucket"):
                logger.error(
                    "Failed to check object existence",
                    bucket=bucket_name,
                    object=object_name,
                    error=str(e),
                )
            return False

    async def delete_object(self, bucket_name: str, object_name: str) -> bool:
        """Delete an object from a bucket.

        Args:
            bucket_name: Bucket holding the object.
            object_name: Key of the object to remove.

        Returns:
            ``True`` if the removal call succeeded, ``False`` if it raised
            ``S3Error``.
        """
        try:
            self.client.remove_object(bucket_name, object_name)
            logger.info("Object deleted", bucket=bucket_name, object=object_name)
            return True

        except S3Error as e:
            logger.error(
                "Failed to delete object",
                bucket=bucket_name,
                object=object_name,
                error=str(e),
            )
            return False

    async def list_objects(
        self, bucket_name: str, prefix: str | None = None, recursive: bool = True
    ) -> list[str]:
        """List object names in a bucket.

        Args:
            bucket_name: Bucket to list.
            prefix: Optional key prefix forwarded to the client.
            recursive: Forwarded to the client's ``list_objects``.

        Returns:
            The names of the listed objects (entries whose name is ``None``
            are skipped), or an empty list if listing raised ``S3Error``.
        """
        try:
            objects = self.client.list_objects(
                bucket_name, prefix=prefix, recursive=recursive
            )
            # The listing is consumed inside the try block so that errors
            # raised while iterating are handled as well.
            return [obj.object_name for obj in objects if obj.object_name is not None]

        except S3Error as e:
            logger.error(
                "Failed to list objects",
                bucket=bucket_name,
                prefix=prefix,
                error=str(e),
            )
            return []

    async def get_presigned_url(
        self,
        bucket_name: str,
        object_name: str,
        expires: timedelta = timedelta(hours=1),
        method: str = "GET",
    ) -> str | None:
        """Generate a presigned URL for an object.

        Args:
            bucket_name: Bucket holding the object.
            object_name: Key the URL should grant access to.
            expires: Validity period of the URL; one hour by default.
            method: HTTP method the URL is signed for.

        Returns:
            The URL as a string, or ``None`` if generation raised ``S3Error``.
        """
        try:
            url = self.client.get_presigned_url(
                method=method,
                bucket_name=bucket_name,
                object_name=object_name,
                expires=expires,
            )

            logger.debug(
                "Generated presigned URL",
                bucket=bucket_name,
                object=object_name,
                method=method,
                expires=expires,
            )
            return str(url)

        except S3Error as e:
            logger.error(
                "Failed to generate presigned URL",
                bucket=bucket_name,
                object=object_name,
                error=str(e),
            )
            return None

    async def copy_object(
        self, source_bucket: str, source_object: str, dest_bucket: str, dest_object: str
    ) -> bool:
        """Copy an object, creating the destination bucket if necessary.

        Args:
            source_bucket: Bucket to copy from.
            source_object: Key of the object to copy.
            dest_bucket: Bucket to copy into.
            dest_object: Key to store the copy under.

        Returns:
            ``True`` if the copy succeeded, ``False`` if it raised ``S3Error``.
        """
        try:
            # pylint: disable=import-outside-toplevel
            from minio.commonconfig import CopySource

            # The result is intentionally not checked: if the bucket could not
            # be ensured, the copy below reports the failure.
            await self.ensure_bucket(dest_bucket)

            self.client.copy_object(
                bucket_name=dest_bucket,
                object_name=dest_object,
                source=CopySource(source_bucket, source_object),
            )

            logger.info(
                "Object copied",
                source_bucket=source_bucket,
                source_object=source_object,
                dest_bucket=dest_bucket,
                dest_object=dest_object,
            )
            return True

        except S3Error as e:
            logger.error(
                "Failed to copy object",
                source_bucket=source_bucket,
                source_object=source_object,
                dest_bucket=dest_bucket,
                dest_object=dest_object,
                error=str(e),
            )
            return False
|
||||
Reference in New Issue
Block a user