Module sentspace.utils.s3
Expand source code
import importlib
import boto3
import os
import sys
from botocore import UNSIGNED
from botocore.config import Config
from tqdm import tqdm
from pathlib import Path
class _S3Storage():
    """
    Make one object from a public S3 bucket available in a local directory.

    On construction the object named by ``key`` is looked up under
    ``root_dir``; if no file is there yet it is downloaded from ``bucket``
    using unsigned (anonymous) requests. The class only ensures the file
    exists locally -- it does not open or unpickle it.

    Arguments (all keyword-only except ``*args``):
        key: S3 object key; also used as the path relative to ``root_dir``.
        bucket: name of the S3 bucket to download from.
        region: AWS region passed to boto3.
        root_dir: local cache directory. ``~`` is expanded and the path is
            made absolute. NOTE: the default is computed once, when this
            module is imported, from the working directory at that moment.
        *args, **kwargs: forwarded unchanged to the parent ``__init__``.
    """

    # botocore configuration that disables request signing, so no AWS
    # credentials are needed for the download.
    _NO_SIGNATURE = Config(signature_version=UNSIGNED)

    def __init__(self, *args, key, bucket='sentspace-databases', region='us-east-1', root_dir=os.getcwd() + '/.feature_database/', **kwargs):
        super(_S3Storage, self).__init__(*args, **kwargs)
        self._key = key
        self._bucket = bucket
        self._region = region
        self._local_root_dir = str(Path(root_dir).expanduser().resolve()) + '/'
        os.makedirs(self._local_root_dir, exist_ok=True)
        self._retrieve()

    def _retrieve(self):
        """Download the object unless a file for it already exists locally."""
        local_path = os.path.join(self._local_root_dir, self._key)
        if not os.path.isfile(local_path):
            self._download_file(self._key, local_path)

    def _download_file(self, key, local_path):
        """
        Download ``key`` from the configured bucket to ``local_path``.

        Progress is reported on stdout through a tqdm bar sized from the
        object's ``content_length``.
        """
        print(f"Downloading {key} to {local_path}")

        # A key may contain '/' separators (e.g. 'subdir/file.pkl'); the
        # matching local sub-directory has to exist before the download
        # can write into it.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        s3 = boto3.resource('s3', region_name=self._region, config=self._NO_SIGNATURE)
        obj = s3.Object(self._bucket, key)
        with tqdm(total=obj.content_length, unit='B', unit_scale=True, desc=key, file=sys.stdout) as progress_bar:
            def progress_hook(bytes_amount):
                # boto3 calls this with the number of bytes transferred
                # since the previous call.
                progress_bar.update(bytes_amount)

            obj.download_file(local_path, Callback=progress_hook)

    def save(self, result, function_identifier):
        """Always raise NotImplementedError: this storage is load-only."""
        raise NotImplementedError("can only load from S3, but not save")
# Public alias for _S3Storage: calling load_feature(key=...) constructs the
# storage object, whose __init__ downloads the file if it is not yet on disk.
load_feature = _S3Storage
Classes
class load_feature (*args, key, bucket='sentspace-databases', region='us-east-1', root_dir='/root/project/.feature_database/', **kwargs)
-
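Ensure a database file (e.g. a pickle) from a public S3 bucket is present in the local cache directory, downloading it on first use. Load-only: saving is not supported.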
Expand source code
class _S3Storage():
    """
    Make one object from a public S3 bucket available in a local directory.

    On construction the object named by ``key`` is looked up under
    ``root_dir``; if no file is there yet it is downloaded from ``bucket``
    using unsigned (anonymous) requests. The class only ensures the file
    exists locally -- it does not open or unpickle it.

    Arguments (all keyword-only except ``*args``):
        key: S3 object key; also used as the path relative to ``root_dir``.
        bucket: name of the S3 bucket to download from.
        region: AWS region passed to boto3.
        root_dir: local cache directory. ``~`` is expanded and the path is
            made absolute. NOTE: the default is computed once, when this
            module is imported, from the working directory at that moment.
        *args, **kwargs: forwarded unchanged to the parent ``__init__``.
    """

    # botocore configuration that disables request signing, so no AWS
    # credentials are needed for the download.
    _NO_SIGNATURE = Config(signature_version=UNSIGNED)

    def __init__(self, *args, key, bucket='sentspace-databases', region='us-east-1', root_dir=os.getcwd() + '/.feature_database/', **kwargs):
        super(_S3Storage, self).__init__(*args, **kwargs)
        self._key = key
        self._bucket = bucket
        self._region = region
        self._local_root_dir = str(Path(root_dir).expanduser().resolve()) + '/'
        os.makedirs(self._local_root_dir, exist_ok=True)
        self._retrieve()

    def _retrieve(self):
        """Download the object unless a file for it already exists locally."""
        local_path = os.path.join(self._local_root_dir, self._key)
        if not os.path.isfile(local_path):
            self._download_file(self._key, local_path)

    def _download_file(self, key, local_path):
        """
        Download ``key`` from the configured bucket to ``local_path``.

        Progress is reported on stdout through a tqdm bar sized from the
        object's ``content_length``.
        """
        print(f"Downloading {key} to {local_path}")

        # A key may contain '/' separators (e.g. 'subdir/file.pkl'); the
        # matching local sub-directory has to exist before the download
        # can write into it.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        s3 = boto3.resource('s3', region_name=self._region, config=self._NO_SIGNATURE)
        obj = s3.Object(self._bucket, key)
        with tqdm(total=obj.content_length, unit='B', unit_scale=True, desc=key, file=sys.stdout) as progress_bar:
            def progress_hook(bytes_amount):
                # boto3 calls this with the number of bytes transferred
                # since the previous call.
                progress_bar.update(bytes_amount)

            obj.download_file(local_path, Callback=progress_hook)

    def save(self, result, function_identifier):
        """Always raise NotImplementedError: this storage is load-only."""
        raise NotImplementedError("can only load from S3, but not save")
Methods
def save(self, result, function_identifier)
-
Expand source code
def save(self, result, function_identifier):
    """
    Refuse to store anything: this backend can only download from S3.

    Both arguments are ignored; the call always raises NotImplementedError.
    """
    message = "can only load from S3, but not save"
    raise NotImplementedError(message)