Source code for allensdk.api.cloud_cache.utils

from typing import Optional
import warnings
import re
import urllib.parse as url_parse
import hashlib

def bucket_name_from_url(url: str) -> Optional[str]:
    """
    Read in a URL and return the name of the AWS S3 bucket it points
    towards.

    Parameters
    ----------
    url: str
        A generic URL, suitable for retrieving an S3 object via an
        HTTP GET request.

    Returns
    -------
    Optional[str]
        The AWS S3 bucket name, i.e. the network location of the URL
        with the '.s3[...].amazonaws.com' suffix removed. If that suffix
        does not occur in the network location of the URL, a warning is
        emitted and None is returned.

    Notes
    -----
    URLs passed to this method should conform to the "new"
    (virtual-hosted-style) scheme, in which the bucket name is the
    leading part of the host name, e.g.
    https://<bucket>.s3.<region>.amazonaws.com/<key>
    """
    # Raw string so the regex escapes are not interpreted as (invalid)
    # Python string escapes. The character class between '.s3' and
    # '.amazonaws.com' absorbs an optional region / endpoint qualifier.
    s3_pattern = re.compile(r'\.s3[\.,a-z,0-9,\-]*\.amazonaws\.com')

    url_params = url_parse.urlparse(url)
    raw_location = url_params.netloc
    s3_match = s3_pattern.search(raw_location)

    if s3_match is None:
        warnings.warn(f"{s3_pattern} does not occur in url {url}")
        return None

    # Strip the matched S3 host suffix; what remains is the bucket name.
    s3_suffix = raw_location[s3_match.start():s3_match.end()]
    return raw_location.replace(s3_suffix, '')


def relative_path_from_url(url: str) -> str:
    """
    Read in a url and return the relative path of the object

    Parameters
    ----------
    url: str
        The url of the object whose path you want

    Returns
    -------
    str:
        Relative path of the object, i.e. the path component of the
        url with its first character (the leading '/') removed

    Notes
    -----
    This method returns a str rather than a pathlib.Path because
    it is used to get the S3 object Key from a URL. If using
    pathlib.Path on a Windows system, the '/' will get transformed
    into a backslash, confusing S3.
    """
    url_params = url_parse.urlparse(url)
    # Drop only the first character of the path (the leading '/');
    # an empty path stays an empty string.
    return url_params.path[1:]


def file_hash_from_path(file_path: str) -> str:
    """
    Return the hexadecimal file hash for a file

    Parameters
    ----------
    file_path: str
        path to a file

    Returns
    -------
    str:
        The file hash (Blake2b; hexadecimal) of the file

    Notes
    -----
    The file is read in fixed-size chunks so that the whole file does
    not have to be held in memory at once. The chunk size has no
    effect on the resulting hash.
    """
    chunk_size = 1000000  # bytes read per iteration
    hasher = hashlib.blake2b()
    with open(file_path, 'rb') as in_file:
        # read() returns b'' at end-of-file, which terminates the loop
        for chunk in iter(lambda: in_file.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()