Source code for allensdk.core.pickle_utils

from typing import Union, Any
import pickle
import gzip
import pathlib


[docs]def load_and_sanitize_pickle( pickle_path: Union[str, pathlib.Path]) -> Any: """ Load the data from a pickle file and pass it through sanitize_pickle_data, so that all bytes in the data are cast to strings. Parameters ---------- pickle_path: Union[str, pathlib.Path] Path to the pickle file Returns ------- pickle_data: Any The data that was in the pickle file Notes ----- Because sanitize_pickle_data alters the data in-place, this method encapsulates loading and sanitizing so that users do not think they have a pre-sanitization copy of the data available. """ if isinstance(pickle_path, str): pickle_path = pathlib.Path(pickle_path) if pickle_path.name.endswith('gz'): open_method = gzip.open elif pickle_path.name.endswith('pkl'): open_method = open else: raise ValueError("Can open .pkl and .gz files; " f"you gave {pickle_path.resolve().absolute()}") with open_method(pickle_path, 'rb') as in_file: raw_data = pickle.load(in_file, encoding='bytes') return _sanitize_pickle_data(raw_data)
def _sanitize_pickle_data( raw_data: Union[list, dict]) -> Union[list, dict]: """ Sometimes data read from the pickle file comes with keys that are strings; sometimes it comes with keys that are bytes. This method iterates over the elements in the pickle file, casting the bytes to strings, returning the same object with the mapped keys. Note ---- Alters raw_data in-place """ if isinstance(raw_data, dict): raw_data = _sanitize_dict(raw_data) elif isinstance(raw_data, list): raw_data = _sanitize_list(raw_data) return raw_data def _sanitize_list( raw_data: list) -> list: """ Sanitize a list read from the pickle file, casting bytes into str and returning the sanitized list. Note ---- Alters raw_data in place """ for idx, element in enumerate(raw_data): if isinstance(element, list) or isinstance(element, tuple): raw_data[idx] = _sanitize_list_or_tuple(element) elif isinstance(element, dict): raw_data[idx] = _sanitize_dict(element) elif isinstance(element, bytes): raw_data[idx] = element.decode('utf-8') else: pass return raw_data def _sanitize_tuple( raw_data: tuple) -> tuple: """ Sanitize a list read from the pickle file, casting bytes into str and returning the sanitized list. """ output = list(raw_data) output = _sanitize_list(output) output = tuple(output) return output def _sanitize_list_or_tuple( raw_data: Union[list, tuple]) -> Union[list, tuple]: """ Sanitize a list or tuple read from the pickle file, casting bytes into str and returning the sanitized iterable. Note ---- Alters raw_data in place (if a list) """ if isinstance(raw_data, list): return _sanitize_list(raw_data) elif isinstance(raw_data, tuple): return _sanitize_tuple(raw_data) raise ValueError("Can only process lists or tuples; " f"you gave {type(raw_data)}") def _sanitize_dict( raw_data: dict) -> dict: """ Sanitize a dict read from the pickle file, casting bytes into str and returning the sanitized dict. Note ---- Alters raw_data in-place """ key_list = list(raw_data.keys()) for this_key in key_list: this_value = raw_data.pop(this_key) if isinstance(this_key, bytes): this_key = this_key.decode('utf-8') if isinstance(this_value, list) or isinstance(this_value, tuple): this_value = _sanitize_list_or_tuple(this_value) elif isinstance(this_value, dict): this_value = _sanitize_dict(this_value) elif isinstance(this_value, bytes): this_value = this_value.decode('utf-8') raw_data[this_key] = this_value return raw_data