Source code for allensdk.config.manifest

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2014-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import os
import sys
import re
import logging
import errno
import pandas as pd
from pathlib import Path


[docs]class ManifestVersionError(Exception): @property def outdated(self): try: return self.found_version < self.version except TypeError: return def __init__(self, message, version, found_version): super(ManifestVersionError, self).__init__(message) self.found_version = found_version self.version = version
[docs]class Manifest(object): """Manages the location of external files referenced in an Allen SDK configuration """ DIR = 'dir' FILE = 'file' DIRNAME = 'dir_name' VERSION = 'manifest_version' log = logging.getLogger(__name__) def __init__(self, config=None, relative_base_dir='.', version=None): self.path_info = {} self.relative_base_dir = relative_base_dir if config is not None: self.load_config(config, version=version)
[docs] def load_config(self, config, version=None): ''' Load paths into the manifest from an Allen SDK config section. Parameters ---------- config : Config Manifest section of an Allen SDK config. ''' found_version = None for path_info in config: path_type = path_info['type'] path_format = None if 'format' in path_info: path_format = path_info['format'] if path_type == 'file': try: parent_key = path_info['parent_key'] except: parent_key = None self.add_file(path_info['key'], path_info['spec'], parent_key, path_format) elif path_type == 'dir': try: parent_key = path_info['parent_key'] except: parent_key = None spec = path_info['spec'] absolute = False if spec[0] == '/': absolute = True self.add_path(path_info['key'], path_info['spec'], path_type, absolute, path_format, parent_key) elif path_type == self.VERSION: found_version = path_info['value'] else: Manifest.log.warning("Unknown path type in manifest: %s" % (path_type)) if found_version != version: raise ManifestVersionError("", version, found_version) self.version = version
[docs] def add_path(self, key, path, path_type=DIR, absolute=True, path_format=None, parent_key=None): '''Insert a new entry. Parameters ---------- key : string Identifier for referencing the entry. path : string Specification for a path using %s, %d style substitution. path_type : string enumeration 'dir' (default) or 'file' absolute : boolean Is the spec relative to the process current directory. path_format : string, optional Indicate a known file type for further parsing. parent_key : string Refer to another entry. ''' if parent_key: path_args = [] try: parent_path = self.path_info[parent_key]['spec'] path_args.append(parent_path) except: Manifest.log.error( "cannot resolve directory key %s" % (parent_key)) raise path_args.extend(path.split('/')) path = os.path.join(*path_args) # TODO: relative paths need to be considered better if absolute is True: path = os.path.abspath(path) else: path = os.path.abspath(os.path.join(self.relative_base_dir, path)) if path_type == Manifest.DIRNAME: path = os.path.dirname(path) self.path_info[key] = {'type': path_type, 'spec': path} if path_type == Manifest.FILE and path_format is not None: self.path_info[key]['format'] = path_format
[docs] def add_paths(self, path_info): ''' add information about paths stored in the manifest. Parameters path_info : dict Information about the new paths ''' for path_key, path_data in path_info.items(): path_format = None if 'format' in path_data: path_format = path_data['format'] Manifest.log.info("Adding path. type: %s, format: %s, spec: %s" % (path_data['type'], path_data['spec'], path_format)) entry = {'type': path_data['type'], 'spec': path_data['spec'] } if path_format is not None: entry['format'] = path_format self.path_info[path_key] = entry
[docs] def add_file(self, file_key, file_name, dir_key=None, path_format=None): '''Insert a new file entry. Parameters ---------- file_key : string Reference to the entry. file_name : string Subtitutions of the %s, %d style allowed. dir_key : string Reference to the parent directory entry. path_format : string, optional File type for further parsing. ''' path_args = [] if dir_key: try: dir_path = self.path_info[dir_key]['spec'] path_args.append(dir_path) except: Manifest.log.error( "cannot resolve directory key %s" % (dir_key)) raise elif not file_name.startswith('/'): path_args.append(os.curdir) else: path_args.append(os.path.sep) path_args.extend(file_name.split('/')) file_path = os.path.join(*path_args) self.path_info[file_key] = {'type': Manifest.FILE, 'spec': file_path} if path_format: self.path_info[file_key]['format'] = path_format
[docs] def get_path(self, path_key, *args): '''Retrieve an entry with substitutions. Parameters ---------- path_key : string Refer to the entry to retrieve. args : any types, optional arguments to be substituted into the path spec for %s, %d, etc. Returns ------- string Path with parent structure and substitutions applied. ''' path_spec = self.path_info[path_key]['spec'] if args is not None and len(args) != 0: path = path_spec % args else: path = path_spec return path
[docs] def get_format(self, path_key): '''Retrieve the type of a path entry. Parameters ---------- path_key : string reference to the entry Returns ------- string File type. ''' path_entry = self.path_info[path_key] path_format = None if 'format' in path_entry: path_format = path_entry['format'] return path_format
[docs] @classmethod def safe_make_parent_dirs(cls, file_name): ''' Create a parent directories for file. Parameters ---------- file_name : string Returns ------- leftmost : string most rootward directory created ''' dirname = os.path.dirname(file_name) # do nothing if there are no parent directories if not dirname: return return Manifest.safe_mkdir(dirname)
[docs] @classmethod def safe_mkdir(cls, directory): '''Create path if not already there. Parameters ---------- directory : string create it if it doesn't exist Returns ------- leftmost : string most rootward directory created ''' parts = Path(directory).parts sub_paths = [Path(parts[0])] for part in parts[1:]: sub_paths.append(sub_paths[-1] / part) leftmost = None for sub_path in sub_paths: if not sub_path.exists(): leftmost = str(sub_path) try: os.makedirs(directory) except OSError as e: if ((sys.platform == "darwin") and (e.errno == errno.EISDIR) and \ (e.filename == "/")): # undocumented behavior of mkdir on OSX where for / it raises # EISDIR and not EEXIST # https://bugs.python.org/issue24231 (old but still holds true) pass elif sys.platform == "win32" and e.errno == errno.EACCES: root_path = os.path.abspath(os.sep) if e.filename == root_path or \ e.filename == root_path.replace("\\", "/"): # When attempting to os.makedirs the root drive letter on # Windows, EACCES is raised, not EEXIST pass else: raise elif e.errno == errno.EEXIST: pass else: raise return leftmost
[docs] def create_dir(self, path_key): '''Make a directory for an entry. Parameters ---------- path_key : string Reference to the entry. ''' dir_path = self.get_path(path_key) Manifest.safe_mkdir(dir_path)
[docs] def check_dir(self, path_key, do_exit=False): '''Verify a directories existence or optionally exit. Parameters ---------- path_key : string Reference to the entry. do_exit : boolean What to do if the directory is not present. ''' dir_path = self.get_path(path_key) if not os.path.exists(dir_path): Manifest.log.fatal('Directory %s does not exist; exiting.' % (dir_path)) if do_exit is True: quit()
[docs] def resolve_paths(self, description_dict, suffix='_key'): '''Walk input items and expand those that refer to a manifest entry. Parameters ---------- description_dict : dict Any entries with key names ending in suffix will be expanded. suffix : string Indicates the entries to be expanded. ''' key_pattern = re.compile('(.*)%s$' % (suffix)) for description_key, manifest_key in description_dict.items(): m = key_pattern.match(description_key) if m: real_key = m.group(1) # i.e. job_dir_key -> job_dir filename = self.get_path(manifest_key) description_dict[real_key] = filename del description_dict[description_key]
[docs] def as_dataframe(self): return pd.DataFrame.from_dict(self.path_info, orient='index')