Source code for allensdk.core.h5_utilities

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

import functools
import six

import h5py


def decode_bytes(bytes_dataset, encoding='UTF-8'):
    ''' Decode every element of a dataset of bytes into a str

    Parameters
    ----------
    bytes_dataset : array-like
        Container of bytes elements. It is sliced with ``[:]`` and the
        result is iterated through its ``.flat`` attribute, so the output is
        one-dimensional regardless of the input's dimensionality.
    encoding : str, optional
        Codec handed to each element's ``decode`` method. Defaults to 'UTF-8'.

    Returns
    -------
    list of str :
        The decoded elements, in flattened iteration order.

    '''

    everything = bytes_dataset[:]

    decoded = []
    for raw_item in everything.flat:
        decoded.append(raw_item.decode(encoding))

    return decoded
def load_datasets_by_relnames(relnames, h5_file, start_node):
    ''' Convenience wrapper that finds one or more datasets in an h5 file by
    relative name and loads each of them into memory

    Parameters
    ----------
    relnames : iterable
        Relative names (final section of the absolute name) to look for. Each
        one becomes a key of the returned dictionary if it is found.
    h5_file : h5py.File
        File to search.
    start_node : str, h5py.Group or None
        Where the traversal begins; forwarded unchanged to
        keyed_locate_h5_objects.

    Returns
    -------
    dict :
        Maps each relative name that was located to the result of slicing the
        matched object with ``[:]``. Names that were not found are absent.

    '''

    # one single-name matcher per requested relative name, keyed by that name
    matcher_cbs = {}
    for relname in relnames:
        matcher_cbs[relname] = functools.partial(
            h5_object_matcher_relname_in, [relname]
        )

    located = keyed_locate_h5_objects(
        matcher_cbs, h5_file, start_node=start_node
    )

    loaded = {}
    for key, h5_object in six.iteritems(located):
        loaded[key] = h5_object[:]
    return loaded
def h5_object_matcher_relname_in(relnames, h5_object_name, h5_object):
    ''' Tests whether the relative name of an h5 object (the part of its
    absolute name after the last '/') is one of a supplied collection of names

    Parameters
    ----------
    relnames : array-like
        Relative names that count as a match
    h5_object_name : str
        Full name (path from origin) of the h5 object
    h5_object : h5py.Group, h5py.Dataset
        The object whose name is being tested. It is not inspected, only
        handed back to the caller.

    Returns
    -------
    bool :
        whether the relative name was found in relnames
    h5_object : h5py.Group, h5py.Dataset
        the argued object

    '''

    # everything after the final '/', or the whole name if there is no '/'
    relative_name = h5_object_name.rpartition('/')[2]
    is_match = relative_name in relnames

    return is_match, h5_object
def keyed_locate_h5_objects(matcher_cbs, h5_file, start_node=None):
    ''' Walk an h5 file and collect, under caller-supplied keys, the objects
    accepted by the corresponding matcher callbacks

    Parameters
    ----------
    matcher_cbs : dict
        Maps each output key to a callable taking (name, h5_object) and
        returning a 2-tuple whose first element is truthy on a match.
    h5_file : h5py.File
        File to traverse.
    start_node : str, h5py.Group or None, optional
        Where the traversal begins; forwarded unchanged to traverse_h5_file.

    Returns
    -------
    dict :
        Maps each key whose matcher accepted at least one object to the
        accepted object. If a matcher accepts several objects, the one
        visited last is kept.

    '''

    found = {}

    def _record_matches(node_name, node):
        # every matcher gets to see every visited node
        for key, is_match_cb in six.iteritems(matcher_cbs):
            is_match, _ = is_match_cb(node_name, node)
            if not is_match:
                continue
            found[key] = node

    traverse_h5_file(_record_matches, h5_file, start_node)
    return found
def locate_h5_objects(matcher_cb, h5_file, start_node=None):
    ''' Walk an h5 file and gather every object accepted by a matcher callback

    Parameters
    ----------
    matcher_cb : callable
        Takes (name, h5_object) and returns a 2-tuple whose first element is
        truthy when the object should be collected.
    h5_file : h5py.File
        File to traverse.
    start_node : str, h5py.Group or None, optional
        Where the traversal begins; forwarded unchanged to traverse_h5_file.

    Returns
    -------
    list :
        The accepted objects, in the order the traversal visited them.

    '''

    found = []

    def _collect(node_name, node):
        is_match, _ = matcher_cb(node_name, node)
        if not is_match:
            return
        found.append(node)

    traverse_h5_file(_collect, h5_file, start_node)
    return found
def traverse_h5_file(callback, h5_file, start_node=None):
    ''' Traverse an h5 file and apply a callback to each node

    Parameters
    ----------
    callback : callable
        Handed to the start node's ``visititems`` method, which is expected
        to call it as ``callback(name, h5_object)`` for each node it visits.
    h5_file : h5py.File
        File to traverse. Also used to resolve start_node when that is given
        as a path.
    start_node : str, bytes, h5py.Group or None, optional
        Where the traversal begins. None (the default) starts at the root
        ('/'). A str or bytes value is treated as a path and looked up in
        h5_file. Anything else is assumed to already be a node exposing
        ``visititems`` and is used as is.

    '''

    if start_node is None:
        start_node = h5_file['/']
    elif isinstance(start_node, (str, bytes)):
        # Paths may arrive as bytes as well as str; resolve both through the
        # file rather than failing later with an AttributeError on a bytes
        # object that has no visititems.
        start_node = h5_file[start_node]

    start_node.visititems(callback)