import numpy as np
import h5py
[docs]def open_view_on_binary(file_like, dtype=np.uint8, mode="r", offset=0,
shape=None, order="C", strides=None):
'''Open a view into a memory-mapped binary file.
Parameters
----------
file_like : {string, file object}
File to open.
dtype : numpy.dtype
Numpy dtype to open the memory-mapped array as.
mode : string
Mode to open the file in.
offset : integer
Offset (in bytes) into the file at which to start the memory
map.
shape : {tuple, list}
Shape of the array.
order : {"C", "F"}
C or Fortran ordering.
strides : {tuple, list}
Strides along each axis for reading the array.
Returns
-------
numpy.memmap
Strided view into memory-mapped array.
'''
mapped = np.memmap(file_like, dtype, mode, offset, order=order)
return np.lib.stride_tricks.as_strided(mapped, shape=shape,
strides=strides)
[docs]def read_strided(filename, dtype, offset, shape, strides):
'''Load a frame without memory-mapping.'''
frame_size = np.dtype(dtype).itemsize
arr = np.empty(shape, dtype=dtype)
frame_size = arr.dtype.itemsize*np.product(shape[1:])
step = strides[0] - frame_size
with open(filename, "rb") as f:
f.seek(offset)
for i in range(shape[0]):
frame = np.frombuffer(f.read(frame_size), dtype=dtype)
arr[i] = frame.reshape(shape[1:])
f.seek(step, 1)
return arr
[docs]def load_frame(raw_filename, json_meta, use_memmap=False):
'''Load a frame of a multi-frame raw file.'''
if use_memmap:
arr = open_view_on_binary(raw_filename, dtype=json_meta["dtype"],
offset=json_meta["byte_offset"],
shape=json_meta["shape"],
strides=json_meta["strides"])
else:
arr = read_strided(raw_filename, dtype=json_meta["dtype"],
offset=json_meta["byte_offset"],
shape=json_meta["shape"],
strides=json_meta["strides"])
return arr
[docs]def export_frame_to_hdf5(raw_filename, data_hdf5_filename,
auxiliary_hdf5_filename, frame_meta,
compression="gzip", compression_opts=9):
'''Export a frame from raw to hdf5.
Data with the channel_description `data` is stored in the
data_hdf5_filename, while any other data is stored in the
auxiliary_hdf5_filename
'''
data_created = False
aux_created = False
for json_meta in frame_meta:
# This is dirty, we should expand the metadata handoff to allow
# real specification of ophys data versus auxiliary data
mode = "a"
if json_meta["channel_description"] == "data":
filename = data_hdf5_filename
if not data_created:
data_created = True
mode = "w"
else:
filename = auxiliary_hdf5_filename
if not aux_created:
aux_created = True
mode = "w"
data = load_frame(raw_filename, json_meta)
chunks = (1, json_meta["shape"][1], json_meta["shape"][2])
with h5py.File(filename, mode) as f:
f.create_dataset(json_meta["channel_description"], data=data,
chunks=chunks, compression=compression,
compression_opts=compression_opts)