import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache


# Example cache directory path, it determines where downloaded data will be stored
data_directory = '/local1/ecephys_cache_dir/'

manifest_path = os.path.join(data_directory, "manifest.json")

cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

units = cache.get_units()


len(units)

40010


units = cache.get_units(amplitude_cutoff_maximum = np.inf,
                        presence_ratio_minimum = -np.inf,
                        isi_violations_maximum = np.inf)

len(units)

99180


from scipy.ndimage.filters import gaussian_filter1d
plt.rcParams.update({'font.size': 14})

def plot_metric(data, bins, x_axis_label, color, max_value=-1):
    
    h, b = np.histogram(data, bins=bins, density=True)

    x = b[:-1]
    y = gaussian_filter1d(h, 1)

    plt.plot(x, y, color=color)
    plt.xlabel(x_axis_label)
    plt.gca().get_yaxis().set_visible(False)
    [plt.gca().spines[loc].set_visible(False) for loc in ['right', 'top', 'left']]
    if max_value < np.max(y) * 1.1:
        max_value = np.max(y) * 1.1
    plt.ylim([0, max_value])
    
    return max_value


data = units['firing_rate']
bins = np.linspace(0,50,100)

max_value = plot_metric(data, bins, 'Firing rate (Hz)', 'red')


data = np.log10(units['firing_rate'])
bins = np.linspace(-3,2,100)

max_value = plot_metric(data, bins, 'log$_{10}$ firing rate (Hz)', 'red')


data = np.log10(units[units.nn_hit_rate > 0.9]['firing_rate'])
bins = np.linspace(-3,2,100)

max_value = plot_metric(data, bins, 'log$_{10}$ firing rate (Hz)', 'red')


region_dict = {'cortex' : ['VISp', 'VISl', 'VISrl', 'VISam', 'VISpm', 'VIS', 'VISal','VISmma','VISmmp','VISli'],
             'thalamus' : ['LGd','LD', 'LP', 'VPM', 'TH', 'MGm','MGv','MGd','PO','LGv','VL',
              'VPL','POL','Eth','PoT','PP','PIL','IntG','IGL','SGN','VPL','PF','RT'],
             'hippocampus' : ['CA1', 'CA2','CA3', 'DG', 'SUB', 'POST','PRE','ProS','HPF'],
             'midbrain': ['MB','SCig','SCiw','SCsg','SCzo','PPT','APN','NOT','MRN','OP','LT','RPF','CP']}

color_dict = {'cortex' : '#08858C',
              'thalamus' : '#FC6B6F',
              'hippocampus' : '#7ED04B',
              'midbrain' : '#FC9DFE'}

bins = np.linspace(-3,2,100)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = np.log10(units[units.ecephys_structure_acronym.isin(region_dict[region])]['firing_rate'])
    
    max_value = plot_metric(data, bins, 'log$_{10}$ firing rate (Hz)', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())


bins = np.linspace(0,1,100)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['presence_ratio']
    
    max_value = plot_metric(data, bins, 'Presence ratio', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())

plt.plot([0.9, 0.9],[0,max_value], ':')

[<matplotlib.lines.Line2D at 0x7f15c17b2550>]


np.around(np.sum(units.presence_ratio > 0.9) / len(units), 2)

0.83


bins = np.linspace(0,0.5,200)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['amplitude_cutoff']
    
    max_value = plot_metric(data, bins, 'Amplitude cutoff', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())

plt.plot([0.1, 0.1],[0,max_value], ':')

[<matplotlib.lines.Line2D at 0x7f15c1753850>]


np.around(np.sum(units.amplitude_cutoff == 0.5) / len(units), 2)

0.15


bins = np.linspace(0,10,200)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['isi_violations']
    
    max_value = plot_metric(data, bins, 'ISI violations', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())

plt.plot([0.5, 0.5],[0,max_value], ':')

[<matplotlib.lines.Line2D at 0x7f15eb6c0e50>]


bins = np.linspace(-6,2.5,100)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = np.log10(units[units.ecephys_structure_acronym.isin(region_dict[region])]['isi_violations'] + 1e-5) 
    
    max_value = plot_metric(data, bins, '$log_{10}$ ISI violations', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())

plt.plot([np.log10(0.5), np.log10(0.5)],[0,max_value], ':')

[<matplotlib.lines.Line2D at 0x7f15c18ffc90>]


np.around(np.sum(units.isi_violations == 0.0) / len(units), 2)

0.14


bins = np.linspace(0,10,100)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['snr']
    
    max_value = plot_metric(data, bins, 'SNR', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())


bins = np.linspace(0,170,50)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['isolation_distance']
    
    max_value = plot_metric(data, bins, 'Isolation distance', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())


bins = np.linspace(0,15,50)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['d_prime']
    
    max_value = plot_metric(data, bins, 'd-prime', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())


bins = np.linspace(0,1,100)
max_value = -np.inf

for idx, region in enumerate(region_dict.keys()):
    
    data = units[units.ecephys_structure_acronym.isin(region_dict[region])]['nn_hit_rate']
    
    max_value = plot_metric(data, bins, 'Nearest-neighbors hit rate', color_dict[region], max_value)
    
_ = plt.legend(region_dict.keys())


metrics = ['firing_rate', 
           'presence_ratio', 
           'amplitude_cutoff', 
           'isi_violations', 
           'snr', 
           'isolation_distance', 
           'd_prime', 
           'nn_hit_rate']

ranges = [[0,20],
          [0.9,0.995],
          [0,0.5],
          [0,2],
          [0,8],
          [0,125],
          [0,10],
          [0,1]]

_ = plt.figure(figsize=(5,10))

for idx, metric in enumerate(metrics):
    
    data = units[metric].values
    data = data[np.invert(np.isnan(data))]

    _ = plt.subplot(len(metrics),1,idx+1)
    _ = plt.boxplot(data, showfliers=False, showcaps=False, vert=False)
    _ = plt.ylim([0.8,1.2])
    _ = plt.xlim(ranges[idx])
    _ = plt.yticks([])
    
    plt.title(metric)
    
plt.tight_layout()

Unit Quality Metrics¶

Tutorial overview¶

Why do we need quality metrics?¶

How these metrics were calculated¶

Accessing the metrics¶

Firing rate¶

Presence ratio¶

Amplitude cutoff¶

ISI violations¶

SNR¶

Isolation distance¶

d-prime¶

Nearest-neighbors hit rate¶

Summary¶