ihkapy.sm_features

View Source

  1import numpy as np
  2from scipy.signal import coherence
  3from itertools import combinations as combin
  4import warnings # temporary
  5
  6
  7"""
  8Each feature is a function that takes an array-like 3d segment 
  9The segemnt has shape (n_raw_chan,n_samples,n_freq_chan)
 10The feature functions return a dictionary of features
 11    key   : str   = name of sub-feature
 12    value : float = value of the feature evaluated
 13The keys of this returned dictionary are to be used a column names for
 14the data-frame in which we save our features.
 15
 16Example 1. Mean Power raw. 
 17Takes a segment of data which has (for example) 4 raw channels and 41 
 18freq channels with 10_000 samples.
 19For each raw channel
 20    Compute the mean power 
 21    Save this to dictionary under key=f"mean_power_ch{n_raw_chan}"
 22Return the dictionary of mean powers
 23
 24Example 2. Coherence feature.
 25Takes a segment of data with four raw channels and 41 freq channels
 26Discard all the freq channels that are convolutions, only keep raw recording
 27For each pair of raw channels
 28    Compute the coherence
 29    Save selected frequencies to dictionary 
 30Return the dictionary of raw-chan pairwise coherence
 31
 32
 33
 34Naming Convention:
 35    ch_raw refers to the raw electrode channel index
 36    ch_freq refers to the binary (wavelet freq) file channel index
 37
 38
 39To add a new feature:
 40    - Write a function that takes a segment and returns a dictionary of features
 41    - Add a conditional statement in get_feats() that calls that function
 42        and adds it to the returned dictionary.
 43    - Append the name of that feature to the FEATURES list in Options.toml
 44"""
 45
 46### Formatting helpers
 47
 48def _select_freq_chans(segment,chan_freq_idxs) -> (np.ndarray,np.ndarray):
 49    """Formatting helper for all features. 
 50
 51    Assumes segment array-like w/ shape (n_chan_raw,n_samples,n_chan_freq)
 52
 53    Selects frequency (wavelet binary) channels of interest and formats 
 54    chan_freq_idxs as a 1d numpy ndarray.
 55    """
 56    # If None, select all frequency channels
 57    segment = np.asarray(segment)
 58    if chan_freq_idxs is None or chan_freq_idxs == "all": 
 59        chan_freq_idxs = np.arange(segment.shape[2])
 60    elif type(chan_freq_idxs) == type(1): # int
 61        chan_freq_idxs = np.asarray([chan_freq_idxs])
 62        segment = segment[:,:,chan_freq_idxs] 
 63    else:
 64        chan_freq_idxs = np.asarray(chan_freq_idxs)
 65        segment = segment[:,:,chan_freq_idxs] 
 66    return segment,chan_freq_idxs
 67
 68def _feats_as_dict_from_2d_array(
 69        feat_name : str,
 70        feats_all : np.ndarray,
 71        chan_freq_idxs : np.ndarray
 72        ):
 73    """Helper for single-channel features.
 74
 75    Turns a 2d feats array* with dims = (n_raw_chans , n_freq_chans)
 76    into our standard dictionary format that can easily be added to a
 77    pandas dataframe.
 78
 79    *Each element is assumed to be a feature (float) corresponding it's 
 80    index's raw channel and frequency index.
 81    """
 82    feats_dict = {}
 83    for ch_raw,feats_ch_raw in enumerate(feats_all):
 84        for chb,feat in zip(chan_freq_idxs,feats_ch_raw):
 85            feats_dict[f"{feat_name}_chraw_{str(ch_raw).zfill(3)}_chfreq_{str(chb).zfill(3)}"] = feat
 86    return feats_dict
 87
 88
 89### Begin: Features on individual channels
 90
 91def mean_power(
 92        segment : list or np.ndarray,
 93        chan_freq_idxs : np.ndarray or int = None
 94        ) -> dict:
 95    """Get mean power across each channel.
 96
 97    Parameters
 98    ----------
 99    `segment : array-like`
100        A list of segment samples. Dims = (n_raw , n_samples , n_freq_chan)
101
102    `chan_freq_idxs : ndarray or int = None`
103        The 1d list of binary file indices we are interested in, e.g.
104        if we're interested in all wavelet channels (aka binary file 
105        channels) pass None. If we're interested in the first two we 
106        can pass [0,1] or np.array([0,1]). If we're only interested in
107        the raw data we can simply pass an int, 0 in this case. 
108
109    Returns
110    -------
111    `dict`
112        A dictionary where the key,value pairs are `name of feature`
113        and `computed feature float`. All keys are strings, all values
114        are floats.
115    """
116    # Select binary (aka wavelet/freq) file channels
117    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
118    # Compute the features along samples axis
119    mp_all = np.mean(segment,axis=1)
120    # Save and return, formatted as dict
121    mp_feats = _feats_as_dict_from_2d_array("mean_power",mp_all,chan_freq_idxs)
122    return mp_feats
123
124
125def var(
126        segment         : list or np.ndarray,
127        chan_freq_idxs  : np.ndarray or int = None
128        ) -> dict:
129    """Get the variance of the signal"""
130    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
131    var_all = np.var(segment,axis=1) # all channels
132    var_feats = _feats_as_dict_from_2d_array("var",var_all,chan_freq_idxs)
133    return var_feats
134
135### End: Features on individual channels
136
137
138### Begin: Features comparing two channels
139
140# coherence is always only computed for a single index
141def coherence_all_pairs(
142        segment         : list or np.ndarray,
143        fs              : float or int # = 2000
144        ) -> dict:
145    """Cohere each pair of signals"""
146    # NB: The chan_freq_idxs parameter selects all freqs of relevance
147    # TODO: chan_freq_idxs is hard coded... change this to whatever is 
148    # specified in toml gets fed to this function 
149    chan_freq_idxs = 0 # only select raw channel
150    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
151    segment = np.squeeze(segment) # 3d array -> 2d array
152    raw_chans = list(range(len(segment)))    
153    coherence_dict = {}
154    for (chx,chy),(x,y) in zip(combin(raw_chans,2),combin(segment,2)):
155        # There parameters defined here are similar to the default 
156        # MatLab options
157        sample_freqs,cxy = coherence(x,y,fs=fs,window='hann',
158                nperseg=256,noverlap=None,nfft=None,
159                detrend='constant',axis=0)
160        # Select a subset of the 129 freqs: 129 = 256 // 2 + 1
161        idxs = np.array([0,2,4,8,16,32,64,-1]) # A logarithmic subset of freqs
162        sample_freqs,cxy = sample_freqs[idxs] , cxy[idxs]
163        
164        # By default noverlap=None defaults it to half the segment length
165        # NB: FFT and therefore scipy.signal.coherence samples 
166        # frequencies linearly, not logarithmically
167        
168        # TODO: Do we want to implement something closer to the MatLab 
169        # or is this good enough?
170        # Scipy does not provide an easy way of computing the coherence
171        # at specific frequencies as mscohere does in MatLab. As a 
172        # temporary measure I'm returning a logarithmic subset of the 
173        # freqs computed by scipy.signal.coherence().
174        # We could potentially reimplement mscohere by hand (in Python) 
175        # so that it can take a list of frequencies as input.
176
177        # Serialize the freqs
178        for sf,cohxy in zip(sample_freqs,cxy):
179            feat_name = f"coherence_freq_{round(sf,2)}_chx_{str(chx).zfill(3)}_chy_{str(chy).zfill(3)}"
180            coherence_dict[feat_name] = cohxy 
181
182    return coherence_dict
183
184
185### End: Features comparing two channels
186
187
188# Compute all the features on a segments
189def get_feats(
190        segment : np.ndarray or list,
191        features_list : list,
192        data_ops : dict
193        ) -> dict:
194    """Computes all features specified in featurelist.
195
196    Computes each feature specified in featurelist on all of the
197    segment, then assembles output into a features_dict—a dictionary
198    of all features computed, this corresponds to a single row to be 
199    appended to our features dataframe.
200
201    To write a new feature, write the method in this module 
202    (`sm_features.py`) then add it to execut conditionally in this 
203    function (`get_feats`), finally add a string representation to the 
204    FEATURES in your `Options.toml` config file.
205
206    Parameters
207    ----------
208    `segment : np.ndarray or list`
209        Is a list of segment samples ordered by the raw_chan index.
210        Shape = (n_raw_chan , n_samples , n_wavelet_chan)
211
212    `features_list : list`
213        A list of strings: the names of the features you'd like to compute.
214
215    `data_ops : dict`
216        Dictionary containing metadata, as defined in Options.toml.
217
218    Returns
219    -------
220    `dict`
221        A dictionary of all the features we computed.
222    """
223    IMPLEMENTED_FEATS = ["mean_power","var","coherence"]
224    for feat in features_list:
225        if feat not in IMPLEMENTED_FEATS:
226            warnings.warn(f"{feat} has not yet been implemented.")
227    all_feats = {}
228    if "mean_power" in features_list:
229        AMP_FREQ_IDX_ALL = data_ops["AMP_FREQ_IDX_ALL"]
230        mp_feats = mean_power(segment,AMP_FREQ_IDX_ALL) 
231        all_feats.update(mp_feats) # add it to greater dictionary
232    if "var" in features_list:
233        var_feats = var(segment,"all")
234        all_feats.update(var_feats)
235    if "coherence" in features_list:
236        FS = data_ops["FS"]
237        coherence_feats = coherence_all_pairs(
238                segment,
239                fs = FS
240                )
241        all_feats.update(coherence_feats)
242    # print()
243    # for i,j in all_feats.items(): print(f"{i}:{j}") # debug
244    return all_feats
245
246
247
248if __name__ == "__main__":
249    # Unit tests
250    print("Runnin unit tests")
251    ### HELPERS
252
253    # TEST _select_freq_chans() 
254    segment = [np.random.normal(0,1,(10000,41)) for i in range(4)]
255    chan_freq_idxs = [0,1,2,3,4,10,12]
256    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
257    try:
258        assert segment.shape == (4,10000,7)
259        print("Passed Test #1 _select_freq_chans()")
260    except:
261        print("Failed Test #1 _select_freq_chans()")
262    segment = [np.random.normal(0,1,(10000,41)) for i in range(4)]
263    chan_freq_idxs = "all"
264    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
265    try:
266        assert segment.shape == (4,10000,41)
267        assert (chan_freq_idxs == np.arange(41)).all()
268        print("Passed Test #2 _select_freq_chans()")
269    except:
270        print("Failed Test #2 _select_freq_chans()")
271
272    # TEST _feats_as_dict_from_2d_array()
273    n_chan_raw, n_chan_freq = 4,3
274    feats_array = np.random.normal(0,1,(4,3))
275    feats_dict = _feats_as_dict_from_2d_array(
276        "dummy",
277        feats_all = feats_array,
278        chan_freq_idxs = np.array([1,5,9])
279        )
280    print("\nTest _feats_as_dict_from_2d_array() by human inspection.")
281    print("Does this output look good?:")
282    for i,j in feats_dict.items():
283        print(f"{i} : {j}")
284    print()
285
286    ### FEATURES
287    n_ch_raw = 4
288    n_ch_freq = 3
289    n_samples = 10000
290    segment_1 = [np.ones((n_samples,n_ch_freq)) for i in range(n_ch_raw)]
291    segment_0 = [np.zeros((n_samples,n_ch_freq)) for i in range(n_ch_raw)]
292    segment_r = [np.random.normal(0,1,(n_samples,n_ch_freq)) for i in range(n_ch_raw)]
293    # TEST mean_power()
294    print("\nTest mean_power() by human inspection.")
295    print("Does this output look good?")
296    mp_dict = mean_power(segment_1,"all") 
297    for i,j in mp_dict.items():
298        print(f"{i} : {j}")
299
300    # TEST var()
301    
302
303    # TEST coherence_all_pairs()

def mean_power(segment: list, chan_freq_idxs: numpy.ndarray = None) -> dict: View Source

 92def mean_power(
 93        segment : list or np.ndarray,
 94        chan_freq_idxs : np.ndarray or int = None
 95        ) -> dict:
 96    """Get mean power across each channel.
 97
 98    Parameters
 99    ----------
100    `segment : array-like`
101        A list of segment samples. Dims = (n_raw , n_samples , n_freq_chan)
102
103    `chan_freq_idxs : ndarray or int = None`
104        The 1d list of binary file indices we are interested in, e.g.
105        if we're interested in all wavelet channels (aka binary file 
106        channels) pass None. If we're interested in the first two we 
107        can pass [0,1] or np.array([0,1]). If we're only interested in
108        the raw data we can simply pass an int, 0 in this case. 
109
110    Returns
111    -------
112    `dict`
113        A dictionary where the key,value pairs are `name of feature`
114        and `computed feature float`. All keys are strings, all values
115        are floats.
116    """
117    # Select binary (aka wavelet/freq) file channels
118    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
119    # Compute the features along samples axis
120    mp_all = np.mean(segment,axis=1)
121    # Save and return, formatted as dict
122    mp_feats = _feats_as_dict_from_2d_array("mean_power",mp_all,chan_freq_idxs)
123    return mp_feats

Get mean power across each channel.

Parameters

segment : array-like A list of segment samples. Dims = (n_raw , n_samples , n_freq_chan)

chan_freq_idxs : ndarray or int = None The 1d list of binary file indices we are interested in, e.g. if we're interested in all wavelet channels (aka binary file channels) pass None. If we're interested in the first two we can pass [0,1] or np.array([0,1]). If we're only interested in the raw data we can simply pass an int, 0 in this case.

Returns

dict A dictionary where the key,value pairs are name of feature and computed feature float. All keys are strings, all values are floats.

def var(segment: list, chan_freq_idxs: numpy.ndarray = None) -> dict: View Source

126def var(
127        segment         : list or np.ndarray,
128        chan_freq_idxs  : np.ndarray or int = None
129        ) -> dict:
130    """Get the variance of the signal"""
131    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
132    var_all = np.var(segment,axis=1) # all channels
133    var_feats = _feats_as_dict_from_2d_array("var",var_all,chan_freq_idxs)
134    return var_feats

Get the variance of the signal

def coherence_all_pairs(segment: list, fs: float) -> dict: View Source

142def coherence_all_pairs(
143        segment         : list or np.ndarray,
144        fs              : float or int # = 2000
145        ) -> dict:
146    """Cohere each pair of signals"""
147    # NB: The chan_freq_idxs parameter selects all freqs of relevance
148    # TODO: chan_freq_idxs is hard coded... change this to whatever is 
149    # specified in toml gets fed to this function 
150    chan_freq_idxs = 0 # only select raw channel
151    segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs)
152    segment = np.squeeze(segment) # 3d array -> 2d array
153    raw_chans = list(range(len(segment)))    
154    coherence_dict = {}
155    for (chx,chy),(x,y) in zip(combin(raw_chans,2),combin(segment,2)):
156        # There parameters defined here are similar to the default 
157        # MatLab options
158        sample_freqs,cxy = coherence(x,y,fs=fs,window='hann',
159                nperseg=256,noverlap=None,nfft=None,
160                detrend='constant',axis=0)
161        # Select a subset of the 129 freqs: 129 = 256 // 2 + 1
162        idxs = np.array([0,2,4,8,16,32,64,-1]) # A logarithmic subset of freqs
163        sample_freqs,cxy = sample_freqs[idxs] , cxy[idxs]
164        
165        # By default noverlap=None defaults it to half the segment length
166        # NB: FFT and therefore scipy.signal.coherence samples 
167        # frequencies linearly, not logarithmically
168        
169        # TODO: Do we want to implement something closer to the MatLab 
170        # or is this good enough?
171        # Scipy does not provide an easy way of computing the coherence
172        # at specific frequencies as mscohere does in MatLab. As a 
173        # temporary measure I'm returning a logarithmic subset of the 
174        # freqs computed by scipy.signal.coherence().
175        # We could potentially reimplement mscohere by hand (in Python) 
176        # so that it can take a list of frequencies as input.
177
178        # Serialize the freqs
179        for sf,cohxy in zip(sample_freqs,cxy):
180            feat_name = f"coherence_freq_{round(sf,2)}_chx_{str(chx).zfill(3)}_chy_{str(chy).zfill(3)}"
181            coherence_dict[feat_name] = cohxy 
182
183    return coherence_dict

Cohere each pair of signals

def get_feats(segment: numpy.ndarray, features_list: list, data_ops: dict) -> dict: View Source

190def get_feats(
191        segment : np.ndarray or list,
192        features_list : list,
193        data_ops : dict
194        ) -> dict:
195    """Computes all features specified in featurelist.
196
197    Computes each feature specified in featurelist on all of the
198    segment, then assembles output into a features_dict—a dictionary
199    of all features computed, this corresponds to a single row to be 
200    appended to our features dataframe.
201
202    To write a new feature, write the method in this module 
203    (`sm_features.py`) then add it to execut conditionally in this 
204    function (`get_feats`), finally add a string representation to the 
205    FEATURES in your `Options.toml` config file.
206
207    Parameters
208    ----------
209    `segment : np.ndarray or list`
210        Is a list of segment samples ordered by the raw_chan index.
211        Shape = (n_raw_chan , n_samples , n_wavelet_chan)
212
213    `features_list : list`
214        A list of strings: the names of the features you'd like to compute.
215
216    `data_ops : dict`
217        Dictionary containing metadata, as defined in Options.toml.
218
219    Returns
220    -------
221    `dict`
222        A dictionary of all the features we computed.
223    """
224    IMPLEMENTED_FEATS = ["mean_power","var","coherence"]
225    for feat in features_list:
226        if feat not in IMPLEMENTED_FEATS:
227            warnings.warn(f"{feat} has not yet been implemented.")
228    all_feats = {}
229    if "mean_power" in features_list:
230        AMP_FREQ_IDX_ALL = data_ops["AMP_FREQ_IDX_ALL"]
231        mp_feats = mean_power(segment,AMP_FREQ_IDX_ALL) 
232        all_feats.update(mp_feats) # add it to greater dictionary
233    if "var" in features_list:
234        var_feats = var(segment,"all")
235        all_feats.update(var_feats)
236    if "coherence" in features_list:
237        FS = data_ops["FS"]
238        coherence_feats = coherence_all_pairs(
239                segment,
240                fs = FS
241                )
242        all_feats.update(coherence_feats)
243    # print()
244    # for i,j in all_feats.items(): print(f"{i}:{j}") # debug
245    return all_feats

Computes all features specified in featurelist.

Computes each feature specified in featurelist on all of the segment, then assembles output into a features_dict—a dictionary of all features computed, this corresponds to a single row to be appended to our features dataframe.

To write a new feature, write the method in this module (sm_features.py) then add it to execut conditionally in this function (get_feats), finally add a string representation to the FEATURES in your Options.toml config file.

Parameters

segment : np.ndarray or list Is a list of segment samples ordered by the raw_chan index. Shape = (n_raw_chan , n_samples , n_wavelet_chan)

features_list : list A list of strings: the names of the features you'd like to compute.

data_ops : dict Dictionary containing metadata, as defined in Options.toml.

Returns

dict A dictionary of all the features we computed.