ihkapy.sm_features
1import numpy as np 2from scipy.signal import coherence 3from itertools import combinations as combin 4import warnings # temporary 5 6 7""" 8Each feature is a function that takes an array-like 3d segment 9The segemnt has shape (n_raw_chan,n_samples,n_freq_chan) 10The feature functions return a dictionary of features 11 key : str = name of sub-feature 12 value : float = value of the feature evaluated 13The keys of this returned dictionary are to be used a column names for 14the data-frame in which we save our features. 15 16Example 1. Mean Power raw. 17Takes a segment of data which has (for example) 4 raw channels and 41 18freq channels with 10_000 samples. 19For each raw channel 20 Compute the mean power 21 Save this to dictionary under key=f"mean_power_ch{n_raw_chan}" 22Return the dictionary of mean powers 23 24Example 2. Coherence feature. 25Takes a segment of data with four raw channels and 41 freq channels 26Discard all the freq channels that are convolutions, only keep raw recording 27For each pair of raw channels 28 Compute the coherence 29 Save selected frequencies to dictionary 30Return the dictionary of raw-chan pairwise coherence 31 32 33 34Naming Convention: 35 ch_raw refers to the raw electrode channel index 36 ch_freq refers to the binary (wavelet freq) file channel index 37 38 39To add a new feature: 40 - Write a function that takes a segment and returns a dictionary of features 41 - Add a conditional statement in get_feats() that calls that function 42 and adds it to the returned dictionary. 43 - Append the name of that feature to the FEATURES list in Options.toml 44""" 45 46### Formatting helpers 47 48def _select_freq_chans(segment,chan_freq_idxs) -> (np.ndarray,np.ndarray): 49 """Formatting helper for all features. 50 51 Assumes segment array-like w/ shape (n_chan_raw,n_samples,n_chan_freq) 52 53 Selects frequency (wavelet binary) channels of interest and formats 54 chan_freq_idxs as a 1d numpy ndarray. 55 """ 56 # If None, select all frequency channels 57 segment = np.asarray(segment) 58 if chan_freq_idxs is None or chan_freq_idxs == "all": 59 chan_freq_idxs = np.arange(segment.shape[2]) 60 elif type(chan_freq_idxs) == type(1): # int 61 chan_freq_idxs = np.asarray([chan_freq_idxs]) 62 segment = segment[:,:,chan_freq_idxs] 63 else: 64 chan_freq_idxs = np.asarray(chan_freq_idxs) 65 segment = segment[:,:,chan_freq_idxs] 66 return segment,chan_freq_idxs 67 68def _feats_as_dict_from_2d_array( 69 feat_name : str, 70 feats_all : np.ndarray, 71 chan_freq_idxs : np.ndarray 72 ): 73 """Helper for single-channel features. 74 75 Turns a 2d feats array* with dims = (n_raw_chans , n_freq_chans) 76 into our standard dictionary format that can easily be added to a 77 pandas dataframe. 78 79 *Each element is assumed to be a feature (float) corresponding it's 80 index's raw channel and frequency index. 81 """ 82 feats_dict = {} 83 for ch_raw,feats_ch_raw in enumerate(feats_all): 84 for chb,feat in zip(chan_freq_idxs,feats_ch_raw): 85 feats_dict[f"{feat_name}_chraw_{str(ch_raw).zfill(3)}_chfreq_{str(chb).zfill(3)}"] = feat 86 return feats_dict 87 88 89### Begin: Features on individual channels 90 91def mean_power( 92 segment : list or np.ndarray, 93 chan_freq_idxs : np.ndarray or int = None 94 ) -> dict: 95 """Get mean power across each channel. 96 97 Parameters 98 ---------- 99 `segment : array-like` 100 A list of segment samples. Dims = (n_raw , n_samples , n_freq_chan) 101 102 `chan_freq_idxs : ndarray or int = None` 103 The 1d list of binary file indices we are interested in, e.g. 104 if we're interested in all wavelet channels (aka binary file 105 channels) pass None. If we're interested in the first two we 106 can pass [0,1] or np.array([0,1]). If we're only interested in 107 the raw data we can simply pass an int, 0 in this case. 108 109 Returns 110 ------- 111 `dict` 112 A dictionary where the key,value pairs are `name of feature` 113 and `computed feature float`. All keys are strings, all values 114 are floats. 115 """ 116 # Select binary (aka wavelet/freq) file channels 117 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 118 # Compute the features along samples axis 119 mp_all = np.mean(segment,axis=1) 120 # Save and return, formatted as dict 121 mp_feats = _feats_as_dict_from_2d_array("mean_power",mp_all,chan_freq_idxs) 122 return mp_feats 123 124 125def var( 126 segment : list or np.ndarray, 127 chan_freq_idxs : np.ndarray or int = None 128 ) -> dict: 129 """Get the variance of the signal""" 130 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 131 var_all = np.var(segment,axis=1) # all channels 132 var_feats = _feats_as_dict_from_2d_array("var",var_all,chan_freq_idxs) 133 return var_feats 134 135### End: Features on individual channels 136 137 138### Begin: Features comparing two channels 139 140# coherence is always only computed for a single index 141def coherence_all_pairs( 142 segment : list or np.ndarray, 143 fs : float or int # = 2000 144 ) -> dict: 145 """Cohere each pair of signals""" 146 # NB: The chan_freq_idxs parameter selects all freqs of relevance 147 # TODO: chan_freq_idxs is hard coded... change this to whatever is 148 # specified in toml gets fed to this function 149 chan_freq_idxs = 0 # only select raw channel 150 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 151 segment = np.squeeze(segment) # 3d array -> 2d array 152 raw_chans = list(range(len(segment))) 153 coherence_dict = {} 154 for (chx,chy),(x,y) in zip(combin(raw_chans,2),combin(segment,2)): 155 # There parameters defined here are similar to the default 156 # MatLab options 157 sample_freqs,cxy = coherence(x,y,fs=fs,window='hann', 158 nperseg=256,noverlap=None,nfft=None, 159 detrend='constant',axis=0) 160 # Select a subset of the 129 freqs: 129 = 256 // 2 + 1 161 idxs = np.array([0,2,4,8,16,32,64,-1]) # A logarithmic subset of freqs 162 sample_freqs,cxy = sample_freqs[idxs] , cxy[idxs] 163 164 # By default noverlap=None defaults it to half the segment length 165 # NB: FFT and therefore scipy.signal.coherence samples 166 # frequencies linearly, not logarithmically 167 168 # TODO: Do we want to implement something closer to the MatLab 169 # or is this good enough? 170 # Scipy does not provide an easy way of computing the coherence 171 # at specific frequencies as mscohere does in MatLab. As a 172 # temporary measure I'm returning a logarithmic subset of the 173 # freqs computed by scipy.signal.coherence(). 174 # We could potentially reimplement mscohere by hand (in Python) 175 # so that it can take a list of frequencies as input. 176 177 # Serialize the freqs 178 for sf,cohxy in zip(sample_freqs,cxy): 179 feat_name = f"coherence_freq_{round(sf,2)}_chx_{str(chx).zfill(3)}_chy_{str(chy).zfill(3)}" 180 coherence_dict[feat_name] = cohxy 181 182 return coherence_dict 183 184 185### End: Features comparing two channels 186 187 188# Compute all the features on a segments 189def get_feats( 190 segment : np.ndarray or list, 191 features_list : list, 192 data_ops : dict 193 ) -> dict: 194 """Computes all features specified in featurelist. 195 196 Computes each feature specified in featurelist on all of the 197 segment, then assembles output into a features_dict—a dictionary 198 of all features computed, this corresponds to a single row to be 199 appended to our features dataframe. 200 201 To write a new feature, write the method in this module 202 (`sm_features.py`) then add it to execut conditionally in this 203 function (`get_feats`), finally add a string representation to the 204 FEATURES in your `Options.toml` config file. 205 206 Parameters 207 ---------- 208 `segment : np.ndarray or list` 209 Is a list of segment samples ordered by the raw_chan index. 210 Shape = (n_raw_chan , n_samples , n_wavelet_chan) 211 212 `features_list : list` 213 A list of strings: the names of the features you'd like to compute. 214 215 `data_ops : dict` 216 Dictionary containing metadata, as defined in Options.toml. 217 218 Returns 219 ------- 220 `dict` 221 A dictionary of all the features we computed. 222 """ 223 IMPLEMENTED_FEATS = ["mean_power","var","coherence"] 224 for feat in features_list: 225 if feat not in IMPLEMENTED_FEATS: 226 warnings.warn(f"{feat} has not yet been implemented.") 227 all_feats = {} 228 if "mean_power" in features_list: 229 AMP_FREQ_IDX_ALL = data_ops["AMP_FREQ_IDX_ALL"] 230 mp_feats = mean_power(segment,AMP_FREQ_IDX_ALL) 231 all_feats.update(mp_feats) # add it to greater dictionary 232 if "var" in features_list: 233 var_feats = var(segment,"all") 234 all_feats.update(var_feats) 235 if "coherence" in features_list: 236 FS = data_ops["FS"] 237 coherence_feats = coherence_all_pairs( 238 segment, 239 fs = FS 240 ) 241 all_feats.update(coherence_feats) 242 # print() 243 # for i,j in all_feats.items(): print(f"{i}:{j}") # debug 244 return all_feats 245 246 247 248if __name__ == "__main__": 249 # Unit tests 250 print("Runnin unit tests") 251 ### HELPERS 252 253 # TEST _select_freq_chans() 254 segment = [np.random.normal(0,1,(10000,41)) for i in range(4)] 255 chan_freq_idxs = [0,1,2,3,4,10,12] 256 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 257 try: 258 assert segment.shape == (4,10000,7) 259 print("Passed Test #1 _select_freq_chans()") 260 except: 261 print("Failed Test #1 _select_freq_chans()") 262 segment = [np.random.normal(0,1,(10000,41)) for i in range(4)] 263 chan_freq_idxs = "all" 264 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 265 try: 266 assert segment.shape == (4,10000,41) 267 assert (chan_freq_idxs == np.arange(41)).all() 268 print("Passed Test #2 _select_freq_chans()") 269 except: 270 print("Failed Test #2 _select_freq_chans()") 271 272 # TEST _feats_as_dict_from_2d_array() 273 n_chan_raw, n_chan_freq = 4,3 274 feats_array = np.random.normal(0,1,(4,3)) 275 feats_dict = _feats_as_dict_from_2d_array( 276 "dummy", 277 feats_all = feats_array, 278 chan_freq_idxs = np.array([1,5,9]) 279 ) 280 print("\nTest _feats_as_dict_from_2d_array() by human inspection.") 281 print("Does this output look good?:") 282 for i,j in feats_dict.items(): 283 print(f"{i} : {j}") 284 print() 285 286 ### FEATURES 287 n_ch_raw = 4 288 n_ch_freq = 3 289 n_samples = 10000 290 segment_1 = [np.ones((n_samples,n_ch_freq)) for i in range(n_ch_raw)] 291 segment_0 = [np.zeros((n_samples,n_ch_freq)) for i in range(n_ch_raw)] 292 segment_r = [np.random.normal(0,1,(n_samples,n_ch_freq)) for i in range(n_ch_raw)] 293 # TEST mean_power() 294 print("\nTest mean_power() by human inspection.") 295 print("Does this output look good?") 296 mp_dict = mean_power(segment_1,"all") 297 for i,j in mp_dict.items(): 298 print(f"{i} : {j}") 299 300 # TEST var() 301 302 303 # TEST coherence_all_pairs()
92def mean_power( 93 segment : list or np.ndarray, 94 chan_freq_idxs : np.ndarray or int = None 95 ) -> dict: 96 """Get mean power across each channel. 97 98 Parameters 99 ---------- 100 `segment : array-like` 101 A list of segment samples. Dims = (n_raw , n_samples , n_freq_chan) 102 103 `chan_freq_idxs : ndarray or int = None` 104 The 1d list of binary file indices we are interested in, e.g. 105 if we're interested in all wavelet channels (aka binary file 106 channels) pass None. If we're interested in the first two we 107 can pass [0,1] or np.array([0,1]). If we're only interested in 108 the raw data we can simply pass an int, 0 in this case. 109 110 Returns 111 ------- 112 `dict` 113 A dictionary where the key,value pairs are `name of feature` 114 and `computed feature float`. All keys are strings, all values 115 are floats. 116 """ 117 # Select binary (aka wavelet/freq) file channels 118 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 119 # Compute the features along samples axis 120 mp_all = np.mean(segment,axis=1) 121 # Save and return, formatted as dict 122 mp_feats = _feats_as_dict_from_2d_array("mean_power",mp_all,chan_freq_idxs) 123 return mp_feats
Get mean power across each channel.
Parameters
segment : array-like
A list of segment samples. Dims = (n_raw , n_samples , n_freq_chan)
chan_freq_idxs : ndarray or int = None
The 1d list of binary file indices we are interested in, e.g.
if we're interested in all wavelet channels (aka binary file
channels) pass None. If we're interested in the first two we
can pass [0,1] or np.array([0,1]). If we're only interested in
the raw data we can simply pass an int, 0 in this case.
Returns
dict
A dictionary where the key,value pairs are name of feature
and computed feature float
. All keys are strings, all values
are floats.
126def var( 127 segment : list or np.ndarray, 128 chan_freq_idxs : np.ndarray or int = None 129 ) -> dict: 130 """Get the variance of the signal""" 131 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 132 var_all = np.var(segment,axis=1) # all channels 133 var_feats = _feats_as_dict_from_2d_array("var",var_all,chan_freq_idxs) 134 return var_feats
Get the variance of the signal
142def coherence_all_pairs( 143 segment : list or np.ndarray, 144 fs : float or int # = 2000 145 ) -> dict: 146 """Cohere each pair of signals""" 147 # NB: The chan_freq_idxs parameter selects all freqs of relevance 148 # TODO: chan_freq_idxs is hard coded... change this to whatever is 149 # specified in toml gets fed to this function 150 chan_freq_idxs = 0 # only select raw channel 151 segment,chan_freq_idxs = _select_freq_chans(segment,chan_freq_idxs) 152 segment = np.squeeze(segment) # 3d array -> 2d array 153 raw_chans = list(range(len(segment))) 154 coherence_dict = {} 155 for (chx,chy),(x,y) in zip(combin(raw_chans,2),combin(segment,2)): 156 # There parameters defined here are similar to the default 157 # MatLab options 158 sample_freqs,cxy = coherence(x,y,fs=fs,window='hann', 159 nperseg=256,noverlap=None,nfft=None, 160 detrend='constant',axis=0) 161 # Select a subset of the 129 freqs: 129 = 256 // 2 + 1 162 idxs = np.array([0,2,4,8,16,32,64,-1]) # A logarithmic subset of freqs 163 sample_freqs,cxy = sample_freqs[idxs] , cxy[idxs] 164 165 # By default noverlap=None defaults it to half the segment length 166 # NB: FFT and therefore scipy.signal.coherence samples 167 # frequencies linearly, not logarithmically 168 169 # TODO: Do we want to implement something closer to the MatLab 170 # or is this good enough? 171 # Scipy does not provide an easy way of computing the coherence 172 # at specific frequencies as mscohere does in MatLab. As a 173 # temporary measure I'm returning a logarithmic subset of the 174 # freqs computed by scipy.signal.coherence(). 175 # We could potentially reimplement mscohere by hand (in Python) 176 # so that it can take a list of frequencies as input. 177 178 # Serialize the freqs 179 for sf,cohxy in zip(sample_freqs,cxy): 180 feat_name = f"coherence_freq_{round(sf,2)}_chx_{str(chx).zfill(3)}_chy_{str(chy).zfill(3)}" 181 coherence_dict[feat_name] = cohxy 182 183 return coherence_dict
Cohere each pair of signals
190def get_feats( 191 segment : np.ndarray or list, 192 features_list : list, 193 data_ops : dict 194 ) -> dict: 195 """Computes all features specified in featurelist. 196 197 Computes each feature specified in featurelist on all of the 198 segment, then assembles output into a features_dict—a dictionary 199 of all features computed, this corresponds to a single row to be 200 appended to our features dataframe. 201 202 To write a new feature, write the method in this module 203 (`sm_features.py`) then add it to execut conditionally in this 204 function (`get_feats`), finally add a string representation to the 205 FEATURES in your `Options.toml` config file. 206 207 Parameters 208 ---------- 209 `segment : np.ndarray or list` 210 Is a list of segment samples ordered by the raw_chan index. 211 Shape = (n_raw_chan , n_samples , n_wavelet_chan) 212 213 `features_list : list` 214 A list of strings: the names of the features you'd like to compute. 215 216 `data_ops : dict` 217 Dictionary containing metadata, as defined in Options.toml. 218 219 Returns 220 ------- 221 `dict` 222 A dictionary of all the features we computed. 223 """ 224 IMPLEMENTED_FEATS = ["mean_power","var","coherence"] 225 for feat in features_list: 226 if feat not in IMPLEMENTED_FEATS: 227 warnings.warn(f"{feat} has not yet been implemented.") 228 all_feats = {} 229 if "mean_power" in features_list: 230 AMP_FREQ_IDX_ALL = data_ops["AMP_FREQ_IDX_ALL"] 231 mp_feats = mean_power(segment,AMP_FREQ_IDX_ALL) 232 all_feats.update(mp_feats) # add it to greater dictionary 233 if "var" in features_list: 234 var_feats = var(segment,"all") 235 all_feats.update(var_feats) 236 if "coherence" in features_list: 237 FS = data_ops["FS"] 238 coherence_feats = coherence_all_pairs( 239 segment, 240 fs = FS 241 ) 242 all_feats.update(coherence_feats) 243 # print() 244 # for i,j in all_feats.items(): print(f"{i}:{j}") # debug 245 return all_feats
Computes all features specified in featurelist.
Computes each feature specified in featurelist on all of the segment, then assembles output into a features_dict—a dictionary of all features computed, this corresponds to a single row to be appended to our features dataframe.
To write a new feature, write the method in this module
(sm_features.py
) then add it to execut conditionally in this
function (get_feats
), finally add a string representation to the
FEATURES in your Options.toml
config file.
Parameters
segment : np.ndarray or list
Is a list of segment samples ordered by the raw_chan index.
Shape = (n_raw_chan , n_samples , n_wavelet_chan)
features_list : list
A list of strings: the names of the features you'd like to compute.
data_ops : dict
Dictionary containing metadata, as defined in Options.toml.
Returns
dict
A dictionary of all the features we computed.