ihkapy.fileio.metadata_io
"""Read the tab-delimited '.txt' metadata that accompanies '.edf' recordings."""
import logging

import numpy as np
import pandas as pd


class MetadataFormatError(ValueError, AssertionError):
    """Raised when a metadata file does not follow the expected layout.

    Inherits from ``AssertionError`` as well as ``ValueError`` so that
    callers written against the earlier ``assert``-based checks keep
    working.
    """


def parse_metadata(session_metadata_path: str) -> "tuple[dict, dict]":
    """Open and parse the '.edf' seizure recording metadata '.txt' file.

    The file is expected to contain a frontmatter section of tab-separated
    key-value pairs, one or more blank lines, and then a tab-separated
    table whose first row is the column header. Reading of the table stops
    at the first blank line after it, or at the end of the file.

    The metadata text files are tab-delimited, so it is hard to tell by eye
    whether the delimiters are correct. This is something to bear in mind.

    Parameters
    ----------
    session_metadata_path : str
        The path of the file where the metadata is stored.

    Returns
    -------
    dict
        Frontmatter information, mapping each key to its (string) value.
    dict
        Seizure intervals. The keys are the column headers and the values
        are numpy arrays of strings, one entry per table row. Column
        headers are typically
        ['Number','Start Time','End Time','Time From Start','Channel','Annotation'].
        If the table has a header but no rows, every array is empty. If the
        file contains no table at all, the dictionary is empty.

    Raises
    ------
    MetadataFormatError
        If a frontmatter line does not have exactly two tab-separated
        fields, or a table row does not have as many fields as the header.
    """
    # The logging level is left to the application's configuration.
    logger = logging.getLogger(__name__)

    def split_fields(raw_line):
        # Drop the newline, then split on tabs; a blank line becomes [""].
        return raw_line.replace("\n", "").split("\t")

    blank = [""]
    frontmatter = {}
    columns = None
    rows = []

    with open(session_metadata_path, "r") as f:
        # Iterating over the file ends cleanly at EOF. With readline() an
        # exhausted file looks exactly like a blank line, which made the
        # blank-line skipping below spin forever on files without a table.
        lines = (split_fields(raw_line) for raw_line in f)

        # Frontmatter: key-value pairs up to the first blank line.
        for fields in lines:
            if fields == blank:
                break
            if len(fields) != 2:
                raise MetadataFormatError(
                    "Frontmatter format error, must contain key-value pairs seperated by tabs."
                )
            key, value = fields
            frontmatter[key] = value

        # Skip blank lines; the first non-blank line is the table header.
        for fields in lines:
            if fields != blank:
                columns = fields
                break

        # Table rows, up to the next blank line or the end of the file.
        if columns is not None:
            for fields in lines:
                if fields == blank:
                    break
                if len(fields) != len(columns):
                    raise MetadataFormatError(
                        "Length of row data incompatible with number of columns."
                    )
                rows.append(fields)

    logger.info("Finished reading annotation file.")

    if columns is None:
        return frontmatter, {}

    # Transpose rows into columns. The reshape keeps one (empty) array per
    # column even when the table has a header but no rows.
    table = np.asarray(rows, dtype=str).reshape(-1, len(columns)).T
    seizure_metadata = dict(zip(columns, table))
    return frontmatter, seizure_metadata


def get_seizure_start_end_times(session_metadata_path: str) -> "tuple[list, list]":
    """Return the seizure start and end times (in s) given by the file metadata.

    This function calls `parse_metadata` on its input parameter
    `session_metadata_path` to get the seizure metadata (i.e. the second
    value returned by `parse_metadata`). The start and end times are the
    'Time From Start' entries of the rows whose 'Annotation' field is
    exactly "Seizure starts" or "Seizure ends" respectively. The match is
    case sensitive.

    Parameters
    ----------
    session_metadata_path : str
        The path of the metadata file.

    Returns
    -------
    list of float
        Seizure start times, in file order.
    list of float
        Seizure end times, in file order.
        Both lists are empty when the file has no annotation rows.

    Raises
    ------
    MetadataFormatError
        If the number of start and end annotations differ, or if a start
        time is not strictly smaller than the matching end time.
    """
    _, seizure_metadata = parse_metadata(session_metadata_path)
    if not seizure_metadata:
        # No annotation table in the file, hence no seizures to report.
        return [], []

    # Put the metadata into a pandas dataframe, and select the columns
    annotations = pd.DataFrame(seizure_metadata)
    is_start = annotations["Annotation"] == "Seizure starts"
    is_end = annotations["Annotation"] == "Seizure ends"
    start_times = [float(t) for t in annotations.loc[is_start, "Time From Start"]]
    end_times = [float(t) for t in annotations.loc[is_end, "Time From Start"]]

    # TODO: depending on exactly how the annotation (aka metadata)
    # files are specified and what formats are used, we may want to use
    # a more elaborate regex for selecting start and end of seizures

    # Verify there is no formatting error
    if len(start_times) != len(end_times):
        raise MetadataFormatError("Incompatible seizure start/end times")
    for start, end in zip(start_times, end_times):
        if not start < end:
            raise MetadataFormatError("Seizure cannot end before it starts!")

    return start_times, end_times
class MetadataFormatError(ValueError, AssertionError):
    """Raised when a metadata file does not follow the expected layout.

    Inherits from ``AssertionError`` as well as ``ValueError`` so that
    callers written against the earlier ``assert``-based checks keep
    working.
    """


def parse_metadata(session_metadata_path: str) -> "tuple[dict, dict]":
    """Open and parse the '.edf' seizure recording metadata '.txt' file.

    The file is expected to contain a frontmatter section of tab-separated
    key-value pairs, one or more blank lines, and then a tab-separated
    table whose first row is the column header. Reading of the table stops
    at the first blank line after it, or at the end of the file.

    The metadata text files are tab-delimited, so it is hard to tell by eye
    whether the delimiters are correct. This is something to bear in mind.

    Parameters
    ----------
    session_metadata_path : str
        The path of the file where the metadata is stored.

    Returns
    -------
    dict
        Frontmatter information, mapping each key to its (string) value.
    dict
        Seizure intervals. The keys are the column headers and the values
        are numpy arrays of strings, one entry per table row. Column
        headers are typically
        ['Number','Start Time','End Time','Time From Start','Channel','Annotation'].
        If the table has a header but no rows, every array is empty. If the
        file contains no table at all, the dictionary is empty.

    Raises
    ------
    MetadataFormatError
        If a frontmatter line does not have exactly two tab-separated
        fields, or a table row does not have as many fields as the header.
    """
    # The logging level is left to the application's configuration.
    logger = logging.getLogger(__name__)

    def split_fields(raw_line):
        # Drop the newline, then split on tabs; a blank line becomes [""].
        return raw_line.replace("\n", "").split("\t")

    blank = [""]
    frontmatter = {}
    columns = None
    rows = []

    with open(session_metadata_path, "r") as f:
        # Iterating over the file ends cleanly at EOF. With readline() an
        # exhausted file looks exactly like a blank line, which made the
        # blank-line skipping below spin forever on files without a table.
        lines = (split_fields(raw_line) for raw_line in f)

        # Frontmatter: key-value pairs up to the first blank line.
        for fields in lines:
            if fields == blank:
                break
            if len(fields) != 2:
                raise MetadataFormatError(
                    "Frontmatter format error, must contain key-value pairs seperated by tabs."
                )
            key, value = fields
            frontmatter[key] = value

        # Skip blank lines; the first non-blank line is the table header.
        for fields in lines:
            if fields != blank:
                columns = fields
                break

        # Table rows, up to the next blank line or the end of the file.
        if columns is not None:
            for fields in lines:
                if fields == blank:
                    break
                if len(fields) != len(columns):
                    raise MetadataFormatError(
                        "Length of row data incompatible with number of columns."
                    )
                rows.append(fields)

    logger.info("Finished reading annotation file.")

    if columns is None:
        return frontmatter, {}

    # Transpose rows into columns. The reshape keeps one (empty) array per
    # column even when the table has a header but no rows.
    table = np.asarray(rows, dtype=str).reshape(-1, len(columns)).T
    seizure_metadata = dict(zip(columns, table))
    return frontmatter, seizure_metadata
Opens and parses the '.edf' seizure recording metadata '.txt' file.
The metadata text files are tab-delimited, so it is hard to tell by eye whether the delimiters are correct. This is something to bear in mind.
Parameters
session_metadata_path : str
The path of the file where the metadata is stored.
e.g. "/Users/steve/Documents/code/unm/data/AC75a-5 DOB 072519_TS_2020-03-23_17_30_04.txt"
Returns
dict
Frontmatter information. The key-value pairs contain information
such as the experimenter, the subject's (or animal's) id.
dict
Seizure intervals. The keys are column headers, the values are
numpy arrays. Column headers are typically
['Number','Start Time','End Time','Time From Start','Channel','Annotation']
class SeizureAnnotationError(ValueError, AssertionError):
    """Raised when seizure start/end annotations are inconsistent.

    Inherits from ``AssertionError`` as well as ``ValueError`` so that
    callers written against the earlier ``assert``-based checks keep
    working.
    """


def get_seizure_start_end_times(session_metadata_path: str) -> "tuple[list, list]":
    """Return the seizure start and end times (in s) given by the file metadata.

    This function calls `parse_metadata` on its input parameter
    `session_metadata_path` to get the seizure metadata (i.e. the second
    value returned by `parse_metadata`). The start and end times are the
    'Time From Start' entries of the rows whose 'Annotation' field is
    exactly "Seizure starts" or "Seizure ends" respectively. The match is
    case sensitive.

    Parameters
    ----------
    session_metadata_path : str
        The path of the metadata file.

    Returns
    -------
    list of float
        Seizure start times, in file order.
    list of float
        Seizure end times, in file order.
        Both lists are empty when `parse_metadata` returns no seizure
        metadata.

    Raises
    ------
    SeizureAnnotationError
        If the number of start and end annotations differ, or if a start
        time is not strictly smaller than the matching end time.
    """
    _, seizure_metadata = parse_metadata(session_metadata_path)
    if not seizure_metadata:
        # No annotation columns were parsed, so there is nothing to select
        # (indexing the 'Annotation' column below would raise KeyError).
        return [], []

    # Put the metadata into a pandas dataframe, and select the columns
    annotations = pd.DataFrame(seizure_metadata)
    is_start = annotations["Annotation"] == "Seizure starts"
    is_end = annotations["Annotation"] == "Seizure ends"
    start_times = [float(t) for t in annotations.loc[is_start, "Time From Start"]]
    end_times = [float(t) for t in annotations.loc[is_end, "Time From Start"]]

    # TODO: depending on exactly how the annotation (aka metadata)
    # files are specified and what formats are used, we may want to use
    # a more elaborate regex for selecting start and end of seizures

    # Verify there is no formatting error. Explicit raises are used because
    # assert statements are skipped when Python runs with -O.
    if len(start_times) != len(end_times):
        raise SeizureAnnotationError("Incompatible seizure start/end times")
    for start, end in zip(start_times, end_times):
        if not start < end:
            raise SeizureAnnotationError("Seizure cannot end before it starts!")

    return start_times, end_times
Returns the start and end times (in s) specified by the file metadata
This function calls parse_metadata
on its input parameter
session_metadata_path
to get the seizure metadata (i.e. the second
param returned by parse_metadata
). Then the start and end times
of the seizures are determined based on the annotation fields.
This function assumes that the start times will always be labelled "Seizure starts" and the end times "Seizure ends". It is case sensitive.