ihkapy.fileio.metadata_io

View Source

 1import numpy as np
 2import logging
 3import pandas as pd
 4
 5def parse_metadata(
 6        session_metadata_path
 7        ) -> (dict,dict):
 8    """Opens and parses the '.edf' seizure recording metadata '.txt' file.
 9
10    The metadata text files are formatted with tabs so it's hard to know 
11    if the delimiters are properly syntaxed with a human eye. This is 
12    something to bare in mind. 
13
14    Parameters
15    ----------
16    `session_metadata_path : str`
17        The path of the file where the metadata is stored.
18        e.g. "/Users/steve/Documents/code/unm/data/AC75a-5 DOB 072519_TS_2020-03-23_17_30_04.txt"
19
20    Returns
21    -------
22    `dict `
23        Frontmatter information. The key-value pairs contain information 
24        such as the experimentor, the subject's (or animal's) id. 
25
26    `dict `
27        Seizure intervals. The keys are column headers, the values are
28        numpy arrays. Column headers are typically 
29        ['Number','Start Time','End Time','Time From Start','Channel','Annotation']
30    """
31    logger = logging.getLogger(__name__)
32    logger.setLevel(logging.INFO) # DEBUG < INFO < WARNING < ERROR < CRITICAL
33
34    frontmatter = {}
35    seizure_intervals = {}
36
37    with open(session_metadata_path,"r") as f:
38        def fmt(line): return "".join(line.split("\n")).split("\t")
39
40        # read the frontmatter into a dictionary
41        line = fmt(f.readline())
42        while line != [""]:
43            assert len(line) == 2 , "Frontmatter format error, must contain key-value pairs seperated by tabs."
44            key,value = line
45            frontmatter[key] = value
46            line = fmt(f.readline())
47
48        # skip lines whitespace lines
49        while line == [""]:
50            line = fmt(f.readline())
51
52        # Read seizure interval data into list
53        columns = line # Number, Start Time, End Time, Time From Start, Channel, Annotation
54        len_col = len(columns)
55        line = fmt(f.readline())
56        table = []
57        while line !=[""]:
58            assert len(line) == len_col , "Length of row data incompatible with number of columns."
59            table.append(line)
60            line = fmt(f.readline())
61        print("Finished reading annotation file.")
62
63        # transpose and put into columns dictionary
64        table = np.asarray(table).T
65        seizure_metadata = {colname:data for colname,data in zip(columns,table)}
66    return frontmatter,seizure_metadata
67
68
def get_seizure_start_end_times(session_metadata_path : str) -> (list, list):
    """Return the seizure start and end times (in s) listed in the metadata file.

    Calls `parse_metadata` on `session_metadata_path` and uses the second
    value it returns (the seizure metadata table). Rows are selected by
    their 'Annotation' field and the matching 'Time From Start' values are
    converted to floats.

    This function assumes that the start times will always be labelled
    "Seizure starts" and the end times "Seizure ends". It is case sensitive.

    Parameters
    ----------
    `session_metadata_path : str`
        Path of the metadata text file, passed straight to `parse_metadata`.

    Returns
    -------
    `list `
        Start times as floats, in the order they appear in the file.

    `list `
        End times as floats, in the order they appear in the file.

    Raises
    ------
    `AssertionError`
        If the number of start and end annotations differ, or if a start
        time is not strictly smaller than the end time at the same
        position. Raised explicitly (not via `assert`) so the checks
        survive `python -O`.
    """
    _,seizure_metadata = parse_metadata(session_metadata_path)
    # Put the metadata into a pandas dataframe, and select the columns
    df = pd.DataFrame(seizure_metadata)
    annotations = df["Annotation"]
    times = df["Time From Start"]
    start_times = [float(t) for t in times[annotations == "Seizure starts"]]
    end_times = [float(t) for t in times[annotations == "Seizure ends"]]

    # TODO: depending on exactly how the annotation (aka metadata)
    # files are specified and what formats are used, we may want to use
    # a more elaborate regex for selecting start and end of seizures

    # Verify there is no formatting error
    if len(start_times) != len(end_times):
        raise AssertionError("Incompatible seizure start/end times")
    for start, end in zip(start_times, end_times):
        # `not start < end` (rather than `start >= end`) also rejects NaN.
        if not start < end:
            raise AssertionError("Seizure cannot end before it starts!")

    return start_times,end_times

def parse_metadata(session_metadata_path) -> (, ): View Source

 6def parse_metadata(
 7        session_metadata_path
 8        ) -> (dict,dict):
 9    """Opens and parses the '.edf' seizure recording metadata '.txt' file.
10
11    The metadata text files are formatted with tabs so it's hard to know 
12    if the delimiters are properly syntaxed with a human eye. This is 
13    something to bare in mind. 
14
15    Parameters
16    ----------
17    `session_metadata_path : str`
18        The path of the file where the metadata is stored.
19        e.g. "/Users/steve/Documents/code/unm/data/AC75a-5 DOB 072519_TS_2020-03-23_17_30_04.txt"
20
21    Returns
22    -------
23    `dict `
24        Frontmatter information. The key-value pairs contain information 
25        such as the experimentor, the subject's (or animal's) id. 
26
27    `dict `
28        Seizure intervals. The keys are column headers, the values are
29        numpy arrays. Column headers are typically 
30        ['Number','Start Time','End Time','Time From Start','Channel','Annotation']
31    """
32    logger = logging.getLogger(__name__)
33    logger.setLevel(logging.INFO) # DEBUG < INFO < WARNING < ERROR < CRITICAL
34
35    frontmatter = {}
36    seizure_intervals = {}
37
38    with open(session_metadata_path,"r") as f:
39        def fmt(line): return "".join(line.split("\n")).split("\t")
40
41        # read the frontmatter into a dictionary
42        line = fmt(f.readline())
43        while line != [""]:
44            assert len(line) == 2 , "Frontmatter format error, must contain key-value pairs seperated by tabs."
45            key,value = line
46            frontmatter[key] = value
47            line = fmt(f.readline())
48
49        # skip lines whitespace lines
50        while line == [""]:
51            line = fmt(f.readline())
52
53        # Read seizure interval data into list
54        columns = line # Number, Start Time, End Time, Time From Start, Channel, Annotation
55        len_col = len(columns)
56        line = fmt(f.readline())
57        table = []
58        while line !=[""]:
59            assert len(line) == len_col , "Length of row data incompatible with number of columns."
60            table.append(line)
61            line = fmt(f.readline())
62        print("Finished reading annotation file.")
63
64        # transpose and put into columns dictionary
65        table = np.asarray(table).T
66        seizure_metadata = {colname:data for colname,data in zip(columns,table)}
67    return frontmatter,seizure_metadata

Opens and parses the '.edf' seizure recording metadata '.txt' file.

The metadata text files are tab-delimited, so it is hard to tell by eye whether the delimiters are correct. This is something to bear in mind.

Parameters

session_metadata_path : str The path of the file where the metadata is stored. e.g. "/Users/steve/Documents/code/unm/data/AC75a-5 DOB 072519_TS_2020-03-23_17_30_04.txt"

Returns

dict Frontmatter information. The key-value pairs contain information such as the experimenter and the subject's (or animal's) id.

dict Seizure intervals. The keys are column headers, the values are numpy arrays. Column headers are typically ['Number','Start Time','End Time','Time From Start','Channel','Annotation']

def get_seizure_start_end_times(session_metadata_path: str) -> (, ): View Source

def get_seizure_start_end_times(session_metadata_path : str) -> (list, list):
    """Return the seizure start and end times (in s) listed in the metadata file.

    Calls `parse_metadata` on `session_metadata_path` and uses the second
    value it returns (the seizure metadata table). Rows are selected by
    their 'Annotation' field and the matching 'Time From Start' values are
    converted to floats.

    This function assumes that the start times will always be labelled
    "Seizure starts" and the end times "Seizure ends". It is case sensitive.

    Parameters
    ----------
    `session_metadata_path : str`
        Path of the metadata text file, passed straight to `parse_metadata`.

    Returns
    -------
    `list `
        Start times as floats, in the order they appear in the file.

    `list `
        End times as floats, in the order they appear in the file.

    Raises
    ------
    `AssertionError`
        If the number of start and end annotations differ, or if a start
        time is not strictly smaller than the end time at the same
        position. Raised explicitly (not via `assert`) so the checks
        survive `python -O`.
    """
    _,seizure_metadata = parse_metadata(session_metadata_path)
    # Put the metadata into a pandas dataframe, and select the columns
    df = pd.DataFrame(seizure_metadata)
    annotations = df["Annotation"]
    times = df["Time From Start"]
    start_times = [float(t) for t in times[annotations == "Seizure starts"]]
    end_times = [float(t) for t in times[annotations == "Seizure ends"]]

    # TODO: depending on exactly how the annotation (aka metadata)
    # files are specified and what formats are used, we may want to use
    # a more elaborate regex for selecting start and end of seizures

    # Verify there is no formatting error
    if len(start_times) != len(end_times):
        raise AssertionError("Incompatible seizure start/end times")
    for start, end in zip(start_times, end_times):
        # `not start < end` (rather than `start >= end`) also rejects NaN.
        if not start < end:
            raise AssertionError("Seizure cannot end before it starts!")

    return start_times,end_times

Returns the start and end times (in s) specified by the file metadata

This function calls parse_metadata on its input parameter session_metadata_path to get the seizure metadata (i.e. the second value returned by parse_metadata). The start and end times of the seizures are then determined from the annotation fields.

This function assumes that the start times will always be labelled "Seizure starts" and the end times "Seizure ends". It is case sensitive.