# Source code for heartpy.heartpy

'''
main module for HeartPy.
'''

from datetime import datetime
import time
import os

import numpy as np
from scipy.interpolate import UnivariateSpline
from scipy.signal import butter, filtfilt, welch, periodogram, resample_poly, resample

from . import exceptions
from .datautils import get_data, get_samplerate_mstimer, get_samplerate_datetime,\
                       rolling_mean, outliers_iqr_method, outliers_modified_z, \
                       load_exampledata
from .preprocessing import scale_data, scale_sections, interpolate_clipping, \
                           flip_signal, enhance_peaks, enhance_ecg_peaks
from .filtering import filter_signal, hampel_filter, hampel_correcter, \
                       remove_baseline_wander, smooth_signal
from .peakdetection import make_windows, append_dict, fit_peaks, check_peaks, \
                           check_binary_quality, interpolate_peaks
from .visualizeutils import plotter, segment_plotter, plot_poincare, plot_breathing
from .analysis import calc_rr, calc_rr_segment, clean_rr_intervals, calc_ts_measures, \
                      calc_fd_measures, calc_breathing, calc_poincare

from . import config
config.init() #initialize global conf vars

__all__ = ['enhance_peaks',
           'enhance_ecg_peaks',
           'get_data',
           'get_samplerate_mstimer',
           'get_samplerate_datetime',
           'hampel_correcter',
           'hampel_filter',
           'load_exampledata',
           'plotter',
           'plot_breathing',
           'plot_poincare',
           'process',
           'process_rr',
           'process_segmentwise',
           'flip_signal',
           'remove_baseline_wander',
           'scale_data',
           'scale_sections',
           'segment_plotter',
           'smooth_signal',
           'filter_signal',
           'run_tests']


def process(hrdata, sample_rate, windowsize=0.75, report_time=False, 
            calc_freq=False, freq_method='welch', freq_square=True,
            interp_clipping=False, clipping_scale=False, interp_threshold=1020, 
            hampel_correct=False, bpmmin=40, bpmmax=180, reject_segmentwise=False, 
            high_precision=False, high_precision_fs=1000.0, breathing_method='fft',
            clean_rr=False, clean_rr_method='quotient-filter', measures=None, 
            working_data=None):
    '''processes passed heart rate data.
    
    Processes the passed heart rate data. Returns the working_data{} dict with 
    intermediate results and the measures{} dict containing the computed measures.

    Parameters
    ----------
    hrdata : 1d array or list 
        array or list containing heart rate data to be analysed

    sample_rate : int or float
        the sample rate with which the heart rate data is sampled

    windowsize : int or float
        the window size in seconds to use in the calculation of the moving average.
        Calculated as windowsize * sample_rate
        default : 0.75

    report_time : bool
        whether to report total processing time of algorithm 
        default : False

    calc_freq : bool
        whether to compute frequency-domain measurements 
        default : False

    freq_method : str
        method used to extract the frequency spectrum. Available: 'fft' (Fourier Analysis), 
        'periodogram', and 'welch' (Welch's method). 
        default : 'welch'

    freq_square : bool
        whether to square the power spectrum returned when computing frequency measures
        default : True

    interp_clipping : bool 
        whether to detect and interpolate clipping segments of the signal 
        default : False

    clipping_scale : bool
        whether to scale the data prior to clipping detection. Can correct errors 
        if signal amplitude has been affected after digitization (for example through 
        filtering). Not recommended by default. 
        default : False

    interp_threshold : int or float
        threshold to use to detect clipping segments. Recommended to be a few
        datapoints below the sensor or ADC's maximum value (to account for
        slight data line noise). 
        default : 1020, 4 below max of 1024 for 10-bit ADC

    hampel_correct : bool 
        whether to reduce noisy segments using large median filter. Disabled by
        default due to computational complexity and (small) distortions induced
        into output measures. Generally it is not necessary.
        default : False

    bpmmin : int or float 
        minimum value to see as likely for BPM when fitting peaks
        default : 40

    bpmmax : int or float
        maximum value to see as likely for BPM when fitting peaks
        default : 180

    reject_segmentwise : bool
        whether to reject segments with more than 30% rejected beats. 
        By default looks at segments of 10 beats at a time. 
        default : False

    high_precision : bool 
        whether to estimate peak positions by upsampling signal to sample rate
        as specified in high_precision_fs
        default : False

    high_precision_fs : int or float 
        the sample rate to which to upsample for more accurate peak position estimation 
        default : 1000 Hz

    breathing_method : str
        method to use for estimating breathing rate, should be 'welch' or 'fft'
        default : fft

    clean_rr : bool
        if true, the RR_list is further cleaned with an outlier rejection pass
        default : False

    clean_rr_method: str
        how to find and reject outliers. Available methods are 'quotient-filter', 
        'iqr' (interquartile range), and 'z-score'.
        default : 'quotient-filter'

    measures : dict
        dictionary object used by heartpy to store computed measures. A new, empty
        dict is created on every call if not passed to function.

    working_data : dict
        dictionary object that contains all heartpy's working data (temp) objects.
        A new, empty dict is created on every call if not passed to function.

    Returns
    -------
    working_data : dict
        dictionary object used to store temporary values.
    
    measures : dict
        dictionary object used by heartpy to store computed measures.

    Raises
    ------
    AssertionError
        if hrdata is not one-dimensional.

    Examples
    --------
    There's example data included in HeartPy to help you get up to speed. Here are
    provided two examples of how to approach heart rate analysis.

    The first example contains noisy sections and comes with a timer column that
    counts milliseconds since start of recording. 

    >>> import heartpy as hp
    >>> data, timer = hp.load_exampledata(1)
    >>> sample_rate = hp.get_samplerate_mstimer(timer)
    >>> '%.3f' %sample_rate
    '116.996'

    The sample rate is one of the most important characteristics during the
    heart rate analysis, as all measures are relative to this.
    
    With all data loaded and the sample rate determined, analysis is now easy:

    >>> wd, m = hp.process(data, sample_rate = sample_rate)

    The measures ('m') dictionary returned contains all determined measures

    >>> '%.3f' %m['bpm']
    '62.376'
    >>> '%.3f' %m['rmssd']
    '57.070'

    Using a slightly longer example:

    >>> data, timer = hp.load_exampledata(2)
    >>> print(timer[0])
    2016-11-24 13:58:58.081000

    As you can see something is going on here: we have a datetime-based timer.
    HeartPy can accommodate this and determine sample rate nonetheless:

    >>> sample_rate = hp.get_samplerate_datetime(timer, timeformat = '%Y-%m-%d %H:%M:%S.%f')
    >>> '%.3f' %sample_rate
    '100.420'

    Now analysis can proceed. Let's also compute frequency domain data and interpolate clipping.
    In this segment the clipping is visible around amplitude 980 so let's set that as well:

    >>> wd, m = hp.process(data, sample_rate = sample_rate, calc_freq = True, 
    ... interp_clipping = True, interp_threshold = 975)
    >>> '%.3f' %m['bpm']
    '97.270'
    >>> '%.3f' %m['rmssd']
    '34.743'
    >>> '%.3f' %m['lf/hf']
    '4.960'

    High precision mode will upsample 200ms of data surrounding detected peak
    and attempt to estimate the peak's real position with higher accuracy.
    Use high_precision_fs to set the virtual sample rate to which the peak
    will be upsampled (e.g. 1000Hz gives an estimated 1ms accuracy)

    >>> wd, m = hp.process(data, sample_rate = sample_rate, calc_freq = True, 
    ... high_precision = True, high_precision_fs = 1000.0)

    Finally setting reject_segmentwise will reject segments with more than 30% rejected beats
    See check_binary_quality in the peakdetection.py module.

    >>> wd, m = hp.process(data, sample_rate = sample_rate, calc_freq = True, 
    ... reject_segmentwise = True)

    Final test for code coverage, let's turn all bells and whistles on that haven't been
    tested yet

    >>> wd, m = hp.process(data, sample_rate = 100.0, calc_freq = True, 
    ... interp_clipping = True, clipping_scale = True, hampel_correct = True,
    ... reject_segmentwise = True, clean_rr = True)
    '''
    #time.clock() was removed in Python 3.8, perf_counter() is its replacement
    t1 = time.perf_counter()

    #create fresh containers on every call: a mutable default argument would be
    #shared between calls and leak results from one analysis into the next
    if measures is None:
        measures = {}
    if working_data is None:
        working_data = {}

    assert np.asarray(hrdata).ndim == 1, (
        'error: multi-dimensional data passed to process() '
        'function. Please supply a 1d array or list containing heart rate signal data. '
        '\n\nDid you perhaps '
        'include an index column?')

    #optional pre-processing of the raw signal
    if interp_clipping:
        if clipping_scale:
            hrdata = scale_data(hrdata)
        hrdata = interpolate_clipping(hrdata, sample_rate, threshold=interp_threshold)

    if hampel_correct:
        hrdata = enhance_peaks(hrdata)
        hrdata = hampel_correcter(hrdata, sample_rate)

    #peak detection
    working_data['hr'] = hrdata
    rol_mean = rolling_mean(hrdata, windowsize, sample_rate)

    working_data = fit_peaks(hrdata, rol_mean, sample_rate, bpmmin=bpmmin,
                             bpmmax=bpmmax, working_data=working_data)
    
    if high_precision:
        working_data = interpolate_peaks(hrdata, working_data['peaklist'], 
                                         sample_rate=sample_rate, 
                                         desired_sample_rate=high_precision_fs, 
                                         working_data=working_data)

    #peak-peak intervals and rejection of incorrect detections
    working_data = calc_rr(working_data['peaklist'], sample_rate, working_data=working_data)
    working_data = check_peaks(working_data['RR_list'], working_data['peaklist'], 
                               working_data['ybeat'], reject_segmentwise, 
                               working_data=working_data)

    if clean_rr:
        working_data = clean_rr_intervals(working_data, method=clean_rr_method)

    #time-series and poincare measures
    working_data, measures = calc_ts_measures(working_data['RR_list_cor'], 
                                              working_data['RR_diff'],
                                              working_data['RR_sqdiff'], 
                                              measures=measures, 
                                              working_data=working_data)
    
    measures = calc_poincare(working_data['RR_list'], working_data['RR_masklist'], 
                             measures=measures, working_data=working_data)

    #breathing rate estimation is best-effort: a failure is reported as NaN.
    #Catch Exception rather than everything, so that KeyboardInterrupt and
    #SystemExit still propagate.
    try:
        measures, working_data = calc_breathing(working_data['RR_list_cor'], hrdata, 
                                                sample_rate, method=breathing_method, 
                                                measures=measures, 
                                                working_data=working_data)
    except Exception:
        measures['breathingrate'] = np.nan

    if calc_freq:
        #forward freq_square, as process_rr() does with its square_spectrum argument.
        #NOTE(review): calc_fd_measures is defined in analysis.py; confirm its own
        #square_spectrum default matches freq_square's default of True.
        working_data, measures = calc_fd_measures(method=freq_method, 
                                                  square_spectrum=freq_square,
                                                  measures=measures,
                                                  working_data=working_data)
    
    #report time if requested. Exclude from tests, output is untestable.
    if report_time: # pragma: no cover
        print('\nFinished in %.8s sec' %(time.perf_counter()-t1))

    return working_data, measures


def process_segmentwise(hrdata, sample_rate, segment_width=120, segment_overlap=0,
                        segment_min_size=20, replace_outliers=False, outlier_method='iqr',
                        mode='full', **kwargs):
    '''processes passed heart rate data with a windowed function

    Analyses a long heart rate data array by running a moving window 
    over the data, computing measures in each iteration. Both the window width
    and the overlap with the previous window location are settable.

    Parameters
    ----------
    hrdata : 1d array or list 
        array or list containing heart rate data to be analysed

    sample_rate : int or float
        the sample rate with which the heart rate data is sampled

    segment_width : int or float
        width of segments in seconds, within which all measures will be computed
        default : 120

    segment_overlap: float
        overlap fraction of adjacent segments.
        Needs to be 0 <= segment_overlap < 1.
        default : 0 (no overlap)

    segment_min_size : int
        often a tail end of the data remains after segmenting into segments,
        that is shorter than the specified segment_width.
        segment_min_size indicates the minimum length (in seconds) the tail 
        end needs to be in order to be included in analysis. It is discarded 
        if it's shorter.
        default : 20

    replace_outliers : bool
        whether to detect and replace outliers (likely caused by peak fitting
        errors on one or more segments) with the median. Will iterate over
        all computed measures and evaluate each.
        default : False

    outlier_method : str
        what method to use to detect outliers. Available are 'iqr', which uses the
        inter-quartile range, and 'z-score', which uses the modified z-score approach.
        default : 'iqr'

    mode : str
        'full' or 'fast'. 'full' runs the complete process() pipeline on every 
        segment, 'fast' runs peak detection once on the whole signal and only 
        computes the time-series measures per segment.
        default : 'full'

    **kwargs
        any further keyword arguments are passed on to process()

    Returns
    -------
    working_data : dict
        dictionary object used to store temporary values.
    
    measures : dict
        dictionary object used by heartpy to store computed measures.

    Raises
    ------
    AssertionError
        if segment_overlap or outlier_method have an unsupported value.

    ValueError
        if mode is neither 'full' nor 'fast'.
        
    Examples
    --------
    Given one of the included example datasets we can demonstrate this function:

    >>> import heartpy as hp
    >>> data, timer = hp.load_exampledata(2)
    >>> sample_rate = hp.get_samplerate_datetime(timer, timeformat = '%Y-%m-%d %H:%M:%S.%f')
    >>> wd, m = hp.process_segmentwise(data, sample_rate, segment_width=120, segment_overlap=0.5)
    >>> len(m['bpm'])
    11

    The function has split the data into 11 segments and analysed each one. Every key in the
    measures (m) dict now contains a list of that measure for each segment.

    >>> [round(x, 1) for x in m['bpm']]
    [100.0, 96.8, 97.2, 97.9, 96.7, 96.8, 96.8, 95.0, 92.9, 96.7, 99.2]

    Specifying mode = 'fast' will run peak detection once and use detections
    to compute measures over each segment. Useful for speed ups, but typically
    the full mode has better results.

    >>> wd, m = hp.process_segmentwise(data, sample_rate, segment_width=120, segment_overlap=0.5, 
    ... mode = 'fast', replace_outliers = True)

    You can specify the outlier detection method ('iqr' - interquartile range, or 'z-score' for 
    modified z-score approach).
    
    >>> wd, m = hp.process_segmentwise(data, sample_rate, segment_width=120, segment_overlap=0.5, 
    ... mode = 'fast', replace_outliers = True, outlier_method = 'z-score')

    '''

    assert 0 <= segment_overlap < 1.0, ('value error: segment_overlap needs to be '
                                        '0 <= segment_overlap < 1.0!')

    assert outlier_method in ['iqr', 'z-score'], (
        'Unknown outlier detection method specified, '
        'use either \'iqr\' or \'z-score\'')

    s_measures = {}
    s_working_data = {}

    slice_indices = make_windows(hrdata, sample_rate, segment_width, segment_overlap, 
                                 segment_min_size)

    if mode == 'full':
        for i, ii in slice_indices:
            #hand every segment its own result containers (unless the caller supplied
            #them), so no state from a previous segment can leak into this one
            segment_kwargs = dict(kwargs)
            segment_kwargs.setdefault('measures', {})
            segment_kwargs.setdefault('working_data', {})
            try:
                working_data, measures = process(hrdata[i:ii], sample_rate, 
                                                 **segment_kwargs)
            except exceptions.BadSignalWarning:
                #segment could not be analysed, leave it out of the results
                continue

            for k in measures:
                s_measures = append_dict(s_measures, k, measures[k])
            for k in working_data:
                s_working_data = append_dict(s_working_data, k, working_data[k])
            s_measures = append_dict(s_measures, 'segment_indices', (i, ii))
            s_working_data = append_dict(s_working_data, 'segment_indices', (i, ii))

    elif mode == 'fast':
        working_data, measures = process(hrdata, sample_rate, **kwargs)
        peaklist = np.asarray(working_data['peaklist'])
        binary_peaklist = working_data['binary_peaklist']

        for i, ii in slice_indices:
            in_window = np.flatnonzero((peaklist >= i) & (peaklist < ii))
            if in_window.size == 0:
                #no peaks detected inside this window: nothing to compute. Skip it,
                #similar to how 'full' mode skips segments that cannot be analysed.
                continue

            pks = peaklist[in_window]
            #positions of the first and last peak of this window in the full peaklist.
            #Builtin int() is used since the np.int alias was removed in NumPy 1.24.
            first = int(np.flatnonzero(peaklist == pks[0])[0])
            last = int(np.flatnonzero(peaklist == pks[-1])[-1])
            pks_b = binary_peaklist[first:last + 1]

            rr_list = (np.diff(pks) / sample_rate) * 1000.0 #peak-peak intervals in ms
            rr_list, rr_diff, rr_sqdiff = calc_rr_segment(rr_list, pks_b)
            #explicit empty dicts keep the per-segment measures independent
            _, tmp = calc_ts_measures(rr_list, rr_diff, rr_sqdiff, 
                                      measures={}, working_data={})
            for k in tmp:
                s_measures = append_dict(s_measures, k, tmp[k])
            s_measures = append_dict(s_measures, 'segment_indices', (i, ii))
            s_working_data = append_dict(s_working_data, 'segment_indices', (i, ii))
            s_working_data = append_dict(s_working_data, 'rr_list', rr_list)
            s_working_data = append_dict(s_working_data, 'rr_diff', rr_diff)
            s_working_data = append_dict(s_working_data, 'rr_sqdiff', rr_sqdiff)
            #NOTE(review): this stores the complete peaklist for every segment, not
            #just the peaks inside the window (pks). Kept as-is; confirm the intent.
            s_working_data = append_dict(s_working_data, 'peaklist', peaklist)

    else:
        raise ValueError('mode not understood! Needs to be either \'fast\' or \'full\', passed: %s' %mode)

    if replace_outliers:
        detectors = {'iqr': outliers_iqr_method,
                     'z-score': outliers_modified_z}
        detect_outliers = detectors.get(outlier_method.lower())
        #these entries are skipped during outlier replacement
        skipped = ('nn20', 'nn50', 'interp_rr_function', 
                   'interp_rr_linspace', 'segment_indices')
        if detect_outliers is not None:
            for k in s_measures:
                if k not in skipped:
                    s_measures[k], _ = detect_outliers(s_measures[k])

    return s_working_data, s_measures


def process_rr(rr_list, threshold_rr=False, clean_rr=False, 
               clean_rr_method='quotient-filter', calc_freq=False, 
               freq_method='welch', square_spectrum=True, 
               measures=None, working_data=None):
    '''process rr-list

    Function that takes and processes a list of peak-peak intervals.
    Computes all measures as computed by the regular process() function, and
    sets up all dicts required for plotting poincare plots.
    
    Several filtering methods are available as well.

    Parameters
    ----------
    rr_list : 1d array or list
        list or array containing peak-peak intervals (in ms).

    threshold_rr : bool
        if true, the peak-peak intervals are cleaned using a threshold filter, which
        rejects all intervals that differ 30% from the mean peak-peak interval, with
        a minimum of 300ms. 
        default : False

    clean_rr : bool
        if true, the RR_list is further cleaned with an outlier rejection pass. This pass
        is performed after threshold_rr, if that is specified.
        default : False

    clean_rr_method: str
        how to find and reject outliers. Available methods are 'quotient-filter', 
        'iqr' (interquartile range), and 'z-score'.
        default : 'quotient-filter'

    calc_freq : bool
        whether to compute frequency-domain measurements 
        default : False

    freq_method : str
        method used to extract the frequency spectrum. Available: 'fft' (Fourier Analysis), 
        'periodogram', and 'welch' (Welch's method). 
        default : 'welch'

    square_spectrum : bool
        whether to square the power spectrum returned.
        default : True

    measures : dict
        dictionary object used by heartpy to store computed measures. A new, empty
        dict is created on every call if not passed to function.

    working_data : dict
        dictionary object that contains all heartpy's working data (temp) objects.
        A new, empty dict is created on every call if not passed to function.

    Returns
    -------
    working_data : dict
        dictionary object used to store temporary values.
    
    measures : dict
        dictionary object used by heartpy to store computed measures.

    Examples
    --------
    Let's generate an RR-list first.

    >>> import heartpy as hp
    >>> data, timer = hp.load_exampledata(2)
    >>> sample_rate = hp.get_samplerate_datetime(timer, timeformat = '%Y-%m-%d %H:%M:%S.%f')
    >>> wd, m = hp.process(data, sample_rate)
    >>> rr_list = wd['RR_list']

    Using only the RR-list (in ms!) we can now call this function, and let's put the results
    into a differently named container so we're sure all measures are unique:

    >>> wd2, m2 = process_rr(rr_list, threshold_rr = True, clean_rr = True, calc_freq = True)
    >>> '%.3f' %m2['rmssd']
    '45.641'

    If you want to, you can turn off all filters and rejection features:

    >>> wd2, m2 = process_rr(rr_list, threshold_rr = False, clean_rr = False)
    >>> '%.3f' %m2['rmssd']
    '162.645'

    In this case it seems the filtering was necessary: without the RMSSD lies outside the
    range expected in healthy humans.
    '''

    #create fresh containers on every call: a mutable default argument would be
    #shared between calls and leak results from one analysis into the next
    if measures is None:
        measures = {}
    if working_data is None:
        working_data = {}

    working_data['RR_list'] = rr_list

    if threshold_rr:
        #do thresholding pass: reject intervals further from the mean than
        #30% of the mean, with a minimum margin of 300 ms
        mean_rr = np.mean(rr_list)
        margin = max(300, 0.3 * mean_rr)
        lower_threshold = mean_rr - margin
        upper_threshold = mean_rr + margin
        working_data['RR_list_cor'] = [x for x in rr_list 
                                       if lower_threshold < x < upper_threshold]
        working_data['RR_masklist'] = [1 if x <= lower_threshold or x >= upper_threshold 
                                       else 0 for x in rr_list]
    else:
        #no thresholding: start from the unfiltered intervals with nothing masked.
        #This also initialises both keys when only clean_rr is requested.
        #NOTE(review): presumably clean_rr_intervals reads 'RR_list_cor' and
        #'RR_masklist' from working_data; confirm against analysis.py.
        working_data['RR_list_cor'] = rr_list
        working_data['RR_masklist'] = [0] * len(rr_list)

    if clean_rr:
        #do clean_rr pass
        working_data = clean_rr_intervals(working_data=working_data, 
                                          method=clean_rr_method)

    #successive differences are taken over the intervals that remain after filtering
    rr_diff = np.abs(np.diff(working_data['RR_list_cor']))
    rr_sqdiff = np.power(rr_diff, 2)

    #compute ts measures
    working_data, measures = calc_ts_measures(rr_list=working_data['RR_list_cor'], 
                                              rr_diff=rr_diff, rr_sqdiff=rr_sqdiff, 
                                              measures=measures, 
                                              working_data=working_data)

    measures = calc_poincare(rr_list=working_data['RR_list'], 
                             rr_mask=working_data['RR_masklist'], 
                             measures=measures, working_data=working_data)

    if calc_freq:
        #compute freq measures
        working_data, measures = calc_fd_measures(method=freq_method, 
                                                  square_spectrum=square_spectrum,
                                                  measures=measures, 
                                                  working_data=working_data)
        
    return working_data, measures


def run_tests():
    '''
    function to run doctest on all of HeartPy

    Runs doctest.testmod on each HeartPy submodule in turn and finally on the 
    main processing module, printing a line per module and a summary at the end.
    '''

    from . import analysis, datautils, filtering, peakdetection, preprocessing, visualizeutils, config
    import doctest

    def _tally(outcome):
        '''print success message and return 1 if no doctest failed, otherwise 0'''
        if outcome.failed == 0: # pragma: no cover
            print('success!')
            return 1
        return 0

    #announcement to print and module to test, in the order they are run
    suites = (('testing config', config),
              ('testing analysis', analysis),
              ('testing datautils', datautils),
              ('testing filtering', filtering),
              ('testing peakdetection', peakdetection),
              ('testing preprocessing', preprocessing),
              ('testing visualization utils', visualizeutils))

    passed = 0
    for announcement, module in suites:
        print(announcement)
        passed += _tally(doctest.testmod(module))

    #the main module is imported only after its announcement has been printed
    print('testing main processing pipeline')
    from . import heartpy as hptester
    passed += _tally(doctest.testmod(hptester))

    total = len(suites) + 1 #seven submodules plus the main module
    if passed == total: # pragma: no cover
        print('all tests passed, ready to go!')
    else:
        print('some tests failed...')