Skip to content

Numerical

get_extrema_date_from_spline(spline, type='max', thresh=None, n_extrema=2)

Given a spline, this returns the dates (x variable) corresponding to the maxima or minima.

Parameters:

Name Type Description Default
spline CubicSpline

spline to find extrema of

required
type str

which extrema to find ('max' or 'min')

'max'
thresh Optional[float]

Only keep maxima (minima) with values above (below) this.

None
n_extrema int

Keep at most this many extrema, if more than this then will only keep highest (lowest).

2
Source code in isca_tools/utils/numerical.py
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def get_extrema_date_from_spline(spline: CubicSpline, type: str = 'max', thresh: Optional[float] = None,
                                 n_extrema: int = 2) -> np.ndarray:
    """
    Given a spline, this returns the dates (x variable) corresponding to the maxima or minima.

    Args:
        spline: spline to find extrema of
        type: which extrema to find ('max' or 'min')
        thresh: Only keep maxima (minima) with values above (below) this.
        n_extrema: Keep at most this many extrema, if more than this then will only keep highest (lowest).
    """
    extrema_date = spline.derivative().roots(extrapolate=False)
    if type == 'max':
        extrema_date = extrema_date[spline(extrema_date, 2) < 0]  # maxima have a negative second derivative
    elif type == 'min':
        extrema_date = extrema_date[spline(extrema_date, 2) > 0]  # minima have a positive second derivative
    else:
        raise ValueError('type is not valid, it should be max or min')
    extrema_values = spline(extrema_date)
    if thresh is not None:
        # Only keep maxima with value above threshold
        if type == 'max':
            keep = extrema_values > thresh
        elif type == 'min':
            keep = extrema_values < thresh
        extrema_date = extrema_date[keep]
        extrema_values = extrema_values[keep]
    if len(extrema_date) > n_extrema:
        if type == 'max':
            keep_ind = np.argsort(extrema_values)[-n_extrema:]
        elif type == 'min':
            keep_ind = np.argsort(extrema_values)[:n_extrema]
        extrema_date = extrema_date[keep_ind]
    return extrema_date

get_var_extrema_date(time, var, smooth_window=1, type='max', thresh_extrema=None, max_extrema=2, smooth_method='convolve')

Finds the dates of extrema of a variable, given some smoothing is performed first. Also returns the splines themselves.

Parameters:

Name Type Description Default
time ndarray

float [n_time]
Time in days (assumes periodic e.g. annual mean, so time = np.arange(360))

required
var ndarray

float [n_time]
Value of variable at each time. Again, assume periodic

required
smooth_window int

Number of time steps to use to smooth var before finding extrema. Smaller equals more accurate fit. 1 is perfect fit.

1
type str

which extrema to find ('max' or 'min')

'max'
thresh_extrema Optional[float]

Only keep maxima (minima) with values above (below) this.

None
max_extrema int

Keep at most this many extrema, if more than this then will only keep highest (lowest).

2
smooth_method str

convolve or spline
If convolve, will smooth via convolution with window of length smooth_window. If spline, will fit a spline using every smooth_window values of time.

'convolve'

Returns:

Type Description
ndarray

extrema_date: float [max_extrema]
Dates of extrema of var

CubicSpline

spline_var: Spline fit to var to find the extrema.

Source code in isca_tools/utils/numerical.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
def get_var_extrema_date(time: np.ndarray, var: np.ndarray, smooth_window: int = 1,
                         type: str = 'max', thresh_extrema: Optional[float] = None,
                         max_extrema: int = 2, smooth_method: str = 'convolve') -> Tuple[np.ndarray, CubicSpline]:
    """
    Finds the dates of extrema of a variable, given some smoothing is performed first.
    Also returns the splines themselves.

    Args:
        time: `float [n_time]`</br>
            Time in days (assumes periodic e.g. annual mean, so `time = np.arange(360)`)
        var: `float [n_time]`</br>
            Value of variable at each time. Again, assume periodic
        smooth_window: Number of time steps to use to smooth `var` before finding extrema.
            Smaller equals more accurate fit. `1` is perfect fit.
        type: which extrema to find ('max' or 'min')
        thresh_extrema: Only keep maxima (minima) with values above (below) this.
        max_extrema: Keep at most this many extrema, if more than this then will only keep highest (lowest).
        smooth_method: `convolve` or `spline`</br>
            If `convolve`, will smooth via convolution with window of length `smooth_window`.
            If `spline`, will fit a spline using every `smooth_window` values of `time`.

    Returns:
        `extrema_date`: `float [max_extrema]`</br>
            Dates of extrema of var
        `spline_var`: Spline fit to var to find the extrema.
    """
    if smooth_method.lower() == 'spline':
        # Make so last element of arrays equal first as periodic
        time_smooth = np.append(time, time[-1]+1)[::smooth_window]
        var_smooth = np.append(var, var[0])[::smooth_window]
    elif smooth_method.lower() == 'convolve':
        var_smooth = scipy.ndimage.convolve(var, np.ones(smooth_window) / smooth_window, mode='wrap')
        time_smooth = np.append(time, time[-1] + 1)
        var_smooth = np.append(var_smooth, var_smooth[0])
    else:
        raise  ValueError('smooth_method must be either spline or convolve')
    # Spline var is the spline replicating var_smooth exactly i.e. spline_var(t) = var_smooth[t] if t in time_smooth
    spline_var = CubicSpline(time_smooth, var_smooth, bc_type='periodic')
    extrema_date = get_extrema_date_from_spline(spline_var, type, thresh_extrema, max_extrema)
    return extrema_date, spline_var

get_var_shift(x, shift_time=None, shift_phase=None, time=None, time_start=None, time_end=None)

Returns the periodic variable \(x(t-t_{shift})\) where \(t\)=time, and \(t_{shift}=\)shift_time. If shift_phase is provided, will set shift_time = shift_phase * period.

Parameters:

Name Type Description Default
x ndarray

float [n_x]
\(x\) variable such that x[i] is the value of \(x\) at time time[i].

required
shift_time Optional[float]

How much to shift \(x\) by in units of time.

None
shift_phase Optional[float]

Fraction of period to shift \(x\) by.

None
time Optional[ndarray]

float [n_x]
Time such that x[i] is \(x\) at time time[i]. If time provided, will use spline to apply shift to \(x\). If time not provided, assume time is np.arange(n_x), and will use np.roll to apply shift to \(x\).

None
time_start Optional[float]

Start time such that period is given by time_end - time_start + 1. If not provided, will set to min value in time.

None
time_end Optional[float]

End time such that period is given by time_end - time_start + 1. If not provided, will set to max value in time.

None

Returns:

Name Type Description
x_shift ndarray

float [n_x]
\(x\) variable shifted in time such that x_shift[i] is value of \(x\) at time time[i] - shift_time.

Source code in isca_tools/utils/numerical.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def get_var_shift(x: np.ndarray, shift_time: Optional[float]=None, shift_phase: Optional[float]=None,
                time: Optional[np.ndarray] = None, time_start: Optional[float] = None,
                time_end: Optional[float] = None) -> np.ndarray:
    """
    Returns the periodic variable $x(t-t_{shift})$ where $t$=`time`, and $t_{shift}=$`shift_time`.
    If `shift_phase` is provided, will set `shift_time = shift_phase * period`.

    Args:
        x: `float [n_x]`</br>
            $x$ variable such that `x[i]` is the value of $x$ at time `time[i]`.
        shift_time: How much to shift $x$ by in units of `time`.
        shift_phase: Fraction of period to shift $x$ by.
        time: `float [n_x]`</br>
            Time such that `x[i]` is $x$ at time `time[i]`.</n>
            If `time` provided, will use spline to apply shift to $x$.</n>
            If `time` not provided, assume time is `np.arange(n_x)`, and will use `np.roll` to apply shift to $x$.
        time_start: Start time such that period is given by `time_end - time_start + 1`.
            If not provided, will set to min value in `time`.
        time_end: End time such that period is given by `time_end - time_start + 1`.
            If not provided, will set to max value in `time`.

    Returns:
        x_shift: `float [n_x]`</br>
            $x$ variable shifted in time such that `x_shift[i]` is value of $x$ at time `time[i] - shift_time`.
    """
    if time is not None:
        ind = np.argsort(time)
        if time_start is None:
            time_start = time[ind][0]
        if time_end is None:
            time_end = time[ind][-1]
        if time[ind][0] < time_start:
            raise ValueError(f'Min time={time[ind][0]} is less than time_start={time_start}')
        if time[ind][-1] > time_end:
            raise ValueError(f'Max time={time[ind][-1]} is greater than time_end={time_end}')
        x_spline_fit = CubicSpline(np.append(time[ind], time_end+time[ind][0]-time_start+1), np.append(x[ind], x[ind][0]),
                                   bc_type='periodic')
        period = time_end - time_start + 1
        if shift_phase is not None:
            shift_time = shift_phase * period
        x_shift = x_spline_fit(time - shift_time)
    else:
        if shift_phase is not None:
            shift_time = shift_phase * x.size
        if int(np.round(shift_time)) != shift_time:
            raise ValueError(f'shift_time={shift_time} is not a whole number - '
                             f'may be better using spline by providing time.')
        x_shift = np.roll(x, int(np.round(shift_time)))
    return x_shift

polyfit_phase(x, y, deg, time=None, time_start=None, time_end=None, deg_phase_calc=10, resample=False, include_phase=True, fourier_harmonics=None, integ_method='spline')

This fits a polynomial y_approx(x) = p[0] * x**deg + ... + p[deg] of degree deg to points (x, y) as np.polyfit but also includes additional phase shift term such that the total approximation for y is:

\(y_{approx} = \frac{1}{2} \lambda_{phase}(x(t-T/4) - x(t+T/4)) + \sum_{n=0}^{n_{deg}} \lambda_n x^n\)

where \(\lambda_n=\)poly_coefs[-1-n] and \(\lambda_{phase}=\)poly_coefs[0]. \(x\) is assumed periodic with period \(T=\)time[-1]-time[0]+time_spacing.

The phase component, \(y_{phase}=\frac{1}{2} \lambda_{phase}(x(t-T/4) - x(t+T/4))\), is found first from the residual of \(y-y_{best}\), where \(y_{best}\) is the polynomial approximation of degree deg_phase_calc.

\(\sum_{n=0}^{n_{deg}} \lambda_n x^n\) is then found by doing the normal polynomial approximation of degree deg to the residual \(y-y_{phase}\).

If fourier_harmonics is provided, then a fourier series will also be added to \(y_{approx}\) containing all harmonics, \(n\), in fourier_harmonics:

\(y_{approx} + \sum_{n} F_n\cos(2n\pi ft - \Phi_n)\)

The idea behind this is to account for part of \(y\) not directly related to \(x\).

Parameters:

Name Type Description Default
x ndarray

float [n_x]
\(x\) coordinates used to approximate \(y\). x[i] is value at time time[i].

required
y ndarray

float [n_x]
\(y\) coordinate correesponding to each \(x\).

required
deg int

Degree of the fitting polynomial. If negative, will only do the phase fitting.

required
time Optional[ndarray]

float [n_x]
Time such that x[i] is \(x\) and y[i] is \(y\) at time time[i]. If time provided, will use spline to apply phase shift to \(x\). If time not provided, assume time is np.arange(n_x), and will use np.roll to apply phase shift to \(x\).

None
time_start Optional[float]

Start time such that period is given by time_end - time_start + 1. If not provided, will set to min value in time.

None
time_end Optional[float]

End time such that period is given by time_end - time_start + 1. If not provided, will set to max value in time.

None
deg_phase_calc int

Degree of the fitting polynomial to use in the phase term calculation. Should be a large integer.

10
resample bool

If True, will use resample_data to resample x and y before each calling of np.polyfit.

False
include_phase bool

If False, will only call np.polyfit, but first return value will be 0 indicating no phase shift. Only makes sense to call this rather than np.polyfit if you want to use resample.

True
fourier_harmonics Optional[Union[int, ndarray]]

int [n_harmonics_include]
After applied polynomial of degree deg_phase_calc and fit the phase factor, a residual is obtained y_residual = y - y_best - y_phase. If fourier_harmonics is provided, a fourier series will be directly fit to this residual including all the harmonics in fourier_harmonics. If fourier_harmonics is an integer, all harmonics up to and including the value will be fit.
The final polynomial of degree deg will then be fit to y - y_phase - y_fourier.
Idea behind this is to account for part of \(y\) not directly related to \(x\).

None
integ_method str

How to perform the integration when calculating Fourier coefficients..
If spline, will fit a spline and then integrate the spline, otherwise will use scipy.integrate.simpson.

'spline'

Returns:

Name Type Description
poly_coefs Union[ndarray, Tuple[ndarray, ndarray, ndarray]]

float [n_deg+2] Polynomial coefficients, phase first and then normal output of np.polyfit with lowest power last.

coefs_fourier_amp Union[ndarray, Tuple[ndarray, ndarray, ndarray]]

float [fourier_harmonics.max()+1]
coefs_fourier_amp[n] is the amplitude fourier coefficient for the \(n^{th}\) harmonic.
First value will be zero, because \(0^{th}\) harmonic is just a constant and so will be found in polynomial fitting.
Only returned if fourier_harmonics is provided.

coefs_fourier_phase Union[ndarray, Tuple[ndarray, ndarray, ndarray]]

float [fourier_harmonics.max()+1]
coefs_fourier_phase[n] is the phase fourier coefficient for the \((n+1)^{th}\) harmonic.
Only returned if fourier_harmonics is provided.

Source code in isca_tools/utils/numerical.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def polyfit_phase(x: np.ndarray, y: np.ndarray,
                  deg: int, time: Optional[np.ndarray] = None, time_start: Optional[float] = None,
                  time_end: Optional[float] = None,
                  deg_phase_calc: int = 10, resample: bool = False,
                  include_phase: bool = True, fourier_harmonics: Optional[Union[int, np.ndarray]] = None,
                  integ_method: str = 'spline') -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    This fits a polynomial `y_approx(x) = p[0] * x**deg + ... + p[deg]` of degree `deg` to points (x, y) as `np.polyfit`
    but also includes additional phase shift term such that the total approximation for y is:

    $y_{approx} = \\frac{1}{2} \lambda_{phase}(x(t-T/4) - x(t+T/4)) + \sum_{n=0}^{n_{deg}} \lambda_n x^n$

    where $\lambda_n=$`poly_coefs[-1-n]` and $\lambda_{phase}=$`poly_coefs[0]`.
    $x$ is assumed periodic with period $T=$`time[-1]-time[0]+time_spacing`.

    The phase component, $y_{phase}=\\frac{1}{2} \lambda_{phase}(x(t-T/4) - x(t+T/4))$, is found first from the
    residual of $y-y_{best}$, where $y_{best}$ is the polynomial approximation of degree `deg_phase_calc`.

    $\sum_{n=0}^{n_{deg}} \lambda_n x^n$ is then found by doing the normal polynomial approximation of degree
    `deg` to the residual $y-y_{phase}$.

    If `fourier_harmonics` is provided, then a fourier series will also be added to $y_{approx}$ containing all
    harmonics, $n$, in `fourier_harmonics`:

    $y_{approx} + \\sum_{n} F_n\\cos(2n\\pi ft - \\Phi_n)$

    The idea behind this is to account for part of $y$ not directly related to $x$.

    Args:
        x: `float [n_x]`</br>
            $x$ coordinates used to approximate $y$. `x[i]` is value at time `time[i]`.
        y: `float [n_x]`</br>
            $y$ coordinate correesponding to each $x$.
        deg: Degree of the fitting polynomial. If negative, will only do the phase fitting.
        time: `float [n_x]`</br>
            Time such that `x[i]` is $x$ and `y[i]` is $y$ at time `time[i]`.</n>
            If time provided, will use spline to apply phase shift to $x$.</n>
            If time not provided, assume time is `np.arange(n_x)`, and will use `np.roll` to apply phase shift to $x$.
        time_start: Start time such that period is given by `time_end - time_start + 1`.
            If not provided, will set to min value in `time`.
        time_end: End time such that period is given by `time_end - time_start + 1`.
            If not provided, will set to max value in `time`.
        deg_phase_calc: Degree of the fitting polynomial to use in the phase term calculation.
            Should be a large integer.
        resample: If `True`, will use `resample_data` to resample x and y before each calling of
            `np.polyfit`.
        include_phase: If `False`, will only call `np.polyfit`, but first return value will be 0 indicating
            no phase shift. Only makes sense to call this rather than `np.polyfit` if you want to use `resample`.
        fourier_harmonics: `int [n_harmonics_include]`</br>
            After applied polynomial of degree `deg_phase_calc` and fit the phase factor, a residual is obtained
            `y_residual = y - y_best - y_phase`. If `fourier_harmonics` is provided, a fourier
            series will be directly fit to this residual including all the harmonics in `fourier_harmonics`.
            If `fourier_harmonics` is an integer, all harmonics up to and including the value will be fit.</br>
            The final polynomial of degree `deg` will then be fit to `y - y_phase - y_fourier`.</br>
            Idea behind this is to account for part of $y$ not directly related to $x$.
        integ_method: How to perform the integration when calculating Fourier coefficients..</br>
            If `spline`, will fit a spline and then integrate the spline, otherwise will use `scipy.integrate.simpson`.

    Returns:
        poly_coefs: `float [n_deg+2]`
            Polynomial coefficients, phase first and then normal output of `np.polyfit` with lowest power last.
        coefs_fourier_amp: `float [fourier_harmonics.max()+1]`</br>
            `coefs_fourier_amp[n]` is the amplitude fourier coefficient for the $n^{th}$ harmonic.</br>
            First value will be zero, because $0^{th}$ harmonic is just a constant and so will be found in
            polynomial fitting.</br>
            Only returned if `fourier_harmonics` is provided.
        coefs_fourier_phase: `float [fourier_harmonics.max()+1]`</br>
            `coefs_fourier_phase[n]` is the phase fourier coefficient for the $(n+1)^{th}$ harmonic.</br>
            Only returned if `fourier_harmonics` is provided.
    """
    coefs = np.zeros(np.clip(deg, 0, 1000) + 2)  # first coef is phase coef
    if resample:
        x_fit, y_fit = resample_data(time, x, y)[1:]
    else:
        x_fit = x
        y_fit = y
    if not include_phase:
        coefs[1:] = np.polyfit(x_fit, y_fit, deg)       # don't do phase stuff so 1st value is 0
    else:
        y_best_polyfit = np.polyval(np.polyfit(x_fit, y_fit, deg_phase_calc), x)
        x_shift = 0.5 * (get_var_shift(x, shift_phase=0.25, time=time, time_start=time_start, time_end=time_end) -
                         get_var_shift(x, shift_phase=-0.25, time=time, time_start=time_start, time_end=time_end))
        if resample:
            x_shift_fit, y_residual_fit = resample_data(time, x_shift, y - y_best_polyfit)[1:]
        else:
            x_shift_fit = x_shift
            y_residual_fit = y - y_best_polyfit
        coefs[[0, -1]] = np.polyfit(x_shift_fit, y_residual_fit, 1)
        y_no_phase = y - polyval_phase(coefs, x, time, time_start, time_end)  # residual after removing phase dependent term

        if fourier_harmonics is not None:
            time_use = np.arange(x.size) if time is None else time
            if not all(np.ediff1d(time_use) == np.ediff1d(time_use)[0]):
                raise ValueError('Can only include Fourier with evenly spaced data')
            if isinstance(fourier_harmonics, int):
                # if int, use all harmonics up to value indicated but without 0th harmonic
                fourier_harmonics = np.arange(1, fourier_harmonics+1)
            coefs_fourier_amp = np.zeros(np.max(fourier_harmonics)+1)
            coefs_fourier_phase = np.zeros(np.max(fourier_harmonics))
            for n in fourier_harmonics:
                if n==0:
                    warnings.warn('Will not fit 0th harmonic as constant will be fit in polynomial')
                else:
                    coefs_fourier_amp[n], coefs_fourier_phase[n-1] = \
                        get_fourier_coef(time_use, y_no_phase - y_best_polyfit, n, integ_method)
            y_residual_fourier = fourier_series(time_use, coefs_fourier_amp, coefs_fourier_phase)
            y_no_phase = y_no_phase - y_residual_fourier

        if deg >= 0:
            if resample:
                x_fit, y_no_phase_fit = resample_data(time, x, y_no_phase)[1:]
            else:
                x_fit = x
                y_no_phase_fit = y_no_phase
            coefs[1:] += np.polyfit(x_fit, y_no_phase_fit, deg)

    if fourier_harmonics is None:
        return coefs
    else:
        return coefs, coefs_fourier_amp, coefs_fourier_phase

polyval_phase(poly_coefs, x, time=None, time_start=None, time_end=None, coefs_fourier_amp=None, coefs_fourier_phase=None)

Given the polynomial coefficients found by polyfit_phase for fitting a polynomial of degree len(polyfit)-2 of \(x\) to \(y\), this will return the approximation of \(y\):

\(y_{approx} = \frac{1}{2} \lambda_{phase}(x(t-T/4) - x(t+T/4)) + \sum_{n=0}^{n_{deg}} \lambda_n x^n\)

where \(\lambda_n=\)poly_coefs[-1-n], \(\lambda_{phase}=\)poly_coefs[0] and \(x\) is assumed periodic with period \(T\).

If coefs_fourier_amp is provided, then a fourier series will also be added to \(y_{approx}\):

\(y_{approx} + \frac{F_0}{2} + \sum_{n=1}^{N} F_n\cos(2n\pi ft - \Phi_n)\)

Parameters:

Name Type Description Default
poly_coefs ndarray

float [n_deg+2]
Polynomial coefficients as output by polyfit_phase, lowest power last.
\(\lambda_{phase}=\)poly_coefs[0] and \(\lambda_n=\)poly_coefs[-1-n].

required
x ndarray

float [n_x]
\(x\) coordinates used to approximate \(y\).

required
time Optional[ndarray]

float [n_x]
Time such that x[i] is \(x\) at time time[i]. If time provided, will use spline to apply shift to \(x\). If time not provided, assume time is np.arange(n_x), and will use np.roll to apply shift to \(x\).

None
time_start Optional[float]

Start time such that period is given by time_end - time_start + 1. If not provided, will set to min value in time.

None
time_end Optional[float]

End time such that period is given by time_end - time_start + 1. If not provided, will set to max value in time.

None
coefs_fourier_amp Optional[ndarray]

float [n_harmonics+1]
The amplitude Fourier coefficients \(F_n\).

None
coefs_fourier_phase Optional[ndarray]

float [n_harmonics]
The phase Fourier coefficients \(\Phi_n\).

None

Returns:

Name Type Description
y_approx ndarray

float [n_x]
Polynomial approximation to \(y\), possibly including phase and Fourier terms.

Source code in isca_tools/utils/numerical.py
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def polyval_phase(poly_coefs: np.ndarray, x: np.ndarray, time: Optional[np.ndarray] = None,
                  time_start: Optional[float] = None, time_end: Optional[float] = None,
                  coefs_fourier_amp: Optional[np.ndarray] = None,
                  coefs_fourier_phase: Optional[np.ndarray] = None) -> np.ndarray:
    """
    Given the polynomial coefficients found by `polyfit_phase` for fitting a polynomial
    of degree `len(polyfit)-2` of $x$ to $y$, this will return the approximation of $y$:

    $y_{approx} = \\frac{1}{2} \lambda_{phase}(x(t-T/4) - x(t+T/4)) + \sum_{n=0}^{n_{deg}} \lambda_n x^n$

    where $\lambda_n=$`poly_coefs[-1-n]`, $\lambda_{phase}=$`poly_coefs[0]` and
    $x$ is assumed periodic with period $T$.

    If `coefs_fourier_amp` is provided, then a fourier series will also be added to $y_{approx}$:

    $y_{approx} + \\frac{F_0}{2} + \\sum_{n=1}^{N} F_n\\cos(2n\\pi ft - \\Phi_n)$

    Args:
        poly_coefs: `float [n_deg+2]`</br>
            Polynomial coefficients as output by `polyfit_phase`, lowest power last.</br>
            $\lambda_{phase}=$`poly_coefs[0]` and $\lambda_n=$`poly_coefs[-1-n]`.
        x: `float [n_x]`</br>
            $x$ coordinates used to approximate $y$.
        time: `float [n_x]`</br>
            Time such that `x[i]` is $x$ at time `time[i]`.</n>
            If `time` provided, will use spline to apply shift to $x$.</n>
            If `time` not provided, assume time is `np.arange(n_x)`, and will use `np.roll` to apply shift to $x$.
        time_start: Start time such that period is given by `time_end - time_start + 1`.
            If not provided, will set to min value in `time`.
        time_end: End time such that period is given by `time_end - time_start + 1`.
            If not provided, will set to max value in `time`.
        coefs_fourier_amp: `float [n_harmonics+1]`</br>
            The amplitude Fourier coefficients $F_n$.
        coefs_fourier_phase: `float [n_harmonics]`</br>
            The phase Fourier coefficients $\\Phi_n$.

    Returns:
        y_approx: `float [n_x]`</br>
            Polynomial approximation to $y$, possibly including phase and Fourier terms.
    """
    # In this case, poly_coefs are output of polyfit_with_phase so first coefficient is the phase coefficient
    y_approx = np.polyval(poly_coefs[1:], x)
    x_shift = 0.5 * (get_var_shift(x, shift_phase=0.25, time=time, time_start=time_start, time_end=time_end) -
                     get_var_shift(x, shift_phase=-0.25, time=time, time_start=time_start, time_end=time_end))
    if coefs_fourier_amp is not None:
        time_use = np.arange(x.size) if time is None else time
        y_residual_fourier = fourier_series(time_use, coefs_fourier_amp, coefs_fourier_phase)
    else:
        y_residual_fourier = 0
    return y_approx + poly_coefs[0] * x_shift + y_residual_fourier

resample_data(time, x, y, x_return=None, n_return=None, bc_type='periodic', extrapolate=False)

Given that x[i] and y[i] both occur at time time[i], this resamples data to return values of y corresponding to x_return.

Parameters:

Name Type Description Default
time Optional[ndarray]

float [n_time]
Times such that x[i] and y[i] correspond to time time[i]. It assumes time has a spacing of 1, and starts with 0, so for a 360-day year, it would be np.arange(360).

required
x ndarray

float [n_time]
Value of variable \(x\) at each time.

required
y ndarray

float [n_time]
Value of variable \(y\) at each time.

required
x_return Optional[ndarray]

float [n_return]
Values of \(x\) for the resampled \(y\) data to be returned. If not provided, will use np.linspace(x.min(), x.max(), n_return).

None
n_return Optional[int]

Number of resampled data if x_return is not provided. Will set to n_time neither this nor x_return provided.

None
bc_type str

Boundary condition type in scipy.interpolate.CubicSpline.

'periodic'
extrapolate bool

Whether to extrapolate if any x_return outsidex is provided.

False

Returns:

Name Type Description
times_return ndarray

float [n_return_out]
Times corresponding to x_return. Not necessarily n_return values because can have multiple \(y\) values for each \(x\).

x_return_out ndarray

float [n_return_out]
\(x\) values corresponding to times_return. Will only contain values in x_return, but may contain multiple of each.

y_return ndarray

float [n_return_out]
\(y\) values corresponding to times_return and x_return_out.

Source code in isca_tools/utils/numerical.py
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
def resample_data(time: Optional[np.ndarray], x: np.ndarray, y: np.ndarray, x_return: Optional[np.ndarray] = None,
                  n_return: Optional[int] = None, bc_type: str = 'periodic',
                  extrapolate: bool=False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Given that `x[i]` and `y[i]` both occur at time `time[i]`, this resamples data to return values of `y`
    corresponding to `x_return`.

    Args:
        time: `float [n_time]`</br>
            Times such that `x[i]` and `y[i]` correspond to time `time[i]`. It assumes time has a spacing of 1, and
            starts with 0, so for a 360-day year, it would be `np.arange(360)`.
        x: `float [n_time]`</br>
            Value of variable $x$ at each time.
        y: `float [n_time]`</br>
            Value of variable $y$ at each time.
        x_return: `float [n_return]`</br>
            Values of $x$ for the resampled $y$ data to be returned. If not provided, will use
            `np.linspace(x.min(), x.max(), n_return)`.
        n_return: Number of resampled data if `x_return` is not provided. Will set to `n_time` neither this
            nor `x_return` provided.
        bc_type: Boundary condition type in `scipy.interpolate.CubicSpline`.
        extrapolate: Whether to extrapolate if any `x_return` outside`x` is provided.

    Returns:
        times_return: `float [n_return_out]`</br>
            Times corresponding to `x_return`. Not necessarily `n_return` values because can have multiple $y$
            values for each $x$.
        x_return_out: `float [n_return_out]`</br>
            $x$ values corresponding to `times_return`. Will only contain values in `x_return`, but may contain
            multiple of each.
        y_return: `float [n_return_out]`</br>
            $y$ values corresponding to `times_return` and `x_return_out`.
    """
    if n_return is None:
        n_return = x.size
    if time is None:
        time = np.arange(x.size)
    time_spacing = np.median(np.ediff1d(time))
    if 'periodic' in bc_type:
        x_spline = CubicSpline(np.append(time, [time[-1]+time_spacing]), np.append(x, x[0]),
                               bc_type=bc_type)
        y_spline = CubicSpline(np.append(time, [time[-1]+time_spacing]), np.append(y, y[0]),
                               bc_type=bc_type)
    else:
        x_spline = CubicSpline(time, x, bc_type=bc_type)
        y_spline = CubicSpline(time, y, bc_type=bc_type)
    if x_return is None:
        x_return = np.linspace(x.min(), x.max(), n_return)
    times_return = []
    for i in range(x_return.size):
        times_return += [*x_spline.solve(x_return[i], extrapolate=extrapolate)]
    times_return = np.asarray(times_return) % time[-1]      # make return times between 0 and time[-1]
    return times_return, x_spline(times_return), y_spline(times_return)

resample_data_distance(time, x, y, n_return=None, bc_type='periodic', norm=False)

Given that x[i] and y[i] both occur at time time[i], this resamples data to return n_return values of \(x\) and \(y\) evenly spaced along the line connecting all (x, y) coordinates.

Parameters:

Name Type Description Default
time Optional[ndarray]

float [n_time]
Times such that x[i] and y[i] correspond to time time[i].
If time not provided, assume time is np.arange(n_x).

required
x ndarray

float [n_time]
Value of variable \(x\) at each time.

required
y ndarray

float [n_time]
Value of variable \(y\) at each time.

required
n_return Optional[int]

Number of resampled data, will set to n_time if not provided.

None
bc_type str

Boundary condition type in scipy.interpolate.CubicSpline for x and y.

'periodic'
norm bool

If True will normalize x and y so both have a range of 1, before calculating distance along line.

False

Returns:

Name Type Description
times_return ndarray

float [n_return]
Times of returned \(x\) and \(y\) such that they are evenly spaced along line connecting all input (x, y) coordinates.

x_return_out ndarray

float [n_return_out]
\(x\) values corresponding to times_return.

y_return ndarray

float [n_return_out]
\(y\) values corresponding to times_return and x_return.

Source code in isca_tools/utils/numerical.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def resample_data_distance(time: Optional[np.ndarray], x: np.ndarray, y: np.ndarray, n_return: Optional[int] = None,
                           bc_type: str = 'periodic', norm: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Given that `x[i]` and `y[i]` both occur at time `time[i]`, this resamples data to return `n_return` values of
    $x$ and $y$ evenly spaced along the line connecting all (x, y) coordinates.

    Args:
        time: `float [n_time]`</br>
            Times such that `x[i]` and `y[i]` correspond to time `time[i]`.</br>
            If time not provided, assume time is `np.arange(n_x)`.
        x: `float [n_time]`</br>
            Value of variable $x$ at each time.
        y: `float [n_time]`</br>
            Value of variable $y$ at each time.
        n_return:  Number of resampled data, will set to `n_time` if not provided.
        bc_type: Boundary condition type in `scipy.interpolate.CubicSpline` for `x` and `y`.
        norm: If `True` will normalize `x` and `y` so both have a range of 1, before calculating distance along line.

    Returns:
        times_return: `float [n_return]`</br>
            Times of returned $x$ and $y$ such that they are evenly spaced along line connecting all input
            (x, y) coordinates.
        x_return_out: `float [n_return_out]`</br>
            $x$ values corresponding to `times_return`.
        y_return: `float [n_return_out]`</br>
            $y$ values corresponding to `times_return` and `x_return`.
    """
    if n_return is None:
        n_return = x.size
    if time is None:
        time = np.arange(x.size)
    time_spacing = np.median(np.ediff1d(time))
    # dist[i] is distance along line from (x[0], y[0]) at time=time[i]
    if norm:
        coords_dist_calc = np.vstack((x / (np.max(x) - np.min(x)), y / (np.max(y) - np.min(y))))
    else:
        coords_dist_calc = np.vstack((x, y))
    dist = np.append(0, np.cumsum(np.sqrt(np.sum(np.diff(coords_dist_calc, axis=1) ** 2, axis=0))))
    x_spline = CubicSpline(np.append(time, [time[-1] + time_spacing]), np.append(x, x[0]),
                                             bc_type=bc_type)
    y_spline = CubicSpline(np.append(time, [time[-1] + time_spacing]), np.append(y, y[0]),
                                             bc_type=bc_type)
    dist_spline = CubicSpline(time, dist)
    dist_return = np.linspace(dist[0], dist[-1], n_return)
    # Adjust first and last values by tiny amount, to ensure that within the range when trying to solve
    small = 0.0001 * (dist_return[1]-dist_return[0])
    dist_return[0] += small
    dist_return[-1] -= small
    time_resample = np.zeros(n_return)
    for i in range(n_return):
        time_resample[i] = dist_spline.solve(dist_return[i], extrapolate=False)[0]
    return time_resample, x_spline(time_resample), y_spline(time_resample)

spline_integral(x, dy_dx, y0=0, x0=None, x_return=None, periodic=False)

Uses spline integration to solve for \(y\) given \(\frac{dy}{dx}\) such that \(y=y_0\) at \(x=x_0\).

Parameters:

Name Type Description Default
x ndarray

float [n_x]
Values of \(x\) where \(\frac{dy}{dx}\) given, and used to fit the spline.

required
dy_dx ndarray

float [n_x]
Values of \(\frac{dy}{dx}\) corresponding to \(x\), and used to fit the spline.

required
y0 float

Boundary condition, \(y(x_0)=y_0\).

0
x0 Optional[float]

Boundary condition, \(y(x_0)=y_0\).
If not given, will assume x0=x_return[0].

None
x_return Optional[ndarray]

float [n_x_return]
Values of \(y\) are returned for these \(x\) values. If not given, will set to x.

None
periodic bool

Whether to use periodic boundary condition.
If periodic expect \(\frac{dy}{dx}\) at \(x=\)x[-1]+x_spacing is equal to dy_dx[0].

False

Returns:

Name Type Description
y_return ndarray

float [n_x_return]
Values of \(y\) corresponding to `x_return.

Source code in isca_tools/utils/numerical.py
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
def spline_integral(x: np.ndarray, dy_dx: np.ndarray, y0: float = 0, x0: Optional[float] = None,
                    x_return: Optional[np.ndarray] = None,
                    periodic: bool = False) -> np.ndarray:
    """
    Uses spline integration to solve for $y$ given $\\frac{dy}{dx}$ such that $y=y_0$ at $x=x_0$.

    Args:
        x: `float [n_x]`</br>
            Values of $x$ where $\\frac{dy}{dx}$ given, and used to fit the spline.
        dy_dx: `float [n_x]`</br>
            Values of $\\frac{dy}{dx}$ corresponding to $x$, and used to fit the spline.
        y0: Boundary condition, $y(x_0)=y_0$.
        x0: Boundary condition, $y(x_0)=y_0$.</br>
            If not given, will assume `x0=x_return[0]`.
        x_return: `float [n_x_return]`</br>
            Values of $y$ are returned for these $x$ values. If not given, will set to `x`.
        periodic: Whether to use periodic boundary condition.</br>
            If periodic expect $\\frac{dy}{dx}$ at $x=$`x[-1]+x_spacing` is equal to `dy_dx[0]`.

    Returns:
        y_return: `float [n_x_return]`</br>
            Values of $y$ corresponding to `x_return.
    """
    if periodic:
        x_spacing = np.median(np.ediff1d(x))
        spline_use = CubicSpline(np.append(x, x[-1] + x_spacing), np.append(dy_dx, dy_dx[0]), bc_type='periodic')
    else:
        spline_use = CubicSpline(x, dy_dx)
    if x_return is None:
        x_return = x
    if x0 is None:
        x0 = x_return[0]
    y = np.full_like(x_return, y0, dtype=float)
    for i in range(x_return.size):
        y[i] += spline_use.integrate(x0, x_return[i], extrapolate='periodic' if periodic else None)
    return y