Skip to content

Commit 60f77db

Browse files
Add param isconstant to stimp (#878)
* add test function * add param isconstant to stimp * add test function for stimped * minor fixes * fix non-normalized
1 parent 58f24c7 commit 60f77db

File tree

2 files changed

+220
-14
lines changed

2 files changed

+220
-14
lines changed

stumpy/stimp.py

Lines changed: 83 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,16 @@ class _stimp:
5050
T : numpy.ndarray
5151
The time series or sequence for which to compute the pan matrix profile
5252
53-
m_start : int, default 3
53+
min_m : int, default 3
5454
The starting (or minimum) subsequence window size for which a matrix profile
5555
may be computed
5656
57-
m_stop : int, default None
57+
max_m : int, default None
5858
The stopping (or maximum) subsequence window size for which a matrix profile
59-
may be computed. When `m_stop = Non`, this is set to the maximum allowable
59+
may be computed. When `max_m = None`, this is set to the maximum allowable
6060
subsequence window size
6161
62-
m_step : int, default 1
62+
step : int, default 1
6363
The step between subsequence window sizes
6464
6565
percentage : float, default 0.01
@@ -87,6 +87,14 @@ class _stimp:
8787
mp_func : function, default stump
8888
The matrix profile function to use when `percentage = 1.0`
8989
90+
T_subseq_isconstant_func : function, default None
91+
A custom, user-defined function that returns a boolean array that indicates
92+
whether a subsequence in `T` is constant (True). The function must only take
93+
two arguments, `a`, a 1-D array, and `w`, the window size, while additional
94+
arguments may be specified by currying the user-defined function using
95+
`functools.partial`. Any subsequence with at least one np.nan/np.inf will
96+
automatically have its corresponding value set to False in this boolean array.
97+
9098
Attributes
9199
----------
92100
PAN_ : numpy.ndarray
@@ -122,6 +130,7 @@ def __init__(
122130
client=None,
123131
device_id=None,
124132
mp_func=stump,
133+
T_subseq_isconstant_func=None,
125134
):
126135
"""
127136
Initialize the `stimp` object and compute the Pan Matrix Profile
@@ -167,6 +176,15 @@ def __init__(
167176
168177
mp_func : function, default stump
169178
The matrix profile function to use when `percentage = 1.0`
179+
180+
T_subseq_isconstant_func : function, default None
181+
A custom, user-defined function that returns a boolean array that indicates
182+
whether a subsequence in `T` is constant (True). The function must only take
183+
two arguments, `a`, a 1-D array, and `w`, the window size, while additional
184+
arguments may be specified by currying the user-defined function using
185+
`functools.partial`. Any subsequence with at least one np.nan/np.inf will
186+
automatically have its corresponding value set to False in this boolean
187+
array.
170188
"""
171189
self._T = T.copy()
172190
if max_m is None:
@@ -189,6 +207,16 @@ def __init__(
189207
mp_func, client=client, device_id=device_id
190208
)
191209

210+
if T_subseq_isconstant_func is None:
211+
T_subseq_isconstant_func = core._rolling_isconstant
212+
if not callable(T_subseq_isconstant_func): # pragma: no cover
213+
msg = (
214+
"`T_subseq_isconstant_func` was expected to be a callable function "
215+
+ f"but {type(T_subseq_isconstant_func)} was found."
216+
)
217+
raise ValueError(msg)
218+
self._T_subseq_isconstant_func = T_subseq_isconstant_func
219+
192220
self._PAN = np.full(
193221
(self._M.shape[0], self._T.shape[0]), fill_value=np.inf, dtype=np.float64
194222
)
@@ -223,6 +251,7 @@ def update(self):
223251
percentage=self._percentage,
224252
pre_scrump=self._pre_scrump,
225253
k=1,
254+
T_A_subseq_isconstant=self._T_subseq_isconstant_func,
226255
)
227256
approx.update()
228257
self._PAN[
@@ -233,6 +262,7 @@ def update(self):
233262
self._T,
234263
m,
235264
ignore_trivial=True,
265+
T_A_subseq_isconstant=self._T_subseq_isconstant_func,
236266
)
237267
self._PAN[
238268
self._bfs_indices[self._n_processed], : out[:, 0].shape[0]
@@ -347,7 +377,7 @@ def M_(self):
347377

348378
@core.non_normalized(
349379
aamp_stimp,
350-
exclude=["pre_scrump", "normalize", "p", "pre_scraamp"],
380+
exclude=["pre_scrump", "normalize", "p", "T_subseq_isconstant_func", "pre_scraamp"],
351381
replace={"pre_scrump": "pre_scraamp"},
352382
)
353383
class stimp(_stimp):
@@ -361,16 +391,16 @@ class stimp(_stimp):
361391
T : numpy.ndarray
362392
The time series or sequence for which to compute the pan matrix profile
363393
364-
m_start : int, default 3
394+
min_m : int, default 3
365395
The starting (or minimum) subsequence window size for which a matrix profile
366396
may be computed
367397
368-
m_stop : int, default None
398+
max_m : int, default None
369399
The stopping (or maximum) subsequence window size for which a matrix profile
370-
may be computed. When `m_stop = Non`, this is set to the maximum allowable
400+
may be computed. When `max_m = None`, this is set to the maximum allowable
371401
subsequence window size
372402
373-
m_step : int, default 1
403+
step : int, default 1
374404
The step between subsequence window sizes
375405
376406
percentage : float, default 0.01
@@ -393,6 +423,14 @@ class stimp(_stimp):
393423
The p-norm to apply for computing the Minkowski distance. This parameter is
394424
ignored when `normalize == True`.
395425
426+
T_subseq_isconstant_func : function, default None
427+
A custom, user-defined function that returns a boolean array that indicates
428+
whether a subsequence in `T` is constant (True). The function must only take
429+
two arguments, `a`, a 1-D array, and `w`, the window size, while additional
430+
arguments may be specified by currying the user-defined function using
431+
`functools.partial`. Any subsequence with at least one np.nan/np.inf will
432+
automatically have its corresponding value set to False in this boolean array.
433+
396434
Attributes
397435
----------
398436
PAN_ : numpy.ndarray
@@ -442,6 +480,7 @@ def __init__(
442480
pre_scrump=True,
443481
normalize=True,
444482
p=2.0,
483+
T_subseq_isconstant_func=None,
445484
):
446485
"""
447486
Initialize the `stimp` object and compute the Pan Matrix Profile
@@ -483,6 +522,15 @@ def __init__(
483522
p : float, default 2.0
484523
The p-norm to apply for computing the Minkowski distance. This parameter is
485524
ignored when `normalize == True`.
525+
526+
T_subseq_isconstant_func : function, default None
527+
A custom, user-defined function that returns a boolean array that indicates
528+
whether a subsequence in `T` is constant (True). The function must only take
529+
two arguments, `a`, a 1-D array, and `w`, the window size, while additional
530+
arguments may be specified by currying the user-defined function using
531+
`functools.partial`. Any subsequence with at least one np.nan/np.inf will
532+
automatically have its corresponding value set to False in this boolean
533+
array.
486534
"""
487535
super().__init__(
488536
T,
@@ -492,12 +540,13 @@ def __init__(
492540
percentage=percentage,
493541
pre_scrump=pre_scrump,
494542
mp_func=stump,
543+
T_subseq_isconstant_func=T_subseq_isconstant_func,
495544
)
496545

497546

498547
@core.non_normalized(
499548
aamp_stimped,
500-
exclude=["pre_scrump", "normalize", "p", "pre_scraamp"],
549+
exclude=["pre_scrump", "normalize", "p", "T_subseq_isconstant_func", "pre_scraamp"],
501550
replace={"pre_scrump": "pre_scraamp"},
502551
)
503552
class stimped(_stimp):
@@ -516,16 +565,16 @@ class stimped(_stimp):
516565
T : numpy.ndarray
517566
The time series or sequence for which to compute the pan matrix profile
518567
519-
m_start : int, default 3
568+
min_m : int, default 3
520569
The starting (or minimum) subsequence window size for which a matrix profile
521570
may be computed
522571
523-
m_stop : int, default None
572+
max_m : int, default None
524573
The stopping (or maximum) subsequence window size for which a matrix profile
525-
may be computed. When `m_stop = Non`, this is set to the maximum allowable
574+
may be computed. When `max_m = None`, this is set to the maximum allowable
526575
subsequence window size
527576
528-
m_step : int, default 1
577+
step : int, default 1
529578
The step between subsequence window sizes
530579
531580
normalize : bool, default True
@@ -537,6 +586,15 @@ class stimped(_stimp):
537586
The p-norm to apply for computing the Minkowski distance. This parameter is
538587
ignored when `normalize == True`.
539588
589+
T_subseq_isconstant_func : function, default None
590+
A custom, user-defined function that returns a boolean array that indicates
591+
whether a subsequence in `T` is constant (True). The function must only take
592+
two arguments, `a`, a 1-D array, and `w`, the window size, while additional
593+
arguments may be specified by currying the user-defined function using
594+
`functools.partial`. Any subsequence with at least one np.nan/np.inf will
595+
automatically have its corresponding value set to False in this boolean
596+
array.
597+
540598
Attributes
541599
----------
542600
PAN_ : numpy.ndarray
@@ -590,6 +648,7 @@ def __init__(
590648
step=1,
591649
normalize=True,
592650
p=2.0,
651+
T_subseq_isconstant_func=None,
593652
):
594653
"""
595654
Initialize the `stimped` object and compute the Pan Matrix Profile
@@ -625,6 +684,15 @@ def __init__(
625684
p : float, default 2.0
626685
The p-norm to apply for computing the Minkowski distance. This parameter is
627686
ignored when `normalize == True`.
687+
688+
T_subseq_isconstant_func : function, default None
689+
A custom, user-defined function that returns a boolean array that indicates
690+
whether a subsequence in `T` is constant (True). The function must only take
691+
two arguments, `a`, a 1-D array, and `w`, the window size, while additional
692+
arguments may be specified by currying the user-defined function using
693+
`functools.partial`. Any subsequence with at least one np.nan/np.inf will
694+
automatically have its corresponding value set to False in this boolean
695+
array.
628696
"""
629697
super().__init__(
630698
T,
@@ -635,4 +703,5 @@ def __init__(
635703
pre_scrump=False,
636704
client=client,
637705
mp_func=stumped,
706+
T_subseq_isconstant_func=T_subseq_isconstant_func,
638707
)

tests/test_stimp.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import functools
2+
13
import naive
24
import numpy as np
35
import numpy.testing as npt
@@ -223,3 +225,138 @@ def test_stimped(T, dask_cluster):
223225
naive.replace_inf(cmp_pan)
224226

225227
npt.assert_almost_equal(ref_pan, cmp_pan)
228+
229+
230+
def test_stimp_1_percent_with_isconstant():
231+
T = np.random.uniform(-1, 1, [64])
232+
isconstant_func = functools.partial(
233+
naive.isconstant_func_stddev_threshold, stddev_threshold=0.5
234+
)
235+
236+
threshold = 0.2
237+
percentage = 0.01
238+
min_m = 3
239+
n = T.shape[0] - min_m + 1
240+
241+
seed = np.random.randint(100000)
242+
243+
np.random.seed(seed)
244+
pan = stimp(
245+
T,
246+
min_m=min_m,
247+
max_m=None,
248+
step=1,
249+
percentage=percentage,
250+
pre_scrump=True,
251+
# normalize=True,
252+
T_subseq_isconstant_func=isconstant_func,
253+
)
254+
255+
for i in range(n):
256+
pan.update()
257+
258+
ref_PAN = np.full((pan.M_.shape[0], T.shape[0]), fill_value=np.inf)
259+
260+
np.random.seed(seed)
261+
for idx, m in enumerate(pan.M_[:n]):
262+
zone = int(np.ceil(m / 4))
263+
s = zone
264+
tmp_P, tmp_I = naive.prescrump(
265+
T,
266+
m,
267+
T,
268+
s=s,
269+
exclusion_zone=zone,
270+
T_A_subseq_isconstant=isconstant_func,
271+
T_B_subseq_isconstant=isconstant_func,
272+
)
273+
ref_P, ref_I, _, _ = naive.scrump(
274+
T,
275+
m,
276+
T,
277+
percentage,
278+
zone,
279+
True,
280+
s,
281+
T_A_subseq_isconstant=isconstant_func,
282+
T_B_subseq_isconstant=isconstant_func,
283+
)
284+
naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I)
285+
ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P
286+
287+
# Compare raw pan
288+
cmp_PAN = pan._PAN
289+
290+
naive.replace_inf(ref_PAN)
291+
naive.replace_inf(cmp_PAN)
292+
293+
npt.assert_almost_equal(ref_PAN, cmp_PAN)
294+
295+
# Compare transformed pan
296+
cmp_pan = pan.PAN_
297+
ref_pan = naive.transform_pan(
298+
pan._PAN, pan._M, threshold, pan._bfs_indices, pan._n_processed
299+
)
300+
301+
naive.replace_inf(ref_pan)
302+
naive.replace_inf(cmp_pan)
303+
304+
npt.assert_almost_equal(ref_pan, cmp_pan)
305+
306+
307+
@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning")
308+
def test_stimped_with_isconstant(dask_cluster):
309+
T = np.random.uniform(-1, 1, [64])
310+
isconstant_func = functools.partial(
311+
naive.isconstant_func_stddev_threshold, stddev_threshold=0.5
312+
)
313+
314+
with Client(dask_cluster) as dask_client:
315+
threshold = 0.2
316+
min_m = 3
317+
n = T.shape[0] - min_m + 1
318+
319+
pan = stimped(
320+
dask_client,
321+
T,
322+
min_m=min_m,
323+
max_m=None,
324+
step=1,
325+
# normalize=True,
326+
T_subseq_isconstant_func=isconstant_func,
327+
)
328+
329+
for i in range(n):
330+
pan.update()
331+
332+
ref_PAN = np.full((pan.M_.shape[0], T.shape[0]), fill_value=np.inf)
333+
334+
for idx, m in enumerate(pan.M_[:n]):
335+
zone = int(np.ceil(m / 4))
336+
ref_mp = naive.stump(
337+
T,
338+
m,
339+
T_B=None,
340+
exclusion_zone=zone,
341+
T_A_subseq_isconstant=isconstant_func,
342+
)
343+
ref_PAN[pan._bfs_indices[idx], : ref_mp.shape[0]] = ref_mp[:, 0]
344+
345+
# Compare raw pan
346+
cmp_PAN = pan._PAN
347+
348+
naive.replace_inf(ref_PAN)
349+
naive.replace_inf(cmp_PAN)
350+
351+
npt.assert_almost_equal(ref_PAN, cmp_PAN)
352+
353+
# Compare transformed pan
354+
cmp_pan = pan.PAN_
355+
ref_pan = naive.transform_pan(
356+
pan._PAN, pan._M, threshold, pan._bfs_indices, pan._n_processed
357+
)
358+
359+
naive.replace_inf(ref_pan)
360+
naive.replace_inf(cmp_pan)
361+
362+
npt.assert_almost_equal(ref_pan, cmp_pan)

0 commit comments

Comments
 (0)