@@ -20,10 +20,10 @@ def _compute_P_ABBA(
20
20
21
21
The MPdist distance measure considers two time series to be similar if they share
22
22
many subsequences, regardless of the order of matching subsequences. MPdist
23
- concatenates and sorts the output of an AB-join and a BA-join and returns the value
24
- of the `k`th smallest number as the reported distance. Note that MPdist is a
25
- measure and not a metric. Therefore, it does not obey the triangular inequality but
26
- the method is highly scalable.
23
+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
24
+ value as the reported distance. Note that MPdist is a measure and not a metric.
25
+ Therefore, it does not obey the triangular inequality but the method is highly
26
+ scalable.
27
27
28
28
Parameters
29
29
----------
@@ -81,15 +81,15 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
81
81
82
82
The MPdist distance measure considers two time series to be similar if they share
83
83
many subsequences, regardless of the order of matching subsequences. MPdist
84
- concatenates and sorts the output of an AB-join and a BA-join and returns the value
85
- of the `k`th smallest number as the reported distance. Note that MPdist is a
86
- measure and not a metric. Therefore, it does not obey the triangular inequality but
87
- the method is highly scalable.
84
+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
85
+ value as the reported distance. Note that MPdist is a measure and not a metric.
86
+ Therefore, it does not obey the triangular inequality but the method is highly
87
+ scalable.
88
88
89
89
Parameters
90
90
----------
91
91
P_ABBA : ndarray
92
- A pre-sorted array resulting from the concatenation of the outputs from an
92
+ An unsorted array resulting from the concatenation of the outputs from an
93
93
AB-join and BA-join for two time series, `T_A` and `T_B`
94
94
95
95
k : int
@@ -98,7 +98,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
98
98
99
99
custom_func : object, default None
100
100
A custom user defined function for selecting the desired value from the
101
- sorted `P_ABBA` array. This function may need to leverage `functools.partial`
101
+ unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
102
102
and should take `P_ABBA` as its only input parameter and return a single
103
103
`MPdist` value. The `percentage` and `k` parameters are ignored when
104
104
`custom_func` is not None.
@@ -112,10 +112,12 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
112
112
if custom_func is not None :
113
113
MPdist = custom_func (P_ABBA )
114
114
else :
115
- MPdist = P_ABBA [k ]
115
+ partition = np .partition (P_ABBA , k )
116
+ MPdist = partition [k ]
116
117
if ~ np .isfinite (MPdist ):
117
- k = max (0 , np .count_nonzero (np .isfinite (P_ABBA [:k ])) - 1 )
118
- MPdist = P_ABBA [k ]
118
+ partition [:k ].sort ()
119
+ k = max (0 , np .count_nonzero (np .isfinite (partition [:k ])) - 1 )
120
+ MPdist = partition [k ]
119
121
120
122
return MPdist
121
123
@@ -137,10 +139,10 @@ def _mpdist(
137
139
138
140
The MPdist distance measure considers two time series to be similar if they share
139
141
many subsequences, regardless of the order of matching subsequences. MPdist
140
- concatenates and sorts the output of an AB-join and a BA-join and returns the value
141
- of the `k`th smallest number as the reported distance. Note that MPdist is a
142
- measure and not a metric. Therefore, it does not obey the triangular inequality but
143
- the method is highly scalable.
142
+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
143
+ value as the reported distance. Note that MPdist is a measure and not a metric.
144
+ Therefore, it does not obey the triangular inequality but the method is highly
145
+ scalable.
144
146
145
147
Parameters
146
148
----------
@@ -180,7 +182,7 @@ def _mpdist(
180
182
181
183
custom_func : object, default None
182
184
A custom user defined function for selecting the desired value from the
183
- sorted `P_ABBA` array. This function may need to leverage `functools.partial`
185
+ unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
184
186
and should take `P_ABBA` as its only input parameter and return a single
185
187
`MPdist` value. The `percentage` and `k` parameters are ignored when
186
188
`custom_func` is not None.
@@ -202,13 +204,11 @@ def _mpdist(
202
204
P_ABBA = np .empty (n_A - m + 1 + n_B - m + 1 , dtype = np .float64 )
203
205
204
206
_compute_P_ABBA (T_A , T_B , m , P_ABBA , dask_client , device_id , mp_func )
205
- P_ABBA .sort ()
206
207
207
208
if k is not None :
208
209
k = min (int (k ), P_ABBA .shape [0 ] - 1 )
209
210
else :
210
- percentage = min (percentage , 1.0 )
211
- percentage = max (percentage , 0.0 )
211
+ percentage = np .clip (percentage , 0.0 , 1.0 )
212
212
k = min (math .ceil (percentage * (n_A + n_B )), n_A - m + 1 + n_B - m + 1 - 1 )
213
213
214
214
MPdist = _select_P_ABBA_value (P_ABBA , k , custom_func )
@@ -252,7 +252,7 @@ def _mpdist_vect(
252
252
253
253
custom_func : object, default None
254
254
A custom user defined function for selecting the desired value from the
255
- sorted `P_ABBA` array. This function may need to leverage `functools.partial`
255
+ unsorted `P_ABBA` array. This function may need to leverage `functools.partial`
256
256
and should take `P_ABBA` as its only input parameter and return a single
257
257
`MPdist` value. The `percentage` and `k` parameters are ignored when
258
258
`custom_func` is not None.
@@ -267,8 +267,7 @@ def _mpdist_vect(
267
267
P_ABBA = np .empty (2 * j )
268
268
269
269
if k is None :
270
- percentage = min (percentage , 1.0 )
271
- percentage = max (percentage , 0.0 )
270
+ percentage = np .clip (percentage , 0.0 , 1.0 )
272
271
k = min (math .ceil (percentage * (2 * Q .shape [0 ])), 2 * j - 1 )
273
272
274
273
k = min (int (k ), P_ABBA .shape [0 ] - 1 )
@@ -281,7 +280,6 @@ def _mpdist_vect(
281
280
for i in range (MPdist_vect .shape [0 ]):
282
281
P_ABBA [:j ] = rolling_row_min [:, i ]
283
282
P_ABBA [j :] = col_min [i : i + j ]
284
- P_ABBA .sort ()
285
283
MPdist_vect [i ] = _select_P_ABBA_value (P_ABBA , k , custom_func )
286
284
287
285
return MPdist_vect
@@ -295,10 +293,10 @@ def mpdist(T_A, T_B, m, percentage=0.05, k=None, normalize=True):
295
293
296
294
The MPdist distance measure considers two time series to be similar if they share
297
295
many subsequences, regardless of the order of matching subsequences. MPdist
298
- concatenates and sorts the output of an AB-join and a BA-join and returns the value
299
- of the `k`th smallest number as the reported distance. Note that MPdist is a
300
- measure and not a metric. Therefore, it does not obey the triangular inequality but
301
- the method is highly scalable.
296
+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
297
+ value as the reported distance. Note that MPdist is a measure and not a metric.
298
+ Therefore, it does not obey the triangular inequality but the method is highly
299
+ scalable.
302
300
303
301
Parameters
304
302
----------
@@ -349,10 +347,10 @@ def mpdisted(dask_client, T_A, T_B, m, percentage=0.05, k=None, normalize=True):
349
347
350
348
The MPdist distance measure considers two time series to be similar if they share
351
349
many subsequences, regardless of the order of matching subsequences. MPdist
352
- concatenates and sorts the output of an AB-join and a BA-join and returns the value
353
- of the `k`th smallest number as the reported distance. Note that MPdist is a
354
- measure and not a metric. Therefore, it does not obey the triangular inequality but
355
- the method is highly scalable.
350
+ concatenates the output of an AB-join and a BA-join and returns the `k`th smallest
351
+ value as the reported distance. Note that MPdist is a measure and not a metric.
352
+ Therefore, it does not obey the triangular inequality but the method is highly
353
+ scalable.
356
354
357
355
Parameters
358
356
----------
0 commit comments