@@ -144,8 +144,26 @@ def __init__(
144
144
145
145
class ChunkingSettings (AttrDict [Any ]):
146
146
"""
147
- :arg strategy: (required) The chunking strategy: `sentence` or `word`.
148
- Defaults to `sentence` if omitted.
147
+ :arg strategy: (required) The chunking strategy: `sentence`, `word`,
148
+ `none` or `recursive`. If `strategy` is set to `recursive`,
149
+ you must also specify `max_chunk_size` and either `separators`
150
+ or `separator_group`. Learn more about different chunking
151
+ strategies in the linked documentation. Defaults to `sentence` if
152
+ omitted.
153
+ :arg separator_group: (required) This parameter is only applicable
154
+ when using the `recursive` chunking strategy. Sets a predefined
155
+ list of separators in the saved chunking settings based on the
156
+ selected text type. Values can be `markdown` or `plaintext`.
157
+ Using this parameter is an alternative to manually specifying a
158
+ custom `separators` list.
159
+ :arg separators: (required) A list of strings used as possible split
160
+ points when chunking text with the `recursive` strategy. Each
161
+ string can be a plain string or a regular expression (regex)
162
+ pattern. The system tries each separator in order to split the
163
+ text, starting from the first item in the list. After splitting,
164
+ it attempts to recombine smaller pieces into larger chunks that
165
+ stay within the `max_chunk_size` limit, to reduce the total number
166
+ of chunks generated.
149
167
:arg max_chunk_size: (required) The maximum size of a chunk in words.
150
168
This value cannot be higher than `300` or lower than `20` (for
151
169
`sentence` strategy) or `10` (for `word` strategy). Defaults to
@@ -160,6 +178,8 @@ class ChunkingSettings(AttrDict[Any]):
160
178
"""
161
179
162
180
strategy : Union [str , DefaultType ]
181
+ separator_group : Union [str , DefaultType ]
182
+ separators : Union [Sequence [str ], DefaultType ]
163
183
max_chunk_size : Union [int , DefaultType ]
164
184
overlap : Union [int , DefaultType ]
165
185
sentence_overlap : Union [int , DefaultType ]
@@ -168,13 +188,19 @@ def __init__(
168
188
self ,
169
189
* ,
170
190
strategy : Union [str , DefaultType ] = DEFAULT ,
191
+ separator_group : Union [str , DefaultType ] = DEFAULT ,
192
+ separators : Union [Sequence [str ], DefaultType ] = DEFAULT ,
171
193
max_chunk_size : Union [int , DefaultType ] = DEFAULT ,
172
194
overlap : Union [int , DefaultType ] = DEFAULT ,
173
195
sentence_overlap : Union [int , DefaultType ] = DEFAULT ,
174
196
** kwargs : Any ,
175
197
):
176
198
if strategy is not DEFAULT :
177
199
kwargs ["strategy" ] = strategy
200
+ if separator_group is not DEFAULT :
201
+ kwargs ["separator_group" ] = separator_group
202
+ if separators is not DEFAULT :
203
+ kwargs ["separators" ] = separators
178
204
if max_chunk_size is not DEFAULT :
179
205
kwargs ["max_chunk_size" ] = max_chunk_size
180
206
if overlap is not DEFAULT :
@@ -3723,6 +3749,38 @@ def __init__(
3723
3749
super ().__init__ (kwargs )
3724
3750
3725
3751
3752
+ class SparseVectorIndexOptions (AttrDict [Any ]):
3753
+ """
3754
+ :arg prune: Whether to perform pruning, omitting the non-significant
3755
+ tokens from the query to improve query performance. If prune is
3756
+ true but the pruning_config is not specified, pruning will occur
3757
+ but default values will be used. Default: false
3758
+ :arg pruning_config: Optional pruning configuration. If enabled, this
3759
+ will omit non-significant tokens from the query in order to
3760
+ improve query performance. This is only used if prune is set to
3761
+ true. If prune is set to true but pruning_config is not specified,
3762
+ default values will be used.
3763
+ """
3764
+
3765
+ prune : Union [bool , DefaultType ]
3766
+ pruning_config : Union ["TokenPruningConfig" , Dict [str , Any ], DefaultType ]
3767
+
3768
+ def __init__ (
3769
+ self ,
3770
+ * ,
3771
+ prune : Union [bool , DefaultType ] = DEFAULT ,
3772
+ pruning_config : Union [
3773
+ "TokenPruningConfig" , Dict [str , Any ], DefaultType
3774
+ ] = DEFAULT ,
3775
+ ** kwargs : Any ,
3776
+ ):
3777
+ if prune is not DEFAULT :
3778
+ kwargs ["prune" ] = prune
3779
+ if pruning_config is not DEFAULT :
3780
+ kwargs ["pruning_config" ] = pruning_config
3781
+ super ().__init__ (kwargs )
3782
+
3783
+
3726
3784
class SuggestContext (AttrDict [Any ]):
3727
3785
"""
3728
3786
:arg name: (required)
@@ -5166,9 +5224,11 @@ def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]:
5166
5224
class FiltersBucket (AttrDict [Any ]):
5167
5225
"""
5168
5226
:arg doc_count: (required)
5227
+ :arg key:
5169
5228
"""
5170
5229
5171
5230
doc_count : int
5231
+ key : str
5172
5232
5173
5233
5174
5234
class FrequentItemSetsAggregate (AttrDict [Any ]):
0 commit comments