diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 1e994b151b..3df9d54e3f 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -349,6 +349,7 @@ inference-api-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/refere inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html,, inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-ai-studio.html,, inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-cohere.html,, +inference-api-put-custom,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom,, inference-api-put-deepseek,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-deepseek.html,, inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elasticsearch.html,, inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html,, diff --git a/specification/_json_spec/inference.put_custom.json b/specification/_json_spec/inference.put_custom.json new file mode 100644 index 0000000000..c12108683d --- /dev/null +++ b/specification/_json_spec/inference.put_custom.json @@ -0,0 +1,35 @@ +{ + "inference.put_custom": { + "documentation": { + "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom", + "description": "Configure a custom inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{custom_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + 
"custom_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index d288e4febf..eb2b90ea74 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -758,6 +758,256 @@ export class CohereTaskSettings { truncate?: CohereTruncateType } +export class CustomServiceSettings { + /** + * Specifies the HTTPS header parameters – such as `Authorization` or `Content-Type` – that are required to access the custom service. + * For example: + * ``` + * "headers":{ + * "Authorization": "Bearer ${api_key}", + * "Content-Type": "application/json;charset=utf-8" + * } + * ``` + */ + headers?: UserDefinedValue + /** + * Specifies the input type translation values that are used to replace the `${input_type}` template in the request body. + * For example: + * ``` + * "input_type": { + * "translation": { + * "ingest": "do_ingest", + * "search": "do_search" + * }, + * "default": "a_default" + * }, + * ``` + * If the subsequent inference requests come from a search context, the `search` key will be used and the template will be replaced with `do_search`. + * If it comes from the ingest context `do_ingest` is used. If it's a different context that is not specified, the default value will be used. If no default is specified an empty string is used. + * `translation` can be: + * * `classification` + * * `clustering` + * * `ingest` + * * `search` + */ + input_type?: UserDefinedValue + /** + * Specifies the query parameters as a list of tuples. The arrays inside the `query_parameters` must have two items, a key and a value. 
+ * For example: + * ``` + * "query_parameters":[ + * ["param_key", "some_value"], + * ["param_key", "another_value"], + * ["other_key", "other_value"] + * ] + * ``` + * If the base url is `https://www.elastic.co` it results in: `https://www.elastic.co?param_key=some_value&param_key=another_value&other_key=other_value`. + */ + query_parameters?: UserDefinedValue + /** + * The request configuration object. + */ + request: CustomRequestParams + /** + * The response configuration object. + */ + response: CustomResponseParams + /** + * Specifies secret parameters, like `api_key` or `api_token`, that are required to access the custom service. + * For example: + * ``` + * "secret_parameters":{ + * "api_key":"" + * } + * ``` + */ + secret_parameters: UserDefinedValue + /** + * The URL endpoint to use for the requests. + */ + url?: string +} + +export class CustomRequestParams { + /** + * The body structure of the request. It requires passing in the string-escaped result of the JSON format HTTP request body. + * For example: + * ``` + * "request": "{\"input\":${input}}" + * ``` + * > info + * > The content string needs to be a single line except when using the Kibana console. + */ + content: string +} + +export class CustomResponseParams { + /** + * Specifies the JSON parser that is used to parse the response from the custom service. + * Different task types require different json_parser parameters. 
+ * For example: + * ``` + * # text_embedding + * # For a response like this: + * + * { + * "object": "list", + * "data": [ + * { + * "object": "embedding", + * "index": 0, + * "embedding": [ + * 0.014539449, + * -0.015288644 + * ] + * } + * ], + * "model": "text-embedding-ada-002-v2", + * "usage": { + * "prompt_tokens": 8, + * "total_tokens": 8 + * } + * } + * + * # the json_parser definition should look like this: + * + * "response":{ + * "json_parser":{ + * "text_embeddings":"$.data[*].embedding[*]" + * } + * } + * + * # sparse_embedding + * # For a response like this: + * + * { + * "request_id": "75C50B5B-E79E-4930-****-F48DBB392231", + * "latency": 22, + * "usage": { + * "token_count": 11 + * }, + * "result": { + * "sparse_embeddings": [ + * { + * "index": 0, + * "embedding": [ + * { + * "token_id": 6, + * "weight": 0.101 + * }, + * { + * "token_id": 163040, + * "weight": 0.28417 + * } + * ] + * } + * ] + * } + * } + * + * # the json_parser definition should look like this: + * + * "response":{ + * "json_parser":{ + * "token_path":"$.result.sparse_embeddings[*].embedding[*].token_id", + * "weight_path":"$.result.sparse_embeddings[*].embedding[*].weight" + * } + * } + * + * # rerank + * # For a response like this: + * + * { + * "results": [ + * { + * "index": 3, + * "relevance_score": 0.999071, + * "document": "abc" + * }, + * { + * "index": 4, + * "relevance_score": 0.7867867, + * "document": "123" + * }, + * { + * "index": 0, + * "relevance_score": 0.32713068, + * "document": "super" + * } + * ], + * } + * + * # the json_parser definition should look like this: + * + * "response":{ + * "json_parser":{ + * "reranked_index":"$.result.scores[*].index", // optional + * "relevance_score":"$.result.scores[*].score", + * "document_text":"xxx" // optional + * } + * } + * + * # completion + * # For a response like this: + * + * { + * "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT", + * "object": "chat.completion", + * "created": 1741569952, + * "model": 
"gpt-4.1-2025-04-14", + * "choices": [ + * { + * "index": 0, + * "message": { + * "role": "assistant", + * "content": "Hello! How can I assist you today?", + * "refusal": null, + * "annotations": [] + * }, + * "logprobs": null, + * "finish_reason": "stop" + * } + * ] + * } + * + * # the json_parser definition should look like this: + * + * "response":{ + * "json_parser":{ + * "completion_result":"$.choices[*].message.content" + * } + * } + */ + json_parser: UserDefinedValue +} + +export enum CustomTaskType { + text_embedding, + sparse_embedding, + rerank, + completion +} + +export enum CustomServiceType { + custom +} + +export class CustomTaskSettings { + /** + * Specifies parameters that are required to run the custom service. The parameters depend on the model your custom service uses. + * For example: + * ``` + * "task_settings":{ + * "parameters":{ + * "input_type":"query", + * "return_token":true + * } + * } + * ``` + */ + parameters?: UserDefinedValue +} + export class EisServiceSettings { /** * The name of the model to use for the inference task. 
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index eaa8067433..e2d723345f 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -27,6 +27,7 @@ import { TaskTypeAzureAIStudio, TaskTypeAzureOpenAI, TaskTypeCohere, + TaskTypeCustom, TaskTypeDeepSeek, TaskTypeElasticsearch, TaskTypeELSER, @@ -75,18 +76,6 @@ export class InferenceEndpointInfo extends InferenceEndpoint { */ task_type: TaskType } - -export class InferenceEndpointInfoJinaAi extends InferenceEndpoint { - /** - * The inference Id - */ - inference_id: string - /** - * The task type - */ - task_type: TaskTypeJinaAi -} - export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint { /** * The inference Id @@ -153,6 +142,16 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint { task_type: TaskTypeCohere } +export class InferenceEndpointInfoCustom extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeCustom +} export class InferenceEndpointInfoDeepSeek extends InferenceEndpoint { /** * The inference Id @@ -219,6 +218,17 @@ export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint { task_type: TaskTypeHuggingFace } +export class InferenceEndpointInfoJinaAi extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeJinaAi +} + export class InferenceEndpointInfoMistral extends InferenceEndpoint { /** * The inference Id diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 5e511fe9b4..66f14ec500 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -65,6 +65,13 @@ export enum TaskTypeCohere { completion } +export enum TaskTypeCustom { + text_embedding, + sparse_embedding, + rerank, + completion +} + export 
enum TaskTypeDeepSeek { completion, chat_completion diff --git a/specification/inference/put_custom/PutCustomRequest.ts b/specification/inference/put_custom/PutCustomRequest.ts new file mode 100644 index 0000000000..04d1e056a8 --- /dev/null +++ b/specification/inference/put_custom/PutCustomRequest.ts @@ -0,0 +1,117 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { + CustomServiceSettings, + CustomServiceType, + CustomTaskSettings, + CustomTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' + +/** + * Create a custom inference endpoint. + * + * The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations. + * The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets. + * The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key. 
+ * Templates are portions of a string that start with `${` and end with `}`. + * The parameters `secret_parameters` and `task_settings` are checked for keys for template replacement. Template replacement is supported in the `request`, `headers`, `url`, and `query_parameters`. + * If the definition (key) is not found for a template, an error message is returned. + * In case of an endpoint definition like the following: + * ``` + * PUT _inference/text_embedding/test-text-embedding + * { + * "service": "custom", + * "service_settings": { + * "secret_parameters": { + * "api_key": "" + * }, + * "url": "...endpoints.huggingface.cloud/v1/embeddings", + * "headers": { + * "Authorization": "Bearer ${api_key}", + * "Content-Type": "application/json" + * }, + * "request": "{\"input\": ${input}}", + * "response": { + * "json_parser": { + * "text_embeddings":"$.data[*].embedding[*]" + * } + * } + * } + * } + * ``` + * To replace `${api_key}` the `secret_parameters` and `task_settings` are checked for a key named `api_key`. + * + * > info + * > Templates should not be surrounded by quotes. + * + * Pre-defined templates: + * * `${input}` refers to the array of input strings that comes from the `input` field of the subsequent inference requests. + * * `${input_type}` refers to the input type translation values. + * * `${query}` refers to the query field used specifically for reranking tasks. + * * `${top_n}` refers to the `top_n` field available when performing rerank requests. + * * `${return_documents}` refers to the `return_documents` field available when performing rerank requests. 
+ * @rest_spec_name inference.put_custom + * @availability stack since=8.13.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-custom + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{custom_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: CustomTaskType + /** + * The unique identifier of the inference endpoint. + */ + custom_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `custom`. + */ + service: CustomServiceType + /** + * Settings used to install the inference model. + * These settings are specific to the `custom` service. + */ + service_settings: CustomServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: CustomTaskSettings + } +} diff --git a/specification/inference/put_custom/PutCustomResponse.ts b/specification/inference/put_custom/PutCustomResponse.ts new file mode 100644 index 0000000000..c09467c944 --- /dev/null +++ b/specification/inference/put_custom/PutCustomResponse.ts @@ -0,0 +1,25 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfoCustom } from '@inference/_types/Services' + +export class Response { + /** @codegen_name endpoint_info */ + body: InferenceEndpointInfoCustom +} diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample1.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample1.yaml new file mode 100644 index 0000000000..dabe71ed86 --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample1.yaml @@ -0,0 +1,24 @@ +summary: Custom text embedding task (OpenAI) +description: Run `PUT _inference/text_embedding/custom-embeddings` to create an inference endpoint that performs a text embedding task. 
+method_request: 'PUT _inference/text_embedding/custom-embeddings' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.openai.com/v1/embeddings", + "headers": { + "Authorization": "Bearer ${api_key}", + "Content-Type": "application/json;charset=utf-8" + }, + "request": "{\"input\": ${input}, \"model\": \"text-embedding-3-small\"}", + "response": { + "json_parser": { + "text_embeddings": "$.data[*].embedding[*]" + } + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample2.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample2.yaml new file mode 100644 index 0000000000..1fb61b58c1 --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample2.yaml @@ -0,0 +1,25 @@ +summary: Custom rerank task (Cohere APIv2) +description: Run `PUT _inference/rerank/custom-rerank` to create an inference endpoint that performs a rerank task. 
+method_request: 'PUT _inference/rerank/custom-rerank' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.cohere.com/v2/rerank", + "headers": { + "Authorization": "bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"documents\": ${input}, \"query\": ${query}, \"model\": \"rerank-v3.5\"}", + "response": { + "json_parser": { + "reranked_index":"$.results[*].index", + "relevance_score":"$.results[*].relevance_score" + } + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample3.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample3.yaml new file mode 100644 index 0000000000..70e5609051 --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample3.yaml @@ -0,0 +1,31 @@ +summary: Custom text embedding task (Cohere APIv2) +description: Run `PUT _inference/text_embedding/custom-text-embedding` to create an inference endpoint that performs a text embedding task. 
+method_request: 'PUT _inference/text_embedding/custom-text-embedding' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.cohere.com/v2/embed", + "headers": { + "Authorization": "bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"texts\": ${input}, \"model\": \"embed-v4.0\", \"input_type\": ${input_type}}", + "response": { + "json_parser": { + "text_embeddings":"$.embeddings.float[*]" + } + }, + "input_type": { + "translation": { + "ingest": "search_document", + "search": "search_query" + }, + "default": "search_document" + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample4.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample4.yaml new file mode 100644 index 0000000000..4ecaaf020b --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample4.yaml @@ -0,0 +1,25 @@ +summary: Custom rerank task (Jina AI) +description: Run `PUT _inference/rerank/custom-rerank-jina` to create an inference endpoint that performs a rerank task. 
+method_request: 'PUT _inference/rerank/custom-rerank-jina' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.jina.ai/v1/rerank", + "headers": { + "Content-Type": "application/json", + "Authorization": "Bearer ${api_key}" + }, + "request": "{\"model\": \"jina-reranker-v2-base-multilingual\",\"query\": ${query},\"documents\":${input}}", + "response": { + "json_parser": { + "relevance_score": "$.results[*].relevance_score", + "reranked_index": "$.results[*].index" + } + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample5.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample5.yaml new file mode 100644 index 0000000000..c9f86dad8d --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample5.yaml @@ -0,0 +1,24 @@ +summary: Custom text embedding task (Hugging Face) +description: Run `PUT _inference/text_embedding/custom-text-embedding-hf` to create an inference endpoint that performs a text embedding task by using the Qwen/Qwen3-Embedding-8B model. 
+method_request: 'PUT _inference/text_embedding/custom-text-embedding-hf' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "/v1/embeddings", + "headers": { + "Authorization": "Bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"input\": ${input}}", + "response": { + "json_parser": { + "text_embeddings":"$.data[*].embedding[*]" + } + } + } + } diff --git a/specification/inference/put_deepseek/request/PutDeepSeekRequestExample1.yaml b/specification/inference/put_deepseek/examples/PutDeepSeekRequestExample1.yaml similarity index 100% rename from specification/inference/put_deepseek/request/PutDeepSeekRequestExample1.yaml rename to specification/inference/put_deepseek/examples/PutDeepSeekRequestExample1.yaml diff --git a/specification/inference/put_deepseek/request/PutDeepSeekRequestExample2.yaml b/specification/inference/put_deepseek/examples/PutDeepSeekRequestExample2.yaml similarity index 100% rename from specification/inference/put_deepseek/request/PutDeepSeekRequestExample2.yaml rename to specification/inference/put_deepseek/examples/PutDeepSeekRequestExample2.yaml