elastic · szabosteve · Jul 21, 2025 · Jul 21, 2025 · Jul 21, 2025
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
@@ -349,6 +349,7 @@ inference-api-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/refere
 inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html,,
 inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-ai-studio.html,,
 inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-cohere.html,,
+inference-api-put-custom,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom,,
 inference-api-put-deepseek,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-deepseek.html,,
 inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elasticsearch.html,,
 inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html,,

diff --git a/specification/_json_spec/inference.put_custom.json b/specification/_json_spec/inference.put_custom.json
@@ -0,0 +1,35 @@
+{
+  "inference.put_custom": {
+    "documentation": {
+      "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom",
+      "description": "Configure a custom inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{custom_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "custom_inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's task and service settings"
+    }
+  }
+}
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -758,6 +758,256 @@ export class CohereTaskSettings {
   truncate?: CohereTruncateType
 }
 
+export class CustomServiceSettings {
+  /**
+   * Specifies the HTTP header parameters – such as `Authorization` or `Content-Type` – that are required to access the custom service.
+   * For example:
+   * ```
+   * "headers":{
+   *   "Authorization": "Bearer ${api_key}",
+   *   "Content-Type": "application/json;charset=utf-8"
+   * }
+   * ```
+   */
+  headers?: UserDefinedValue
+  /**
+   * Specifies the input type translation values that are used to replace the `${input_type}` template in the request body.
+   * For example:
+   * ```
+   * "input_type": {
+   *   "translation": {
+   *     "ingest": "do_ingest",
+   *     "search": "do_search"
+   *   },
+   *   "default": "a_default"
+   * },
+   * ```
+   * If a subsequent inference request comes from a search context, the `search` key is used and the template is replaced with `do_search`.
+   * If it comes from an ingest context, `do_ingest` is used. If it comes from a different context that is not specified, the default value is used. If no default is specified, an empty string is used.
+   * The keys of `translation` can be:
+   * * `classification`
+   * * `clustering`
+   * * `ingest`
+   * * `search`
+   */
+  input_type?: UserDefinedValue
+  /**
+   * Specifies the query parameters as a list of tuples. The arrays inside the `query_parameters` must have two items, a key and a value.
+   * For example:
+   * ```
+   * "query_parameters":[
+   *   ["param_key", "some_value"],
+   *   ["param_key", "another_value"],
+   *   ["other_key", "other_value"]
+   * ]
+   * ```
+   * If the base URL is `https://www.elastic.co` it results in: `https://www.elastic.co?param_key=some_value&param_key=another_value&other_key=other_value`.
+   */
+  query_parameters?: UserDefinedValue
+  /**
+   * The request configuration object.
+   */
+  request: CustomRequestParams
+  /**
+   * The response configuration object.
+   */
+  response: CustomResponseParams
+  /**
+   * Specifies secret parameters, like `api_key` or `api_token`, that are required to access the custom service.
+   * For example:
+   * ```
+   * "secret_parameters":{
+   *   "api_key":"<api_key>"
+   * }
+   * ```
+   */
+  secret_parameters: UserDefinedValue
+  /**
+   * The URL endpoint to use for the requests.
+   */
+  url?: string
+}
+
+export class CustomRequestParams {
+  /**
+   * The body structure of the request. It requires passing in the string-escaped result of the JSON format HTTP request body.
+   * For example:
+   * ```
+   * "content": "{\"input\":${input}}"
+   * ```
+   * > info
+   * > The content string needs to be a single line except when using the Kibana console.
+   */
+  content: string
+}
+
+export class CustomResponseParams {
+  /**
+   * Specifies the JSON parser that is used to parse the response from the custom service.
+   * Different task types require different json_parser parameters.
+   * For example:
+   * ```
+   * # text_embedding
+   * # For a response like this:
+   *
+   * {
+   *  "object": "list",
+   *  "data": [
+   *      {
+   *        "object": "embedding",
+   *        "index": 0,
+   *        "embedding": [
+   *            0.014539449,
+   *            -0.015288644
+   *        ]
+   *      }
+   *  ],
+   *  "model": "text-embedding-ada-002-v2",
+   *  "usage": {
+   *      "prompt_tokens": 8,
+   *      "total_tokens": 8
+   *  }
+   * }
+   *
+   * # the json_parser definition should look like this:
+   *
+   * "response":{
+   *   "json_parser":{
+   *     "text_embeddings":"$.data[*].embedding[*]"
+   *   }
+   * }
+   *
+   * # sparse_embedding
+   * # For a response like this:
+   *
+   * {
+   *   "request_id": "75C50B5B-E79E-4930-****-F48DBB392231",
+   *   "latency": 22,
+   *   "usage": {
+   *      "token_count": 11
+   *   },
+   *   "result": {
+   *      "sparse_embeddings": [
+   *         {
+   *           "index": 0,
+   *           "embedding": [
+   *             {
+   *               "token_id": 6,
+   *               "weight": 0.101
+   *             },
+   *             {
+   *               "token_id": 163040,
+   *               "weight": 0.28417
+   *             }
+   *           ]
+   *         }
+   *      ]
+   *   }
+   * }
+   *
+   * # the json_parser definition should look like this:
+   *
+   * "response":{
+   *   "json_parser":{
+   *     "token_path":"$.result.sparse_embeddings[*].embedding[*].token_id",
+   *     "weight_path":"$.result.sparse_embeddings[*].embedding[*].weight"
+   *   }
+   * }
+   *
+   * # rerank
+   * # For a response like this:
+   *
+   * {
+   *   "results": [
+   *     {
+   *       "index": 3,
+   *       "relevance_score": 0.999071,
+   *       "document": "abc"
+   *     },
+   *     {
+   *       "index": 4,
+   *       "relevance_score": 0.7867867,
+   *       "document": "123"
+   *     },
+   *     {
+   *       "index": 0,
+   *       "relevance_score": 0.32713068,
+   *       "document": "super"
+   *     }
+   *   ]
+   * }
+   *
+   * # the json_parser definition should look like this:
+   *
+   * "response":{
+   *   "json_parser":{
+   *     "reranked_index":"$.results[*].index",    // optional
+   *     "relevance_score":"$.results[*].relevance_score",
+   *     "document_text":"$.results[*].document"    // optional
+   *   }
+   * }
+   *
+   * # completion
+   * # For a response like this:
+   *
+   * {
+   *  "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT",
+   *  "object": "chat.completion",
+   *  "created": 1741569952,
+   *  "model": "gpt-4.1-2025-04-14",
+   *  "choices": [
+   *    {
+   *     "index": 0,
+   *     "message": {
+   *       "role": "assistant",
+   *       "content": "Hello! How can I assist you today?",
+   *       "refusal": null,
+   *       "annotations": []
+   *     },
+   *     "logprobs": null,
+   *     "finish_reason": "stop"
+   *   }
+   *  ]
+   * }
+   *
+   * # the json_parser definition should look like this:
+   *
+   * "response":{
+   *   "json_parser":{
+   *     "completion_result":"$.choices[*].message.content"
+   *   }
+   * }
+   */
+  json_parser: UserDefinedValue
+}
+
+export enum CustomTaskType {
+  text_embedding,
+  sparse_embedding,
+  rerank,
+  completion
+}
+
+export enum CustomServiceType {
+  custom
+}
+
+export class CustomTaskSettings {
+  /**
+   * Specifies parameters that are required to run the custom service. The parameters depend on the model your custom service uses.
+   * For example:
+   * ```
+   * "task_settings":{
+   *   "parameters":{
+   *     "input_type":"query",
+   *     "return_token":true
+   *   }
+   * }
+   * ```
+   */
+  parameters?: UserDefinedValue
+}
+
 export class EisServiceSettings {
   /**
    * The name of the model to use for the inference task.

diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
@@ -27,6 +27,7 @@ import {
   TaskTypeAzureAIStudio,
   TaskTypeAzureOpenAI,
   TaskTypeCohere,
+  TaskTypeCustom,
   TaskTypeDeepSeek,
   TaskTypeElasticsearch,
   TaskTypeELSER,
@@ -75,18 +76,6 @@ export class InferenceEndpointInfo extends InferenceEndpoint {
    */
   task_type: TaskType
 }
-
-export class InferenceEndpointInfoJinaAi extends InferenceEndpoint {
-  /**
-   * The inference Id
-   */
-  inference_id: string
-  /**
-   * The task type
-   */
-  task_type: TaskTypeJinaAi
-}
-
 export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint {
   /**
    * The inference Id
@@ -153,6 +142,16 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint {
   task_type: TaskTypeCohere
 }
 
+export class InferenceEndpointInfoCustom extends InferenceEndpoint {
+  /**
+   * The inference Id
+   */
+  inference_id: string
+  /**
+   * The task type
+   */
+  task_type: TaskTypeCustom
+}
 export class InferenceEndpointInfoDeepSeek extends InferenceEndpoint {
   /**
    * The inference Id
@@ -219,6 +218,17 @@ export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint {
   task_type: TaskTypeHuggingFace
 }
 
+export class InferenceEndpointInfoJinaAi extends InferenceEndpoint {
+  /**
+   * The inference Id
+   */
+  inference_id: string
+  /**
+   * The task type
+   */
+  task_type: TaskTypeJinaAi
+}
+
 export class InferenceEndpointInfoMistral extends InferenceEndpoint {
   /**
    * The inference Id

diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts
@@ -65,6 +65,13 @@ export enum TaskTypeCohere {
   completion
 }
 
+export enum TaskTypeCustom {
+  text_embedding,
+  sparse_embedding,
+  rerank,
+  completion
+}
+
 export enum TaskTypeDeepSeek {
   completion,
   chat_completion