diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 1fd6405d91..f2045a8ba9 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17762,6 +17762,92 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{amazonbedrock_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Amazon Bedrock inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-amazonbedrock", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "amazonbedrock_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAmazonBedrockRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{anthropic_inference_id}": { "put": { "tags": [ @@ -77698,6 +77784,92 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_amazonbedrock:AmazonBedrockTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_amazonbedrock:ServiceType": { + "type": "string", + "enum": [ + "amazonbedrock" + ] + }, + "inference.put_amazonbedrock:AmazonBedrockServiceSettings": { + "type": "object", + "properties": { + "access_key": { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "type": "string" + }, + "model": { + "externalDocs": { + "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html" + }, + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "type": "string" + }, + "region": { + "externalDocs": { + "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html" + }, + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "secret_key": { + "externalDocs": { + "url": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html" + }, + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.", + "type": "string" + } + }, + "required": [ + 
"access_key", + "model", + "region", + "secret_key" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_amazonbedrock:AmazonBedrockTaskSettings": { + "type": "object", + "properties": { + "max_new_tokens": { + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + } + } + }, "inference.put_anthropic:AnthropicTaskType": { "type": "string", "enum": [ @@ -77730,15 +77902,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_anthropic:AnthropicTaskSettings": { "type": "object", "properties": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 99b7323b6b..084973d6fe 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9584,6 +9584,92 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{amazonbedrock_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Amazon Bedrock inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-amazonbedrock", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "amazonbedrock_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAmazonBedrockRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{anthropic_inference_id}": { "put": { "tags": [ @@ -48890,6 +48976,92 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_amazonbedrock:AmazonBedrockTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_amazonbedrock:ServiceType": { + "type": "string", + "enum": [ + "amazonbedrock" + ] + }, + "inference.put_amazonbedrock:AmazonBedrockServiceSettings": { + "type": "object", + "properties": { + "access_key": { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "type": "string" + }, + "model": { + "externalDocs": { + "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html" + }, + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "type": "string" + }, + "region": { + "externalDocs": { + "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html" + }, + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "secret_key": { + "externalDocs": { + "url": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html" + }, + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.", + "type": "string" + } + }, + "required": [ + "access_key", + "model", + "region", + "secret_key" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_amazonbedrock:AmazonBedrockTaskSettings": { + "type": "object", + "properties": { + "max_new_tokens": { + "description": "For a `completion` task, it sets the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is 
specified.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + } + } + }, "inference.put_anthropic:AnthropicTaskType": { "type": "string", "enum": [ @@ -48922,15 +49094,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_anthropic:AnthropicTaskSettings": { "type": "object", "properties": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 38113d72ff..2873629756 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4591,6 +4591,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27383,6 +27428,136 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + } + } + } + ] + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "amazonbedrock_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" ], @@ -101987,6 +102162,35 @@ ], "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" + }, + { + "kind": "enum", + "members": [ + { + "name": "amazonbedrock" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" + }, { "kind": "enum", "members": [ @@ -122835,13 +123039,13 @@ { "kind": "interface", "name": { - "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" }, "properties": [ { - "description": "A valid API key for the Anthropic API.", - "name": "api_key", + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", "required": true, "type": { "kind": "instance_of", "type": { "name": "string", "namespace": "_builtins" } } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", - "extDocId": "anothropic-models", - "name": "model_id", + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", "required": true, "type": { "kind": "instance_of", "type": { "name": "string", "namespace": "_builtins" } } }, { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + 
"description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", "name": "rate_limit", "required": false, "type": { @@ -122875,9 +123106,23 @@ "namespace": "inference._types" } } + }, + { + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" }, { "kind": "interface", @@ -122901,6 +123146,112 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + }, + { + "kind": "interface", + "name": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "A valid API key for the Anthropic API.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "extDocId": "anothropic-models", + "name": "model_id", 
+ "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 4c3397d5fc..7f583fec96 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9303,6 +9303,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150828,6 +150873,312 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock 
documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.\nBy default, the `amazonbedrock` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" + }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "For a `completion` task, it sets the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and 
mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + } + } + } + ] + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "amazonbedrock_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "amazonbedrock" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 945582582c..5081060693 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13252,6 +13252,39 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutAmazonbedrockAmazonBedrockServiceSettings { + access_key: string + model: string + provider?: string + region: string + rate_limit?: InferenceRateLimitSetting + secret_key: string +} + +export interface InferencePutAmazonbedrockAmazonBedrockTaskSettings { + max_new_tokens?: integer + temperature?: float + top_k?: float + top_p?: float +} + +export type InferencePutAmazonbedrockAmazonBedrockTaskType = 'completion' | 'text_embedding' + +export interface InferencePutAmazonbedrockRequest extends RequestBase { + task_type: InferencePutAmazonbedrockAmazonBedrockTaskType + amazonbedrock_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAmazonbedrockServiceType + service_settings: InferencePutAmazonbedrockAmazonBedrockServiceSettings + task_settings?: InferencePutAmazonbedrockAmazonBedrockTaskSettings + } +} + +export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfo + +export type InferencePutAmazonbedrockServiceType = 'amazonbedrock' + export interface InferencePutAnthropicAnthropicServiceSettings { api_key: string model_id: string diff --git a/package-lock.json b/package-lock.json index 6b2f4fc3b3..346e9c827b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "@redocly/cli": "^1.33.1", + "@redocly/cli": "^1.34.0", "@stoplight/spectral-cli": "^6.14.2" } }, @@ -486,9 +486,9 @@ } }, "node_modules/@redocly/cli": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.33.1.tgz", - "integrity": "sha512-co+Vr/RfH9Nca3eiYuYvbLxI+5RVOyJ+l56B0SmU5UHfticTUXirO0vxtFmkHmch6YIFVU6BCF4tFbj7ssF8iQ==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.0.tgz", + "integrity": "sha512-Kg/t9zMjZB5cyb0YQLa+gne5E5Rz6wZP/goug1+2qaR17UqeupidBzwqDdr3lszEK3q2A37g4+W7pvdBOkiGQA==", "license": "MIT", "dependencies": { "@opentelemetry/api": "1.9.0", @@ -497,8 +497,8 @@ "@opentelemetry/sdk-trace-node": "1.26.0", "@opentelemetry/semantic-conventions": "1.27.0", "@redocly/config": "^0.22.0", - "@redocly/openapi-core": "1.33.1", - "@redocly/respect-core": "1.33.1", + "@redocly/openapi-core": "1.34.0", + "@redocly/respect-core": "1.34.0", "abort-controller": "^3.0.0", "chokidar": "^3.5.1", "colorette": "^1.2.0", @@ -561,9 +561,9 @@ "license": "MIT" }, "node_modules/@redocly/openapi-core": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.33.1.tgz", - "integrity": "sha512-tL3v8FVwdcCAcruOZV77uxH2ZFtnY3DRPG+rgmlm9hsu5uoatofVSJIJHUroz54KJ8ryeo28wQHhOr8iReGGEQ==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.0.tgz", + 
"integrity": "sha512-Ji00EiLQRXq0pJIz5pAjGF9MfQvQVsQehc6uIis6sqat8tG/zh25Zi64w6HVGEDgJEzUeq/CuUlD0emu3Hdaqw==", "license": "MIT", "dependencies": { "@redocly/ajv": "^8.11.2", @@ -603,14 +603,14 @@ } }, "node_modules/@redocly/respect-core": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.33.1.tgz", - "integrity": "sha512-Sh6TahtuvSzvejkfu74KErdMX6VtrNNRJAtwH9A6R1Igo8WVmrdoFE99uAp/dOL9bpAQPg4oKtrTF60avN7YYA==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.0.tgz", + "integrity": "sha512-CO2XxJ0SUYHKixKPTQm2U6QrGLnNhQy88CnX20llCxXDKd485cSioRMZ8MMNhHrnDsUlprSuM3ui2z5JGf1ftw==", "license": "MIT", "dependencies": { "@faker-js/faker": "^7.6.0", "@redocly/ajv": "8.11.2", - "@redocly/openapi-core": "1.33.1", + "@redocly/openapi-core": "1.34.0", "better-ajv-errors": "^1.2.0", "colorette": "^2.0.20", "concat-stream": "^2.0.0", diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 038b2b6b89..08d221524a 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -3,6 +3,8 @@ add-nodes,https://www.elastic.co/guide/en/elasticsearch/reference/current/add-el alias-update,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-alias aliases-update,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-update-aliases analysis-analyzers,https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-analyzers.html +amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html +amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html analysis-charfilters,https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-charfilters.html analysis-normalizers,https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-normalizers.html analysis-standard-analyzer,https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html @@ -328,6 +330,7 @@ inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/op inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put +inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elasticsearch.html diff --git a/specification/_json_spec/inference.put_amazonbedrock.json b/specification/_json_spec/inference.put_amazonbedrock.json new file mode 100644 index 0000000000..266a1800a3 --- /dev/null +++ b/specification/_json_spec/inference.put_amazonbedrock.json @@ -0,0 +1,35 @@ +{ + "inference.put_amazonbedrock": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "description": "Configure an Amazon Bedrock inference endpoint" + 
}, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "amazonbedrock_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts new file mode 100644 index 0000000000..8ac3d0262f --- /dev/null +++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts @@ -0,0 +1,163 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { float, integer } from '@_types/Numeric' + +/** + * Create an Amazon Bedrock inference endpoint. + * + * Creates an inference endpoint to perform an inference task with the `amazonbedrock` service. + * + * >info + * > You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_amazonbedrock + * @availability stack since=8.12.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-amazonbedrock + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{amazonbedrock_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. 
+     */
+    task_type: AmazonBedrockTaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    amazonbedrock_inference_id: Id
+  }
+  body: {
+    /**
+     * The chunking configuration object.
+     * @ext_doc_id inference-chunking
+     */
+    chunking_settings?: InferenceChunkingSettings
+    /**
+     * The type of service supported for the specified task type. In this case, `amazonbedrock`.
+     */
+    service: ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.
+     */
+    service_settings: AmazonBedrockServiceSettings
+    /**
+     * Settings to configure the inference task.
+     * These settings are specific to the task type you specified.
+     */
+    task_settings?: AmazonBedrockTaskSettings
+  }
+}
+
+export enum AmazonBedrockTaskType {
+  completion,
+  text_embedding
+}
+
+export enum ServiceType {
+  amazonbedrock
+}
+
+export class AmazonBedrockServiceSettings {
+  /**
+   * A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.
+   */
+  access_key: string
+  /**
+   * The base model ID or an ARN to a custom model based on a foundational model.
+   * The base model IDs can be found in the Amazon Bedrock documentation.
+   * Note that the model ID must be available for the provider chosen and your IAM user must have access to the model.
+   * @ext_doc_id amazonbedrock-models
+   */
+  model: string
+  /**
+   * The model provider for your deployment.
+   * Note that some providers may support only certain task types.
+   * Supported providers include:
+   *
+   * * `amazontitan` - available for `text_embedding` and `completion` task types
+   * * `anthropic` - available for `completion` task type only
+   * * `ai21labs` - available for `completion` task type only
+   * * `cohere` - available for `text_embedding` and `completion` task types
+   * * `meta` - available for `completion` task type only
+   * * `mistral` - available for `completion` task type only
+   */
+  provider?: string
+  /**
+   * The region that your model or ARN is deployed in.
+   * The list of available regions per model can be found in the Amazon Bedrock documentation.
+   * @ext_doc_id amazonbedrock-models
+   */
+  region: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.
+   * By default, the `amazonbedrock` service sets the number of requests allowed per minute to 240.
+   */
+  rate_limit?: RateLimitSetting
+  /**
+   * A valid AWS secret key that is paired with the `access_key`.
+   * For information about creating and managing access and secret keys, refer to the AWS documentation.
+   * @ext_doc_id amazonbedrock-secret-keys
+   */
+  secret_key: string
+}
+
+export class AmazonBedrockTaskSettings {
+  /**
+   * For a `completion` task, it sets the maximum number of output tokens to be generated.
+   * @server_default 64
+   */
+  max_new_tokens?: integer
+  /**
+   * For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.
+   * At temperature 0.0 the model is most deterministic, at temperature 1.0 most random.
+   * It should not be used if `top_p` or `top_k` is specified.
+   */
+  temperature?: float
+  /**
+   * For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.
+   * It is only available for anthropic, cohere, and mistral providers.
+   * It is an alternative to `temperature`; it should not be used if `temperature` is specified.
+   */
+  top_k?: float
+  /**
+   * For a `completion` task, it is a number in the range of 0.0 to 1.0 that is used to eliminate low-probability tokens.
+   * Top-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.
+   * It is an alternative to `temperature`; it should not be used if `temperature` is specified.
+   */
+  top_p?: float
+}
diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts
new file mode 100644
index 0000000000..d40639b031
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfo } from '@inference/_types/Services'
+
+export class Response {
+  body: InferenceEndpointInfo
+}
diff --git a/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml
new file mode 100644
index 0000000000..cded037d23
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml
@@ -0,0 +1,15 @@
+summary: A text embedding task
+description: Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.
+# method_request: "PUT _inference/text_embedding/amazon_bedrock_embeddings"
+# type: "request"
+value: |-
+  {
+    "service": "amazonbedrock",
+    "service_settings": {
+      "access_key": "AWS-access-key",
+      "secret_key": "AWS-secret-key",
+      "region": "us-east-1",
+      "provider": "amazontitan",
+      "model": "amazon.titan-embed-text-v2:0"
+    }
+  }
diff --git a/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml
new file mode 100644
index 0000000000..4bd73086b3
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml
@@ -0,0 +1,15 @@
+summary: A completion task
+description: Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.
+# method_request: "PUT _inference/completion/amazon_bedrock_completion"
+# type: "request"
+value: |-
+  {
+    "service": "amazonbedrock",
+    "service_settings": {
+      "access_key": "AWS-access-key",
+      "secret_key": "AWS-secret-key",
+      "region": "us-east-1",
+      "provider": "amazontitan",
+      "model": "amazon.titan-text-premier-v1:0"
+    }
+  }
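Note: neither of the new request examples exercises the optional `task_settings` or `rate_limit` fields that this specification defines. The following is a minimal sketch, not part of the patch, of what such a request could look like; the endpoint name is illustrative, the credential values are the same placeholders used in the examples above, `requests_per_minute` is the field of the shared `RateLimitSetting` type, and the task-setting values are arbitrary:

PUT _inference/completion/amazon_bedrock_completion
{
  "service": "amazonbedrock",
  "service_settings": {
    "access_key": "AWS-access-key",
    "secret_key": "AWS-secret-key",
    "region": "us-east-1",
    "provider": "amazontitan",
    "model": "amazon.titan-text-premier-v1:0",
    "rate_limit": {
      "requests_per_minute": 120
    }
  },
  "task_settings": {
    "max_new_tokens": 256,
    "temperature": 0.2
  }
}

Per the `AmazonBedrockTaskSettings` doc comments, `temperature` should not be combined with `top_p` or `top_k`, so only `temperature` is set here.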