diff --git a/docs/docset.yml b/docs/docset.yml
new file mode 100644
index 000000000..882aea405
--- /dev/null
+++ b/docs/docset.yml
@@ -0,0 +1,488 @@
+project: 'Python client'
+cross_links:
+  - apm-agent-python
+  - docs-content
+  - elasticsearch
+toc:
+  - toc: reference
+  - toc: release-notes
+subs:
+  ref: "https://www.elastic.co/guide/en/elasticsearch/reference/current"
+  ref-bare: "https://www.elastic.co/guide/en/elasticsearch/reference"
+  ref-8x: "https://www.elastic.co/guide/en/elasticsearch/reference/8.1"
+  ref-80: "https://www.elastic.co/guide/en/elasticsearch/reference/8.0"
+  ref-7x: "https://www.elastic.co/guide/en/elasticsearch/reference/7.17"
+  ref-70: "https://www.elastic.co/guide/en/elasticsearch/reference/7.0"
+  ref-60: "https://www.elastic.co/guide/en/elasticsearch/reference/6.0"
+  ref-64: "https://www.elastic.co/guide/en/elasticsearch/reference/6.4"
+  xpack-ref: "https://www.elastic.co/guide/en/x-pack/6.2"
+  logstash-ref: "https://www.elastic.co/guide/en/logstash/current"
+  kibana-ref: "https://www.elastic.co/guide/en/kibana/current"
+  kibana-ref-all: "https://www.elastic.co/guide/en/kibana"
+  beats-ref-root: "https://www.elastic.co/guide/en/beats"
+  beats-ref: "https://www.elastic.co/guide/en/beats/libbeat/current"
+  beats-ref-60: "https://www.elastic.co/guide/en/beats/libbeat/6.0"
+  beats-ref-63: "https://www.elastic.co/guide/en/beats/libbeat/6.3"
+  beats-devguide: "https://www.elastic.co/guide/en/beats/devguide/current"
+  auditbeat-ref: "https://www.elastic.co/guide/en/beats/auditbeat/current"
+  packetbeat-ref: "https://www.elastic.co/guide/en/beats/packetbeat/current"
+  metricbeat-ref: "https://www.elastic.co/guide/en/beats/metricbeat/current"
+  filebeat-ref: "https://www.elastic.co/guide/en/beats/filebeat/current"
+  functionbeat-ref: "https://www.elastic.co/guide/en/beats/functionbeat/current"
+  winlogbeat-ref: "https://www.elastic.co/guide/en/beats/winlogbeat/current"
+  heartbeat-ref: "https://www.elastic.co/guide/en/beats/heartbeat/current"
+  journalbeat-ref: "https://www.elastic.co/guide/en/beats/journalbeat/current"
+  ingest-guide: "https://www.elastic.co/guide/en/ingest/current"
+  fleet-guide: "https://www.elastic.co/guide/en/fleet/current"
+  apm-guide-ref: "https://www.elastic.co/guide/en/apm/guide/current"
+  apm-guide-7x: "https://www.elastic.co/guide/en/apm/guide/7.17"
+  apm-app-ref: "https://www.elastic.co/guide/en/kibana/current"
+  apm-agents-ref: "https://www.elastic.co/guide/en/apm/agent"
+  apm-android-ref: "https://www.elastic.co/guide/en/apm/agent/android/current"
+  apm-py-ref: "https://www.elastic.co/guide/en/apm/agent/python/current"
+  apm-py-ref-3x: "https://www.elastic.co/guide/en/apm/agent/python/3.x"
+  apm-node-ref-index: "https://www.elastic.co/guide/en/apm/agent/nodejs"
+  apm-node-ref: "https://www.elastic.co/guide/en/apm/agent/nodejs/current"
+  apm-node-ref-1x: "https://www.elastic.co/guide/en/apm/agent/nodejs/1.x"
+  apm-rum-ref: "https://www.elastic.co/guide/en/apm/agent/rum-js/current"
+  apm-ruby-ref: "https://www.elastic.co/guide/en/apm/agent/ruby/current"
+  apm-java-ref: "https://www.elastic.co/guide/en/apm/agent/java/current"
+  apm-go-ref: "https://www.elastic.co/guide/en/apm/agent/go/current"
+  apm-dotnet-ref: "https://www.elastic.co/guide/en/apm/agent/dotnet/current"
+  apm-php-ref: "https://www.elastic.co/guide/en/apm/agent/php/current"
+  apm-ios-ref: "https://www.elastic.co/guide/en/apm/agent/swift/current"
+  apm-lambda-ref: "https://www.elastic.co/guide/en/apm/lambda/current"
+  apm-attacher-ref: "https://www.elastic.co/guide/en/apm/attacher/current"
+  docker-logging-ref: "https://www.elastic.co/guide/en/beats/loggingplugin/current"
+  esf-ref: "https://www.elastic.co/guide/en/esf/current"
+  kinesis-firehose-ref: "https://www.elastic.co/guide/en/kinesis/{{kinesis_version}}"
+  estc-welcome-current: "https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current"
+  estc-welcome: "https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current"
+  estc-welcome-all: "https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions"
+  hadoop-ref: "https://www.elastic.co/guide/en/elasticsearch/hadoop/current"
+  stack-ref: "https://www.elastic.co/guide/en/elastic-stack/current"
+  stack-ref-67: "https://www.elastic.co/guide/en/elastic-stack/6.7"
+  stack-ref-68: "https://www.elastic.co/guide/en/elastic-stack/6.8"
+  stack-ref-70: "https://www.elastic.co/guide/en/elastic-stack/7.0"
+  stack-ref-80: "https://www.elastic.co/guide/en/elastic-stack/8.0"
+  stack-ov: "https://www.elastic.co/guide/en/elastic-stack-overview/current"
+  stack-gs: "https://www.elastic.co/guide/en/elastic-stack-get-started/current"
+  stack-gs-current: "https://www.elastic.co/guide/en/elastic-stack-get-started/current"
+  javaclient: "https://www.elastic.co/guide/en/elasticsearch/client/java-api/current"
+  java-api-client: "https://www.elastic.co/guide/en/elasticsearch/client/java-api-client/current"
+  java-rest: "https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current"
+  jsclient: "https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current"
+  jsclient-current: "https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current"
+  es-ruby-client: "https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current"
+  es-dotnet-client: "https://www.elastic.co/guide/en/elasticsearch/client/net-api/current"
+  es-php-client: "https://www.elastic.co/guide/en/elasticsearch/client/php-api/current"
+  es-python-client: "https://www.elastic.co/guide/en/elasticsearch/client/python-api/current"
+  defguide: "https://www.elastic.co/guide/en/elasticsearch/guide/2.x"
+  painless: "https://www.elastic.co/guide/en/elasticsearch/painless/current"
+  plugins: "https://www.elastic.co/guide/en/elasticsearch/plugins/current"
+  plugins-8x: "https://www.elastic.co/guide/en/elasticsearch/plugins/8.1"
+  plugins-7x: "https://www.elastic.co/guide/en/elasticsearch/plugins/7.17"
+  plugins-6x: "https://www.elastic.co/guide/en/elasticsearch/plugins/6.8"
+  glossary: "https://www.elastic.co/guide/en/elastic-stack-glossary/current"
+  upgrade_guide: "https://www.elastic.co/products/upgrade_guide"
+  blog-ref: "https://www.elastic.co/blog/"
+  curator-ref: "https://www.elastic.co/guide/en/elasticsearch/client/curator/current"
+  curator-ref-current: "https://www.elastic.co/guide/en/elasticsearch/client/curator/current"
+  metrics-ref: "https://www.elastic.co/guide/en/metrics/current"
+  metrics-guide: "https://www.elastic.co/guide/en/metrics/guide/current"
+  logs-ref: "https://www.elastic.co/guide/en/logs/current"
+  logs-guide: "https://www.elastic.co/guide/en/logs/guide/current"
+  uptime-guide: "https://www.elastic.co/guide/en/uptime/current"
+  observability-guide: "https://www.elastic.co/guide/en/observability/current"
+  observability-guide-all: "https://www.elastic.co/guide/en/observability"
+  siem-guide: "https://www.elastic.co/guide/en/siem/guide/current"
+  security-guide: "https://www.elastic.co/guide/en/security/current"
+  security-guide-all: "https://www.elastic.co/guide/en/security"
+  endpoint-guide: "https://www.elastic.co/guide/en/endpoint/current"
+  sql-odbc: "https://www.elastic.co/guide/en/elasticsearch/sql-odbc/current"
+  ecs-ref: "https://www.elastic.co/guide/en/ecs/current"
+  ecs-logging-ref: "https://www.elastic.co/guide/en/ecs-logging/overview/current"
+  ecs-logging-go-logrus-ref: "https://www.elastic.co/guide/en/ecs-logging/go-logrus/current"
+  ecs-logging-go-zap-ref: "https://www.elastic.co/guide/en/ecs-logging/go-zap/current"
+  ecs-logging-go-zerolog-ref: "https://www.elastic.co/guide/en/ecs-logging/go-zerolog/current"
+  ecs-logging-java-ref: "https://www.elastic.co/guide/en/ecs-logging/java/current"
+  ecs-logging-dotnet-ref: "https://www.elastic.co/guide/en/ecs-logging/dotnet/current"
+  ecs-logging-nodejs-ref: "https://www.elastic.co/guide/en/ecs-logging/nodejs/current"
+  ecs-logging-php-ref: "https://www.elastic.co/guide/en/ecs-logging/php/current"
+  ecs-logging-python-ref: "https://www.elastic.co/guide/en/ecs-logging/python/current"
+  ecs-logging-ruby-ref: "https://www.elastic.co/guide/en/ecs-logging/ruby/current"
+  ml-docs: "https://www.elastic.co/guide/en/machine-learning/current"
+  eland-docs: "https://www.elastic.co/guide/en/elasticsearch/client/eland/current"
+  eql-ref: "https://eql.readthedocs.io/en/latest/query-guide"
+  extendtrial: "https://www.elastic.co/trialextension"
+  wikipedia: "https://en.wikipedia.org/wiki"
+  forum: "https://discuss.elastic.co/"
+  xpack-forum: "https://discuss.elastic.co/c/50-x-pack"
+  security-forum: "https://discuss.elastic.co/c/x-pack/shield"
+  watcher-forum: "https://discuss.elastic.co/c/x-pack/watcher"
+  monitoring-forum: "https://discuss.elastic.co/c/x-pack/marvel"
+  graph-forum: "https://discuss.elastic.co/c/x-pack/graph"
+  apm-forum: "https://discuss.elastic.co/c/apm"
+  enterprise-search-ref: "https://www.elastic.co/guide/en/enterprise-search/current"
+  app-search-ref: "https://www.elastic.co/guide/en/app-search/current"
+  workplace-search-ref: "https://www.elastic.co/guide/en/workplace-search/current"
+  enterprise-search-node-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/enterprise-search-node/current"
+  enterprise-search-php-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/php/current"
+  enterprise-search-python-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/python/current"
+  enterprise-search-ruby-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/ruby/current"
+  elastic-maps-service: "https://maps.elastic.co"
+  integrations-docs: "https://docs.elastic.co/en/integrations"
+  integrations-devguide: "https://www.elastic.co/guide/en/integrations-developer/current"
+  time-units: "https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#time-units"
+  byte-units: "https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#byte-units"
+  apm-py-ref-v: "https://www.elastic.co/guide/en/apm/agent/python/current"
+  apm-node-ref-v: "https://www.elastic.co/guide/en/apm/agent/nodejs/current"
+  apm-rum-ref-v: "https://www.elastic.co/guide/en/apm/agent/rum-js/current"
+  apm-ruby-ref-v: "https://www.elastic.co/guide/en/apm/agent/ruby/current"
+  apm-java-ref-v: "https://www.elastic.co/guide/en/apm/agent/java/current"
+  apm-go-ref-v: "https://www.elastic.co/guide/en/apm/agent/go/current"
+  apm-ios-ref-v: "https://www.elastic.co/guide/en/apm/agent/swift/current"
+  apm-dotnet-ref-v: "https://www.elastic.co/guide/en/apm/agent/dotnet/current"
+  apm-php-ref-v: "https://www.elastic.co/guide/en/apm/agent/php/current"
+  ecloud: "Elastic Cloud"
+  esf: "Elastic Serverless Forwarder"
+  ess: "Elasticsearch Service"
+  ece: "Elastic Cloud Enterprise"
+  eck: "Elastic Cloud on Kubernetes"
+  serverless-full: "Elastic Cloud Serverless"
+  serverless-short: "Serverless"
+  es-serverless: "Elasticsearch Serverless"
+  es3: "Elasticsearch Serverless"
+  obs-serverless: "Elastic Observability Serverless"
+  sec-serverless: "Elastic Security Serverless"
+  serverless-docs: "https://docs.elastic.co/serverless"
+  cloud: "https://www.elastic.co/guide/en/cloud/current"
+  ess-utm-params: "?page=docs&placement=docs-body"
+  ess-baymax: "?page=docs&placement=docs-body"
+  ess-trial: "https://cloud.elastic.co/registration?page=docs&placement=docs-body"
+  ess-product: "https://www.elastic.co/cloud/elasticsearch-service?page=docs&placement=docs-body"
+  ess-console: "https://cloud.elastic.co?page=docs&placement=docs-body"
+  ess-console-name: "Elasticsearch Service Console"
+  ess-deployments: "https://cloud.elastic.co/deployments?page=docs&placement=docs-body"
+  ece-ref: "https://www.elastic.co/guide/en/cloud-enterprise/current"
+  eck-ref: "https://www.elastic.co/guide/en/cloud-on-k8s/current"
+  ess-leadin: "You can run Elasticsearch on your own hardware or use our hosted Elasticsearch Service that is available on AWS, GCP, and Azure. https://cloud.elastic.co/registration{ess-utm-params}[Try the Elasticsearch Service for free]."
+  ess-leadin-short: "Our hosted Elasticsearch Service is available on AWS, GCP, and Azure, and you can https://cloud.elastic.co/registration{ess-utm-params}[try it for free]."
+  ess-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg[link=\"https://cloud.elastic.co/registration{ess-utm-params}\", title=\"Supported on Elasticsearch Service\"]"
+  ece-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud_ece.svg[link=\"https://cloud.elastic.co/registration{ess-utm-params}\", title=\"Supported on Elastic Cloud Enterprise\"]"
+  cloud-only: "This feature is designed for indirect use by https://cloud.elastic.co/registration{ess-utm-params}[Elasticsearch Service], https://www.elastic.co/guide/en/cloud-enterprise/{ece-version-link}[Elastic Cloud Enterprise], and https://www.elastic.co/guide/en/cloud-on-k8s/current[Elastic Cloud on Kubernetes]. Direct use is not supported."
+  ess-setting-change: "image:https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg[link=\"{ess-trial}\", title=\"Supported on {ess}\"] indicates a change to a supported https://www.elastic.co/guide/en/cloud/current/ec-add-user-settings.html[user setting] for Elasticsearch Service."
+  ess-skip-section: "If you use Elasticsearch Service, skip this section. Elasticsearch Service handles these changes for you."
+  api-cloud: "https://www.elastic.co/docs/api/doc/cloud"
+  api-ece: "https://www.elastic.co/docs/api/doc/cloud-enterprise"
+  api-kibana-serverless: "https://www.elastic.co/docs/api/doc/serverless"
+  es-feature-flag: "This feature is in development and not yet available for use. This documentation is provided for informational purposes only."
+  es-ref-dir: "'{{elasticsearch-root}}/docs/reference'"
+  apm-app: "APM app"
+  uptime-app: "Uptime app"
+  synthetics-app: "Synthetics app"
+  logs-app: "Logs app"
+  metrics-app: "Metrics app"
+  infrastructure-app: "Infrastructure app"
+  siem-app: "SIEM app"
+  security-app: "Elastic Security app"
+  ml-app: "Machine Learning"
+  dev-tools-app: "Dev Tools"
+  ingest-manager-app: "Ingest Manager"
+  stack-manage-app: "Stack Management"
+  stack-monitor-app: "Stack Monitoring"
+  alerts-ui: "Alerts and Actions"
+  rules-ui: "Rules"
+  rac-ui: "Rules and Connectors"
+  connectors-ui: "Connectors"
+  connectors-feature: "Actions and Connectors"
+  stack-rules-feature: "Stack Rules"
+  user-experience: "User Experience"
+  ems: "Elastic Maps Service"
+  ems-init: "EMS"
+  hosted-ems: "Elastic Maps Server"
+  ipm-app: "Index Pattern Management"
+  ingest-pipelines: "ingest pipelines"
+  ingest-pipelines-app: "Ingest Pipelines"
+  ingest-pipelines-cap: "Ingest pipelines"
+  ls-pipelines: "Logstash pipelines"
+  ls-pipelines-app: "Logstash Pipelines"
+  maint-windows: "maintenance windows"
+  maint-windows-app: "Maintenance Windows"
+  maint-windows-cap: "Maintenance windows"
+  custom-roles-app: "Custom Roles"
+  data-source: "data view"
+  data-sources: "data views"
+  data-source-caps: "Data View"
+  data-sources-caps: "Data Views"
+  data-source-cap: "Data view"
+  data-sources-cap: "Data views"
+  project-settings: "Project settings"
+  manage-app: "Management"
+  index-manage-app: "Index Management"
+  data-views-app: "Data Views"
+  rules-app: "Rules"
+  saved-objects-app: "Saved Objects"
+  tags-app: "Tags"
+  api-keys-app: "API keys"
+  transforms-app: "Transforms"
+  connectors-app: "Connectors"
+  files-app: "Files"
+  reports-app: "Reports"
+  maps-app: "Maps"
+  alerts-app: "Alerts"
+  crawler: "Enterprise Search web crawler"
+  ents: "Enterprise Search"
+  app-search-crawler: "App Search web crawler"
+  agent: "Elastic Agent"
+  agents: "Elastic Agents"
+  fleet: "Fleet"
+  fleet-server: "Fleet Server"
+  integrations-server: "Integrations Server"
+  ingest-manager: "Ingest Manager"
+  ingest-management: "ingest management"
+  package-manager: "Elastic Package Manager"
+  integrations: "Integrations"
+  package-registry: "Elastic Package Registry"
+  artifact-registry: "Elastic Artifact Registry"
+  aws: "AWS"
+  stack: "Elastic Stack"
+  xpack: "X-Pack"
+  es: "Elasticsearch"
+  kib: "Kibana"
+  esms: "Elastic Stack Monitoring Service"
+  esms-init: "ESMS"
+  ls: "Logstash"
+  beats: "Beats"
+  auditbeat: "Auditbeat"
+  filebeat: "Filebeat"
+  heartbeat: "Heartbeat"
+  metricbeat: "Metricbeat"
+  packetbeat: "Packetbeat"
+  winlogbeat: "Winlogbeat"
+  functionbeat: "Functionbeat"
+  journalbeat: "Journalbeat"
+  es-sql: "Elasticsearch SQL"
+  esql: "ES|QL"
+  elastic-agent: "Elastic Agent"
+  k8s: "Kubernetes"
+  log-driver-long: "Elastic Logging Plugin for Docker"
+  security: "X-Pack security"
+  security-features: "security features"
+  operator-feature: "operator privileges feature"
+  es-security-features: "Elasticsearch security features"
+  stack-security-features: "Elastic Stack security features"
+  endpoint-sec: "Endpoint Security"
+  endpoint-cloud-sec: "Endpoint and Cloud Security"
+  elastic-defend: "Elastic Defend"
+  elastic-sec: "Elastic Security"
+  elastic-endpoint: "Elastic Endpoint"
+  swimlane: "Swimlane"
+  sn: "ServiceNow"
+  sn-itsm: "ServiceNow ITSM"
+  sn-itom: "ServiceNow ITOM"
+  sn-sir: "ServiceNow SecOps"
+  jira: "Jira"
+  ibm-r: "IBM Resilient"
+  webhook: "Webhook"
+  webhook-cm: "Webhook - Case Management"
+  opsgenie: "Opsgenie"
+  bedrock: "Amazon Bedrock"
+  gemini: "Google Gemini"
+  hive: "TheHive"
+  monitoring: "X-Pack monitoring"
+  monitor-features: "monitoring features"
+  stack-monitor-features: "Elastic Stack monitoring features"
+  watcher: "Watcher"
+  alert-features: "alerting features"
+  reporting: "X-Pack reporting"
+  report-features: "reporting features"
+  graph: "X-Pack graph"
+  graph-features: "graph analytics features"
+  searchprofiler: "Search Profiler"
+  xpackml: "X-Pack machine learning"
+  ml: "machine learning"
+  ml-cap: "Machine learning"
+  ml-init: "ML"
+  ml-features: "machine learning features"
+  stack-ml-features: "Elastic Stack machine learning features"
+  ccr: "cross-cluster replication"
+  ccr-cap: "Cross-cluster replication"
+  ccr-init: "CCR"
+  ccs: "cross-cluster search"
+  ccs-cap: "Cross-cluster search"
+  ccs-init: "CCS"
+  ilm: "index lifecycle management"
+  ilm-cap: "Index lifecycle management"
+  ilm-init: "ILM"
+  dlm: "data lifecycle management"
+  dlm-cap: "Data lifecycle management"
+  dlm-init: "DLM"
+  search-snap: "searchable snapshot"
+  search-snaps: "searchable snapshots"
+  search-snaps-cap: "Searchable snapshots"
+  slm: "snapshot lifecycle management"
+  slm-cap: "Snapshot lifecycle management"
+  slm-init: "SLM"
+  rollup-features: "data rollup features"
+  ipm: "index pattern management"
+  ipm-cap: "Index pattern"
+  rollup: "rollup"
+  rollup-cap: "Rollup"
+  rollups: "rollups"
+  rollups-cap: "Rollups"
+  rollup-job: "rollup job"
+  rollup-jobs: "rollup jobs"
+  rollup-jobs-cap: "Rollup jobs"
+  dfeed: "datafeed"
+  dfeeds: "datafeeds"
+  dfeed-cap: "Datafeed"
+  dfeeds-cap: "Datafeeds"
+  ml-jobs: "machine learning jobs"
+  ml-jobs-cap: "Machine learning jobs"
+  anomaly-detect: "anomaly detection"
+  anomaly-detect-cap: "Anomaly detection"
+  anomaly-job: "anomaly detection job"
+  anomaly-jobs: "anomaly detection jobs"
+  anomaly-jobs-cap: "Anomaly detection jobs"
+  dataframe: "data frame"
+  dataframes: "data frames"
+  dataframe-cap: "Data frame"
+  dataframes-cap: "Data frames"
+  watcher-transform: "payload transform"
+  watcher-transforms: "payload transforms"
+  watcher-transform-cap: "Payload transform"
+  watcher-transforms-cap: "Payload transforms"
+  transform: "transform"
+  transforms: "transforms"
+  transform-cap: "Transform"
+  transforms-cap: "Transforms"
+  dataframe-transform: "transform"
+  dataframe-transform-cap: "Transform"
+  dataframe-transforms: "transforms"
+  dataframe-transforms-cap: "Transforms"
+  dfanalytics-cap: "Data frame analytics"
+  dfanalytics: "data frame analytics"
+  dataframe-analytics-config: "'{dataframe} analytics config'"
+  dfanalytics-job: "'{dataframe} analytics job'"
+  dfanalytics-jobs: "'{dataframe} analytics jobs'"
+  dfanalytics-jobs-cap: "'{dataframe-cap} analytics jobs'"
+  cdataframe: "continuous data frame"
+  cdataframes: "continuous data frames"
+  cdataframe-cap: "Continuous data frame"
+  cdataframes-cap: "Continuous data frames"
+  cdataframe-transform: "continuous transform"
+  cdataframe-transforms: "continuous transforms"
+  cdataframe-transforms-cap: "Continuous transforms"
+  ctransform: "continuous transform"
+  ctransform-cap: "Continuous transform"
+  ctransforms: "continuous transforms"
+  ctransforms-cap: "Continuous transforms"
+  oldetection: "outlier detection"
+  oldetection-cap: "Outlier detection"
+  olscore: "outlier score"
+  olscores: "outlier scores"
+  fiscore: "feature influence score"
+  evaluatedf-api: "evaluate {dataframe} analytics API"
+  evaluatedf-api-cap: "Evaluate {dataframe} analytics API"
+  binarysc: "binary soft classification"
+  binarysc-cap: "Binary soft classification"
+  regression: "regression"
+  regression-cap: "Regression"
+  reganalysis: "regression analysis"
+  reganalysis-cap: "Regression analysis"
+  depvar: "dependent variable"
+  feature-var: "feature variable"
+  feature-vars: "feature variables"
+  feature-vars-cap: "Feature variables"
+  classification: "classification"
+  classification-cap: "Classification"
+  classanalysis: "classification analysis"
+  classanalysis-cap: "Classification analysis"
+  infer-cap: "Inference"
+  infer: "inference"
+  lang-ident-cap: "Language identification"
+  lang-ident: "language identification"
+  data-viz: "Data Visualizer"
+  file-data-viz: "File Data Visualizer"
+  feat-imp: "feature importance"
+  feat-imp-cap: "Feature importance"
+  nlp: "natural language processing"
+  nlp-cap: "Natural language processing"
+  apm-agent: "APM agent"
+  apm-go-agent: "Elastic APM Go agent"
+  apm-go-agents: "Elastic APM Go agents"
+  apm-ios-agent: "Elastic APM iOS agent"
+  apm-ios-agents: "Elastic APM iOS agents"
+  apm-java-agent: "Elastic APM Java agent"
+  apm-java-agents: "Elastic APM Java agents"
+  apm-dotnet-agent: "Elastic APM .NET agent"
+  apm-dotnet-agents: "Elastic APM .NET agents"
+  apm-node-agent: "Elastic APM Node.js agent"
+  apm-node-agents: "Elastic APM Node.js agents"
+  apm-php-agent: "Elastic APM PHP agent"
+  apm-php-agents: "Elastic APM PHP agents"
+  apm-py-agent: "Elastic APM Python agent"
+  apm-py-agents: "Elastic APM Python agents"
+  apm-ruby-agent: "Elastic APM Ruby agent"
+  apm-ruby-agents: "Elastic APM Ruby agents"
+  apm-rum-agent: "Elastic APM Real User Monitoring (RUM) JavaScript agent"
+  apm-rum-agents: "Elastic APM RUM JavaScript agents"
+  apm-lambda-ext: "Elastic APM AWS Lambda extension"
+  project-monitors: "project monitors"
+  project-monitors-cap: "Project monitors"
+  private-location: "Private Location"
+  private-locations: "Private Locations"
+  pwd: "YOUR_PASSWORD"
+  esh: "ES-Hadoop"
+  default-dist: "default distribution"
+  oss-dist: "OSS-only distribution"
+  observability: "Observability"
+  api-request-title: "Request"
+  api-prereq-title: "Prerequisites"
+  api-description-title: "Description"
+  api-path-parms-title: "Path parameters"
+  api-query-parms-title: "Query parameters"
+  api-request-body-title: "Request body"
+  api-response-codes-title: "Response codes"
+  api-response-body-title: "Response body"
+  api-example-title: "Example"
+  api-examples-title: "Examples"
+  api-definitions-title: "Properties"
+  multi-arg: "†footnoteref:[multi-arg,This parameter accepts multiple arguments.]"
+  multi-arg-ref: "†footnoteref:[multi-arg]"
+  yes-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/icon-yes.png[Yes,20,15]"
+  no-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/icon-no.png[No,20,15]"
+  es-repo: "https://github.com/elastic/elasticsearch/"
+  es-issue: "https://github.com/elastic/elasticsearch/issues/"
+  es-pull: "https://github.com/elastic/elasticsearch/pull/"
+  es-commit: "https://github.com/elastic/elasticsearch/commit/"
+  kib-repo: "https://github.com/elastic/kibana/"
+  kib-issue: "https://github.com/elastic/kibana/issues/"
+  kibana-issue: "'{kib-repo}issues/'"
+  kib-pull: "https://github.com/elastic/kibana/pull/"
+  kibana-pull: "'{kib-repo}pull/'"
+  kib-commit: "https://github.com/elastic/kibana/commit/"
+  ml-repo: "https://github.com/elastic/ml-cpp/"
+  ml-issue: "https://github.com/elastic/ml-cpp/issues/"
+  ml-pull: "https://github.com/elastic/ml-cpp/pull/"
+  ml-commit: "https://github.com/elastic/ml-cpp/commit/"
+  apm-repo: "https://github.com/elastic/apm-server/"
+  apm-issue: "https://github.com/elastic/apm-server/issues/"
+  apm-pull: "https://github.com/elastic/apm-server/pull/"
+  kibana-blob: "https://github.com/elastic/kibana/blob/current/"
+  apm-get-started-ref: "https://www.elastic.co/guide/en/apm/get-started/current"
+  apm-server-ref: "https://www.elastic.co/guide/en/apm/server/current"
+  apm-server-ref-v: "https://www.elastic.co/guide/en/apm/server/current"
+  apm-server-ref-m: "https://www.elastic.co/guide/en/apm/server/master"
+  apm-server-ref-62: "https://www.elastic.co/guide/en/apm/server/6.2"
+  apm-server-ref-64: "https://www.elastic.co/guide/en/apm/server/6.4"
+  apm-server-ref-70: "https://www.elastic.co/guide/en/apm/server/7.0"
+  apm-overview-ref-v: "https://www.elastic.co/guide/en/apm/get-started/current"
+  apm-overview-ref-70: "https://www.elastic.co/guide/en/apm/get-started/7.0"
+  apm-overview-ref-m: "https://www.elastic.co/guide/en/apm/get-started/master"
+  infra-guide: "https://www.elastic.co/guide/en/infrastructure/guide/current"
+  a-data-source: "a data view"
+  icon-bug: "pass:[]"
+  icon-checkInCircleFilled: "pass:[]"
+  icon-warningFilled: "pass:[]"
diff --git a/docs/guide/async.asciidoc b/docs/guide/async.asciidoc
deleted file mode 100644
index 9f3c04acd..000000000
--- a/docs/guide/async.asciidoc
+++ /dev/null
@@ -1,141 +0,0 @@
-[[async]]
-== Using with asyncio
-
-The `elasticsearch` package supports async/await with
-https://docs.python.org/3/library/asyncio.html[asyncio] and
-https://docs.aiohttp.org[aiohttp]. You can either install `aiohttp`
-directly or use the `[async]` extra:
-
-[source,bash]
-----
-$ python -m pip install elasticsearch aiohttp
-
-# - OR -
-
-$ python -m pip install elasticsearch[async]
-----
-
-[discrete]
-=== Getting Started with Async
-
-After installation all async API endpoints are available via
-`~elasticsearch.AsyncElasticsearch` and are used in the same way as
-other APIs, with an extra `await`:
-
-[source,python]
-----
-import asyncio
-from elasticsearch import AsyncElasticsearch
-
-client = AsyncElasticsearch()
-
-async def main():
-    resp = await client.search(
-        index="documents",
-        body={"query": {"match_all": {}}},
-        size=20,
-    )
-    print(resp)
-
-loop = asyncio.get_event_loop()
-loop.run_until_complete(main())
-----
-
-All APIs that are available under the sync client are also available
-under the async client.
-
-https://elasticsearch-py.readthedocs.io/en/latest/async.html#api-reference[Reference documentation]
-
-[discrete]
-=== ASGI Applications and Elastic APM
-
-https://asgi.readthedocs.io[ASGI] (Asynchronous Server Gateway
-Interface) is a way to serve Python web applications making use of
-async I/O to achieve better performance. Some examples of ASGI
-frameworks include FastAPI, Django 3.0+, and Starlette. If you're
-using one of these frameworks along with Elasticsearch then you should
-be using `~elasticsearch.AsyncElasticsearch` to avoid blocking the event
-loop with synchronous network calls for optimal performance.
-
-https://www.elastic.co/guide/en/apm/agent/python/current/index.html[Elastic
-APM] also supports tracing of async Elasticsearch queries just the same
-as synchronous queries.
-For an example of how to configure `AsyncElasticsearch` with a popular
-ASGI framework like https://fastapi.tiangolo.com/[FastAPI] and APM tracing,
-there is a
-https://github.com/elastic/elasticsearch-py/tree/master/examples/fastapi-apm[pre-built
-example] in the `examples/fastapi-apm` directory.
-
-See also the <> page.
-
-[discrete]
-=== Frequently Asked Questions
-
-[discrete]
-==== ValueError when initializing `AsyncElasticsearch`?
-
-If you receive
-`ValueError: You must have 'aiohttp' installed to use AiohttpHttpNode`
-when trying to use `AsyncElasticsearch`, ensure that you have `aiohttp`
-installed in your environment (check with
-`$ python -m pip freeze | grep aiohttp`). Otherwise,
-async support won't be available.
-
-[discrete]
-==== What about the `elasticsearch-async` package?
-
-Previously asyncio was supported separately via the
-https://github.com/elastic/elasticsearch-py-async[elasticsearch-async]
-package. The `elasticsearch-async` package has been deprecated in favor
-of `AsyncElasticsearch` provided by the `elasticsearch` package in v7.8
-and onwards.
-
-[discrete]
-==== Receiving 'Unclosed client session / connector' warning?
-
-This warning is created by `aiohttp` when an open HTTP connection is
-garbage collected. You'll typically run into this when closing your
-application. To resolve the issue, ensure that
-`~elasticsearch.AsyncElasticsearch.close` is called before the
-`~elasticsearch.AsyncElasticsearch` instance is garbage collected.
-
-For example, if using FastAPI that might look like this:
-
-[source,python]
-----
-import os
-from contextlib import asynccontextmanager
-
-from fastapi import FastAPI
-from elasticsearch import AsyncElasticsearch
-
-ELASTICSEARCH_URL = os.environ["ELASTICSEARCH_URL"]
-client = None
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    global client
-    client = AsyncElasticsearch(ELASTICSEARCH_URL)
-    yield
-    await client.close()
-
-app = FastAPI(lifespan=lifespan)
-
-@app.get("/")
-async def main():
-    return await client.info()
-----
-
-You can run this example by saving it to `main.py` and executing
-`ELASTICSEARCH_URL=http://localhost:9200 uvicorn main:app`.
-
-[discrete]
-=== Async Helpers
-
-Async variants of all helpers are available in `elasticsearch.helpers`
-and are all prefixed with `async_*`. You'll notice that these APIs
-are identical to the ones in the sync <> documentation.
-
-All async helpers that accept an iterator or generator also accept async
-iterators and async generators.
-
-https://elasticsearch-py.readthedocs.io/en/latest/async.html#async-helpers[Reference documentation]
-
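Editor's note: the deleted page stops short of a concrete helper example. Here is a minimal sketch of the `async_*` helpers it describes, using an async generator as the action source (the `words` index name, the documents, and the local URL are illustrative assumptions):

[source,python]
----
import asyncio

from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers import async_bulk

client = AsyncElasticsearch("http://localhost:9200")

async def generate_docs():
    # Async generators are accepted wherever sync iterators are
    for word in ["foo", "bar", "baz"]:
        yield {"_index": "words", "word": word}

async def main():
    # Index all generated documents in bulk
    await async_bulk(client, generate_docs())
    await client.close()

asyncio.run(main())
----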
diff --git a/docs/guide/connecting.asciidoc b/docs/guide/connecting.asciidoc
deleted file mode 100644
index 101bded51..000000000
--- a/docs/guide/connecting.asciidoc
+++ /dev/null
@@ -1,438 +0,0 @@
-[[connecting]]
-== Connecting
-
-This page contains the information you need to connect the Client with {es}.
-
-[discrete]
-[[connect-ec]]
-=== Connecting to Elastic Cloud
-
-https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html[Elastic Cloud]
-is the easiest way to get started with {es}. When connecting to Elastic Cloud
-with the Python {es} client you should always use the `cloud_id`
-parameter to connect. You can find this value within the "Manage Deployment"
-page after you've created a cluster (look in the top-left if you're in Kibana).
-
-We recommend using a Cloud ID whenever possible because your client will be
-automatically configured for optimal use with Elastic Cloud including HTTPS and
-HTTP compression.
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Password for the 'elastic' user generated by Elasticsearch
-ELASTIC_PASSWORD = ""
-
-# Found in the 'Manage Deployment' page
-CLOUD_ID = "deployment-name:dXMtZWFzdDQuZ2Nw..."
-
-# Create the client instance
-client = Elasticsearch(
-    cloud_id=CLOUD_ID,
-    basic_auth=("elastic", ELASTIC_PASSWORD)
-)
-
-# Successful response!
-client.info()
-# {'name': 'instance-0000000000', 'cluster_name': ...}
-----
-
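Editor's note: API key authentication (covered in the Authentication section of this deleted page) also combines with `cloud_id`. A brief sketch with placeholder credentials:

[source,python]
----
from elasticsearch import Elasticsearch

# Placeholder Cloud ID and API key values; substitute your own
client = Elasticsearch(
    cloud_id="deployment-name:dXMtZWFzdDQuZ2Nw...",
    api_key="YOUR_API_KEY",
)
----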
-[discrete]
-[[connect-self-managed-new]]
-=== Connecting to a self-managed cluster
-
-By default {es} will start with security features like authentication and TLS
-enabled. To connect to the {es} cluster you'll need to configure the Python {es}
-client to use HTTPS with the generated CA certificate in order to make requests
-successfully.
-
-If you're just getting started with {es} we recommend reading the documentation
-on https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html[configuring]
-and
-https://www.elastic.co/guide/en/elasticsearch/reference/current/starting-elasticsearch.html[starting {es}]
-to ensure your cluster is running as expected.
-
-When you start {es} for the first time you'll see a distinct block like the one
-below in the output from {es} (you may have to scroll up if it's been a while):
-
-```sh
-----------------------------------------------------------------
--> Elasticsearch security features have been automatically configured!
--> Authentication is enabled and cluster connections are encrypted.
-
--> Password for the elastic user (reset with `bin/elasticsearch-reset-password -u elastic`):
-  lhQpLELkjkrawaBoaz0Q
-
--> HTTP CA certificate SHA-256 fingerprint:
-  a52dd93511e8c6045e21f16654b77c9ee0f34aea26d9f40320b531c474676228
-...
-----------------------------------------------------------------
-```
-
-Note down the `elastic` user password and HTTP CA fingerprint for the next
-sections. In the examples below they will be stored in the variables
-`ELASTIC_PASSWORD` and `CERT_FINGERPRINT` respectively.
-
-Depending on the circumstances there are two options for verifying the HTTPS
-connection, either verifying with the CA certificate itself or via the HTTP CA
-certificate fingerprint.
-
-[discrete]
-==== Verifying HTTPS with CA certificates
-
-Using the `ca_certs` option is the default way the Python {es} client verifies
-an HTTPS connection.
-
-The generated root CA certificate can be found in the `certs` directory in your
-{es} config location (`$ES_CONF_PATH/certs/http_ca.crt`). If you're running {es}
-in Docker there is
-https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html[additional documentation for retrieving the CA certificate].
-
-Once you have the `http_ca.crt` file somewhere accessible pass the path to the
-client via `ca_certs`:
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Password for the 'elastic' user generated by Elasticsearch
-ELASTIC_PASSWORD = ""
-
-# Create the client instance
-client = Elasticsearch(
-    "https://localhost:9200",
-    ca_certs="/path/to/http_ca.crt",
-    basic_auth=("elastic", ELASTIC_PASSWORD)
-)
-
-# Successful response!
-client.info()
-# {'name': 'instance-0000000000', 'cluster_name': ...}
-----
-
-NOTE: If you don't specify `ca_certs` or `ssl_assert_fingerprint` then the
-https://certifiio.readthedocs.io[certifi package] will be used for `ca_certs` by
-default if available.
-
-[discrete]
-==== Verifying HTTPS with certificate fingerprints (Python 3.10 or later)
-
-NOTE: Using this method **requires using Python 3.10 or later** and isn't
-available when using the `aiohttp` HTTP client library so can't be used with
-`AsyncElasticsearch`.
-
-This method of verifying the HTTPS connection takes advantage of the certificate
-fingerprint value noted down earlier. Take this SHA256 fingerprint value and
-pass it to the Python {es} client via `ssl_assert_fingerprint`:
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Fingerprint either from Elasticsearch startup or above script.
-# Colons and uppercase/lowercase don't matter when using
-# the 'ssl_assert_fingerprint' parameter
-CERT_FINGERPRINT = "A5:2D:D9:35:11:E8:C6:04:5E:21:F1:66:54:B7:7C:9E:E0:F3:4A:EA:26:D9:F4:03:20:B5:31:C4:74:67:62:28"
-
-# Password for the 'elastic' user generated by Elasticsearch
-ELASTIC_PASSWORD = ""
-
-client = Elasticsearch(
-    "https://localhost:9200",
-    ssl_assert_fingerprint=CERT_FINGERPRINT,
-    basic_auth=("elastic", ELASTIC_PASSWORD)
-)
-
-# Successful response!
-client.info()
-# {'name': 'instance-0000000000', 'cluster_name': ...}
-----
-
-The certificate fingerprint can be calculated using `openssl x509` with the
-certificate file:
-
-[source,sh]
-----
-openssl x509 -fingerprint -sha256 -noout -in /path/to/http_ca.crt
-----
-
-If you don't have access to the generated CA file from {es} you can use the
-following script to output the root CA fingerprint of the {es} instance with
-`openssl s_client`:
-
-[source,sh]
-----
-# Replace the values of 'localhost' and '9200' to the
-# corresponding host and port values for the cluster.
-openssl s_client -connect localhost:9200 -servername localhost -showcerts </dev/null \
-  | openssl x509 -fingerprint -sha256 -noout -in /dev/stdin
-----
-
-The output of `openssl x509` will look something like this:
-
-[source,sh]
-----
-SHA256 Fingerprint=A5:2D:D9:35:11:E8:C6:04:5E:21:F1:66:54:B7:7C:9E:E0:F3:4A:EA:26:D9:F4:03:20:B5:31:C4:74:67:62:28
-----
-
-
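Editor's note: if `openssl` isn't available, the same SHA-256 fingerprint can be computed with the Python standard library. A rough sketch (the host and port are assumptions for a local cluster; the certificate is fetched without verification, which is fine for fingerprinting only):

[source,python]
----
import hashlib
import ssl

# Hypothetical host/port; replace with your cluster's values
pem_cert = ssl.get_server_certificate(("localhost", 9200))
der_cert = ssl.PEM_cert_to_DER_cert(pem_cert)

# Prints the hex digest; compare it against the startup output
print(hashlib.sha256(der_cert).hexdigest())
----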
-[discrete]
-[[connect-no-security]]
-=== Connecting without security enabled
-
-WARNING: Running {es} without security enabled is not recommended.
-
-If your cluster is configured with
-https://www.elastic.co/guide/en/elasticsearch/reference/current/security-settings.html[security explicitly disabled]
-then you can connect via HTTP:
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Create the client instance
-client = Elasticsearch("http://localhost:9200")
-
-# Successful response!
-client.info()
-# {'name': 'instance-0000000000', 'cluster_name': ...}
-----
-
-[discrete]
-[[connect-url]]
-=== Connecting to multiple nodes
-
-The Python {es} client supports sending API requests to multiple nodes in the
-cluster. This means that work will be more evenly spread across the cluster
-instead of hammering the same node over and over with requests. To configure the
-client with multiple nodes you can pass a list of URLs, each URL will be used as
-a separate node in the pool.
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# List of nodes to connect to, with different hosts and ports.
-NODES = [
-    "https://localhost:9200",
-    "https://localhost:9201",
-    "https://localhost:9202",
-]
-
-# Password for the 'elastic' user generated by Elasticsearch
-ELASTIC_PASSWORD = ""
-
-client = Elasticsearch(
-    NODES,
-    ca_certs="/path/to/http_ca.crt",
-    basic_auth=("elastic", ELASTIC_PASSWORD)
-)
-----
-
-By default nodes are selected using round-robin, but alternate node selection
-strategies can be configured with the `node_selector_class` parameter.
-
-NOTE: If your {es} cluster is behind a load balancer, as is the case with
-Elastic Cloud, you won't need to configure multiple nodes. Instead use the
-load balancer host and port.
-
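Editor's note: as a hedged sketch of the `node_selector_class` parameter mentioned above, a `"random"` strategy can be selected by name instead of the default round-robin (assuming an `elastic-transport` version that ships both built-in strategies):

[source,python]
----
from elasticsearch import Elasticsearch

# Pick a random node for each request instead of round-robin
client = Elasticsearch(
    ["https://localhost:9200", "https://localhost:9201"],
    node_selector_class="random",
)
----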
-
-[discrete]
-[[authentication]]
-=== Authentication
-
-This section contains code snippets to show you how to connect to various {es}
-providers. All authentication methods are supported on the client constructor
-or via the per-request `.options()` method:
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Authenticate from the constructor
-client = Elasticsearch(
-    "https://localhost:9200",
-    ca_certs="/path/to/http_ca.crt",
-    basic_auth=("username", "password")
-)
-
-# Authenticate via the .options() method:
-client.options(
-    basic_auth=("username", "password")
-).indices.get(index="*")
-
-# You can persist the authenticated client to use
-# later or use for multiple API calls:
-auth_client = client.options(api_key="api_key")
-for i in range(10):
-    auth_client.index(
-        index="example-index",
-        document={"field": i}
-    )
-----
-
-[discrete]
-[[auth-basic]]
-==== HTTP Basic authentication (Username and Password)
-
-HTTP Basic authentication uses the `basic_auth` parameter by passing in a
-username and password within a tuple:
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Adds the HTTP header 'Authorization: Basic <base64 username:password>'
-client = Elasticsearch(
-    "https://localhost:9200",
-    ca_certs="/path/to/http_ca.crt",
-    basic_auth=("username", "password")
-)
-----
-
-[discrete]
-[[auth-bearer]]
-==== HTTP Bearer authentication
-
-HTTP Bearer authentication uses the `bearer_auth` parameter by passing the token
-as a string. This authentication method is used by
-https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/security-api-create-service-token.html[Service Account Tokens]
-and https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/security-api-get-token.html[Bearer Tokens].
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Adds the HTTP header 'Authorization: Bearer token-value'
-client = Elasticsearch(
-    "https://localhost:9200",
-    bearer_auth="token-value"
-)
-----
-
-[discrete]
-[[auth-apikey]]
-==== API Key authentication
-
-You can configure the client to use {es}'s API Key for connecting to your
-cluster. These can be generated through the
-https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Elasticsearch Create API key API]
-or https://www.elastic.co/guide/en/kibana/current/api-keys.html#create-api-key[Kibana Stack Management].
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Adds the HTTP header 'Authorization: ApiKey <api-key-value>'
-client = Elasticsearch(
-    "https://localhost:9200",
-    ca_certs="/path/to/http_ca.crt",
-    api_key="api_key",
-)
-----
-
-[discrete]
-[[compatibility-mode]]
-=== Enabling the Compatibility Mode
-
-The {es} server version 8.0 introduces a new compatibility mode that allows
-you a smoother upgrade experience from 7 to 8. In a nutshell, you can use the
-latest 7.x Python {es} client with an 8.x {es} server, giving more room to
-coordinate the upgrade of your codebase to the next major version.
-
-If you want to leverage this functionality, please make sure that you are using
-the latest 7.x Python {es} client and set the environment variable
-`ELASTIC_CLIENT_APIVERSIONING` to `true`. The client handles the rest
-internally. For the Python {es} client 8.0 and beyond, you're all set: the
-compatibility mode is enabled by default.
-
-[discrete]
-[[connecting-faas]]
-=== Using the Client in a Function-as-a-Service Environment
-
-This section illustrates the best practices for leveraging the {es} client in a
-Function-as-a-Service (FaaS) environment.
-
-The most influential optimization is to initialize the client outside of the
-function handler, in the global scope.
-
-This practice not only improves performance but also enables background
-functionality such as
-https://www.elastic.co/blog/elasticsearch-sniffing-best-practices-what-when-why-how[sniffing].
-The following examples provide a skeleton for the best practices.
-
-IMPORTANT: The async client shouldn't be used within Function-as-a-Service as a new event
-  loop must be started for each invocation. Instead the synchronous `Elasticsearch`
-  client is recommended.
-
-[discrete]
-[[connecting-faas-gcp]]
-==== GCP Cloud Functions
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Client initialization
-client = Elasticsearch(
-    cloud_id="deployment-name:ABCD...",
-    api_key=...
-)
-
-def main(request):
-    # Use the client
-    client.search(index=..., query={"match_all": {}})
-
-----
-
-[discrete]
-[[connecting-faas-aws]]
-==== AWS Lambda
-
-[source,python]
-----
-from elasticsearch import Elasticsearch
-
-# Client initialization
-client = Elasticsearch(
-    cloud_id="deployment-name:ABCD...",
-    api_key=...
-)
-
-def main(event, context):
-    # Use the client
-    client.search(index=..., query={"match_all": {}})
-
-----
-
-[discrete]
-[[connecting-faas-azure]]
-==== Azure Functions
-
-[source,python]
-----
-import azure.functions as func
-from elasticsearch import Elasticsearch
-
-# Client initialization
-client = Elasticsearch(
-    cloud_id="deployment-name:ABCD...",
-    api_key=...
-)
-
-def main(request: func.HttpRequest) -> func.HttpResponse:
-    # Use the client
-    client.search(index=..., query={"match_all": {}})
-
-----
-
-Resources used to assess these recommendations:
-
-* https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations[GCP Cloud Functions: Tips & Tricks]
-* https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html[Best practices for working with AWS Lambda functions]
-* https://docs.microsoft.com/en-us/azure/azure-functions/functions-reference-python?tabs=azurecli-linux%2Capplication-level#global-variables[Azure Functions Python developer guide]
-* https://docs.aws.amazon.com/lambda/latest/operatorguide/global-scope.html[AWS Lambda: Comparing the effect of global scope]
diff --git a/docs/guide/dsl/asyncio.asciidoc b/docs/guide/dsl/asyncio.asciidoc
deleted file mode 100644
index ff7799dfb..000000000
--- a/docs/guide/dsl/asyncio.asciidoc
+++ /dev/null
@@ -1,103 +0,0 @@
-[[asyncio]]
-==== Using asyncio with Elasticsearch Python DSL
-
-The DSL module supports async/await with
-https://docs.python.org/3/library/asyncio.html[asyncio].
-To ensure that you have all the required dependencies, install the
-`[async]` extra:
-
-[source,bash]
-----
-$ python -m pip install "elasticsearch[async]"
-----
-
-===== Connections
-
-Use the `async_connections` module to manage your asynchronous
-connections.
-
-[source,python]
-----
-from elasticsearch.dsl import async_connections
-
-async_connections.create_connection(hosts=['localhost'], timeout=20)
-----
-
-All the options available in the `connections` module can be used with
-`async_connections`.
-
-====== How to avoid 'Unclosed client session / connector' warnings on exit
-
-These warnings come from the `aiohttp` package, which is used internally
-by the `AsyncElasticsearch` client. They often appear when the
-application exits and are caused by HTTP connections that are open when
-they are garbage collected. To avoid these warnings, make sure that you
-close your connections.
-
-[source,python]
-----
-es = async_connections.get_connection()
-await es.close()
-----
-
-===== Search DSL
-
-Use the `AsyncSearch` class to perform asynchronous searches.
-
-[source,python]
-----
-from elasticsearch.dsl import AsyncSearch
-
-s = AsyncSearch().query("match", title="python")
-async for hit in s:
-    print(hit.title)
-----
-
-Instead of using the `AsyncSearch` object as an asynchronous iterator,
-you can explicitly call the `execute()` method to get a `Response`
-object.
-
-[source,python]
-----
-s = AsyncSearch().query("match", title="python")
-response = await s.execute()
-for hit in response:
-    print(hit.title)
-----
-
-An `AsyncMultiSearch` is available as well.
-
-[source,python]
-----
-from elasticsearch.dsl import AsyncMultiSearch
-
-ms = AsyncMultiSearch(index='blogs')
-
-ms = ms.add(AsyncSearch().filter('term', tags='python'))
-ms = ms.add(AsyncSearch().filter('term', tags='elasticsearch'))
-
-responses = await ms.execute()
-
-for response in responses:
-    print("Results for query %r." % response.search.query)
-    for hit in response:
-        print(hit.title)
-----
-
-===== Asynchronous Documents, Indexes, and more
-
-The `Document`, `Index`, `IndexTemplate`, `Mapping`, `UpdateByQuery` and
-`FacetedSearch` classes all have asynchronous versions that use the same
-name with an `Async` prefix. These classes expose the same interfaces as
-the synchronous versions, but any methods that perform I/O are defined
-as coroutines.
-
-Auxiliary classes that do not perform I/O do not have asynchronous
-versions. The same classes can be used in synchronous and asynchronous
-applications.
-
-When using a custom analyzer in an asynchronous
-application, use the `async_simulate()` method to invoke the Analyze
-API on it.
-
-Consult the `api` section for details about each specific method.
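Editor's note: a hedged sketch of the `async_simulate()` call described in the deleted page above, using a throwaway custom analyzer. The analyzer name, sample text, local URL, and the shape of the response object are illustrative assumptions:

[source,python]
----
import asyncio

from elasticsearch.dsl import analyzer, async_connections

my_analyzer = analyzer(
    "my_analyzer",
    tokenizer="standard",
    filter=["lowercase"],
)

async def main():
    async_connections.create_connection(hosts=["http://localhost:9200"])
    # async_simulate() invokes the Analyze API behind the scenes
    response = await my_analyzer.async_simulate("Async Analyzers Are Fun")
    print([token.token for token in response.tokens])
    await async_connections.get_connection().close()

asyncio.run(main())
----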
-==== - -==== Default connection - -To define a default connection that can be used globally, use the -`connections` module and the `create_connection` method like this: - -[source,python] ----- -from elasticsearch.dsl import connections - -connections.create_connection(hosts=['localhost'], timeout=20) ----- - -===== Single connection with an alias - -You can define the `alias` or name of a connection so you can easily -refer to it later. The default value for `alias` is `default`. - -[source,python] ----- -from elasticsearch.dsl import connections - -connections.create_connection(alias='my_new_connection', hosts=['localhost'], timeout=60) ----- - -Additional keyword arguments (`hosts` and `timeout` in our example) will -be passed to the `Elasticsearch` class from `elasticsearch-py`. - -To see all possible configuration options refer to the -https://elasticsearch-py.readthedocs.io/en/latest/api/elasticsearch.html[documentation]. - -==== Multiple clusters - -You can define multiple connections to multiple clusters at the same -time using the `configure` method: - -[source,python] ----- -from elasticsearch.dsl import connections - -connections.configure( - default={'hosts': 'localhost'}, - dev={ - 'hosts': ['esdev1.example.com:9200'], - 'sniff_on_start': True - } -) ----- - -Such connections will be constructed lazily when requested for the first -time. - -You can alternatively define multiple connections by adding them one by -one as shown in the following example: - -[source,python] ----- -# if you have configuration options to be passed to Elasticsearch.__init__ -# this also shows creating a connection with the alias 'qa' -connections.create_connection('qa', hosts=['esqa1.example.com'], sniff_on_start=True) - -# if you already have an Elasticsearch instance ready -connections.add_connection('another_qa', my_client) ----- - -===== Using aliases - -When using multiple connections, you can refer to them using the string -alias specified when you created the connection. - -This example shows how to use an alias to a connection: - -[source,python] ----- -s = Search(using='qa') ----- - -A `KeyError` will be raised if there is no connection registered with -that alias. - -==== Manual - -If you don't want to supply a global configuration, you can always pass -in your own connection as an instance of `elasticsearch.Elasticsearch` -with the parameter `using` wherever it is accepted like this: - -[source,python] ----- -s = Search(using=Elasticsearch('localhost')) ----- - -You can even use this approach to override any connection the object -might be already associated with: - -[source,python] ----- -s = s.using(Elasticsearch('otherhost:9200')) ----- - -[NOTE] -==== -When using the `dsl` module, it is highly recommended that you -use the built-in serializer -(`elasticsearch.dsl.serializer.serializer`) to ensure your objects -are correctly serialized into `JSON` every time. The -`create_connection` method that is described here (and that the -`configure` method uses under the hood) will do that automatically for -you, unless you explicitly specify your own serializer. The built-in -serializer also allows you to serialize your own objects - just define a -`to_dict()` method on your objects and that method will be -automatically called when serializing your custom objects to `JSON`. 
-==== diff --git a/docs/guide/dsl/examples.asciidoc b/docs/guide/dsl/examples.asciidoc deleted file mode 100644 index 5d22f84e5..000000000 --- a/docs/guide/dsl/examples.asciidoc +++ /dev/null @@ -1,5 +0,0 @@ -=== Examples - -Please see the -https://github.com/elastic/elasticsearch-py/tree/master/examples/dsl[DSL examples] -directory to see some complex examples using the DSL module. diff --git a/docs/guide/dsl/faceted_search.asciidoc b/docs/guide/dsl/faceted_search.asciidoc deleted file mode 100644 index 6d05cae2d..000000000 --- a/docs/guide/dsl/faceted_search.asciidoc +++ /dev/null @@ -1,145 +0,0 @@ -[[faceted_search]] -==== Faceted Search - -The library comes with a simple abstraction aimed at helping you develop -faceted navigation for your data. - -[NOTE] -==== -This API is experimental and will be subject to change. Any feedback is -welcome. -==== - -===== Configuration - -You can provide several configuration options (as class attributes) when -declaring a `FacetedSearch` subclass: - -- `index`: - the name of the index (as string) to search through, defaults to - `'_all'`. -- `doc_types`: - list of `Document` subclasses or strings to be used, defaults to - `['_all']`. -- `fields`: - list of fields on the document type to search through. The list will - be passes to `MultiMatch` query so can contain boost values - (`'title^5'`), defaults to `['*']`. -- `facets`: - dictionary of facets to display/filter on. The key is the name - displayed and values should be instances of any `Facet` subclass, for - example: `{'tags': TermsFacet(field='tags')}` - -====== Facets - -There are several different facets available: - -- `TermsFacet`: - provides an option to split documents into groups based on a value of - a field, for example `TermsFacet(field='category')` -- `DateHistogramFacet`: - split documents into time intervals, example: - `DateHistogramFacet(field="published_date", calendar_interval="day")` -- `HistogramFacet`: - similar to `DateHistogramFacet` but for numerical values: - `HistogramFacet(field="rating", interval=2)` -- `RangeFacet`: - allows you to define your own ranges for a numerical fields: - `RangeFacet(field="comment_count", ranges=[("few", (None, 2)), ("lots", (2, None))])` -- `NestedFacet`: - is just a simple facet that wraps another to provide access to nested - documents: - `NestedFacet('variants', TermsFacet(field='variants.color'))` - -By default facet results will only calculate document count, if you wish -for a different metric you can pass in any single value metric -aggregation as the `metric` kwarg -(`TermsFacet(field='tags', metric=A('max', field=timestamp))`). When -specifying `metric` the results will be, by default, sorted in -descending order by that metric. To change it to ascending specify -`metric_sort="asc"` and to just sort by document count use -`metric_sort=False`. - -====== Advanced - -If you require any custom behavior or modifications simply override one -or more of the methods responsible for the class' functions: - -- `search(self)`: - is responsible for constructing the `Search` object used. Override - this if you want to customize the search object (for example by adding - a global filter for published articles only). -- `query(self, search)`: - adds the query position of the search (if search input specified), by - default using `MultiField` query. Override this if you want to modify - the query type used. -- `highlight(self, search)`: - defines the highlighting on the `Search` object and returns a new one. 
-  Default behavior is to highlight on all fields specified for search.
-
-===== Usage
-
-The custom subclass can be instantiated empty to provide an empty search
-(matching everything) or with `query`, `filters` and `sort`.
-
-- `query`:
-  is used to pass in the text of the query to be performed. If `None` is
-  passed in (default) a `MatchAll` query will be used. For example
-  `'python web'`
-- `filters`:
-  is a dictionary containing all the facet filters that you wish to
-  apply. Use the name of the facet (from `.facets` attribute) as the key
-  and one of the possible values as value. For example
-  `{'tags': 'python'}`.
-- `sort`:
-  is a tuple or list of fields on which the results should be sorted.
-  The format of the individual fields is the same as for those passed
-  to `~elasticsearch.dsl.Search.sort`.
-
-====== Response
-
-The response returned from the `FacetedSearch` object (by calling
-`.execute()`) is a subclass of the standard `Response` class that adds a
-property called `facets` which contains a dictionary with lists of
-buckets, each represented by a tuple of key, document count, and a flag
-indicating whether this value has been filtered on.
-
-===== Example
-
-[source,python]
-----
-from datetime import date
-
-from elasticsearch.dsl import FacetedSearch, TermsFacet, DateHistogramFacet
-
-class BlogSearch(FacetedSearch):
-    doc_types = [Article, ]
-    # fields that should be searched
-    fields = ['tags', 'title', 'body']
-
-    facets = {
-        # use bucket aggregations to define facets
-        'tags': TermsFacet(field='tags'),
-        'publishing_frequency': DateHistogramFacet(field='published_from', interval='month')
-    }
-
-    def search(self):
-        # override methods to add custom pieces
-        s = super().search()
-        return s.filter('range', published_from={'lte': 'now/h'})
-
-bs = BlogSearch('python web', {'publishing_frequency': date(2015, 6, 1)})
-response = bs.execute()
-
-# access hits and other attributes as usual
-total = response.hits.total
-print('total hits', total.relation, total.value)
-for hit in response:
-    print(hit.meta.score, hit.title)
-
-for (tag, count, selected) in response.facets.tags:
-    print(tag, ' (SELECTED):' if selected else ':', count)
-
-for (month, count, selected) in response.facets.publishing_frequency:
-    print(month.strftime('%B %Y'), ' (SELECTED):' if selected else ':', count)
-----
diff --git a/docs/guide/dsl/howto.asciidoc b/docs/guide/dsl/howto.asciidoc
deleted file mode 100644
index 3328234bb..000000000
--- a/docs/guide/dsl/howto.asciidoc
+++ /dev/null
@@ -1,7 +0,0 @@
-=== How-To Guides
-
-include::search_dsl.asciidoc[]
-include::persistence.asciidoc[]
-include::faceted_search.asciidoc[]
-include::update_by_query.asciidoc[]
-include::asyncio.asciidoc[]
diff --git a/docs/guide/dsl/persistence.asciidoc b/docs/guide/dsl/persistence.asciidoc
deleted file mode 100644
index ff478dadf..000000000
--- a/docs/guide/dsl/persistence.asciidoc
+++ /dev/null
@@ -1,761 +0,0 @@
-==== Persistence
-
-You can use the DSL module to define your mappings and a basic
-persistent layer for your application.
-
-For more comprehensive examples have a look at the
-https://github.com/elastic/elasticsearch-py/tree/main/examples/dsl[DSL examples]
-directory in the repository.
-
-[[doc_type]]
-===== Document
-
-If you want to create a model-like wrapper around your documents, use
-the `Document` class. It can also be used to create all the necessary
-mappings and settings in elasticsearch (see `life-cycle` for details).
- -[source,python] ----- -from datetime import datetime -from elasticsearch.dsl import Document, Date, Nested, Boolean, \ - analyzer, InnerDoc, Completion, Keyword, Text - -html_strip = analyzer('html_strip', - tokenizer="standard", - filter=["standard", "lowercase", "stop", "snowball"], - char_filter=["html_strip"] -) - -class Comment(InnerDoc): - author = Text(fields={'raw': Keyword()}) - content = Text(analyzer='snowball') - created_at = Date() - - def age(self): - return datetime.now() - self.created_at - -class Post(Document): - title = Text() - title_suggest = Completion() - created_at = Date() - published = Boolean() - category = Text( - analyzer=html_strip, - fields={'raw': Keyword()} - ) - - comments = Nested(Comment) - - class Index: - name = 'blog' - - def add_comment(self, author, content): - self.comments.append( - Comment(author=author, content=content, created_at=datetime.now())) - - def save(self, ** kwargs): - self.created_at = datetime.now() - return super().save(** kwargs) ----- - -====== Data types - -The `Document` instances use native python types like `str` and -`datetime`. In case of `Object` or `Nested` fields an instance of the -`InnerDoc` subclass is used, as in the `add_comment` method in the -above example where we are creating an instance of the `Comment` class. - -There are some specific types that were created as part of this library -to make working with some field types easier, for example the `Range` -object used in any of the -https://www.elastic.co/guide/en/elasticsearch/reference/current/range.html[range -fields]: - -[source,python] ----- -from elasticsearch.dsl import Document, DateRange, Keyword, Range - -class RoomBooking(Document): - room = Keyword() - dates = DateRange() - - -rb = RoomBooking( - room='Conference Room II', - dates=Range( - gte=datetime(2018, 11, 17, 9, 0, 0), - lt=datetime(2018, 11, 17, 10, 0, 0) - ) -) - -# Range supports the in operator correctly: -datetime(2018, 11, 17, 9, 30, 0) in rb.dates # True - -# you can also get the limits and whether they are inclusive or exclusive: -rb.dates.lower # datetime(2018, 11, 17, 9, 0, 0), True -rb.dates.upper # datetime(2018, 11, 17, 10, 0, 0), False - -# empty range is unbounded -Range().lower # None, False ----- - -====== Python Type Hints - -Document fields can be defined using standard Python type hints if -desired. Here are some simple examples: - -[source,python] ----- -from typing import Optional - -class Post(Document): - title: str # same as title = Text(required=True) - created_at: Optional[datetime] # same as created_at = Date(required=False) - published: bool # same as published = Boolean(required=True) ----- - -It is important to note that when using `Field` subclasses such as -`Text`, `Date` and `Boolean`, they must be given in the right-side of an -assignment, as shown in examples above. Using these classes as type -hints will result in errors. - -Python types are mapped to their corresponding field type according to -the following table: - -.Python type to DSL field mappings -[cols=",",options="header",] -|=== -|Python type |DSL field -|`str` |`Text(required=True)` -|`bool` |`Boolean(required=True)` -|`int` |`Integer(required=True)` -|`float` |`Float(required=True)` -|`bytes` |`Binary(required=True)` -|`datetime` |`Date(required=True)` -|`date` |`Date(format="yyyy-MM-dd", required=True)` -|=== - -To type a field as optional, the standard `Optional` modifier from the -Python `typing` package can be used. 
When using Python 3.10 or newer, -"pipe" syntax can also be used, by adding `| None` to a type. The -`List` modifier can be added to a field to convert it to an array, -similar to using the `multi=True` argument on the field object. - -[source,python] ----- -from typing import Optional, List - -class MyDoc(Document): - pub_date: Optional[datetime] # same as pub_date = Date() - middle_name: str | None # same as middle_name = Text() - authors: List[str] # same as authors = Text(multi=True, required=True) - comments: Optional[List[str]] # same as comments = Text(multi=True) ----- - -A field can also be given a type hint of an `InnerDoc` subclass, in -which case it becomes an `Object` field of that class. When the -`InnerDoc` subclass is wrapped with `List`, a `Nested` field is created -instead. - -[source,python] ----- -from typing import List - -class Address(InnerDoc): - ... - -class Comment(InnerDoc): - ... - -class Post(Document): - address: Address # same as address = Object(Address, required=True) - comments: List[Comment] # same as comments = Nested(Comment, required=True) ----- - -Unfortunately it is impossible to have Python type hints that uniquely -identify every possible Elasticsearch field type. To choose a field type -that is different than the ones in the table above, the field instance -can be added explicitly as a right-side assignment in the field -declaration. The next example creates a field that is typed as -`Optional[str]`, but is mapped to `Keyword` instead of `Text`: - -[source,python] ----- -class MyDocument(Document): - category: Optional[str] = Keyword() ----- - -This form can also be used when additional options need to be given to -initialize the field, such as when using custom analyzer settings or -changing the `required` default: - -[source,python] ----- -class Comment(InnerDoc): - content: str = Text(analyzer='snowball', required=True) ----- - -When using type hints as above, subclasses of `Document` and `InnerDoc` -inherit some of the behaviors associated with Python dataclasses, as -defined by https://peps.python.org/pep-0681/[PEP 681] and the -https://typing.readthedocs.io/en/latest/spec/dataclasses.html#dataclass-transform[dataclass_transform -decorator]. To add per-field dataclass options such as `default` or -`default_factory`, the `mapped_field()` wrapper can be used on -the right side of a typed field declaration: - -[source,python] ----- -class MyDocument(Document): - title: str = mapped_field(default="no title") - created_at: datetime = mapped_field(default_factory=datetime.now) - published: bool = mapped_field(default=False) - category: str = mapped_field(Keyword(required=True), default="general") ----- - -When using the `mapped_field()` wrapper function, an explicit field -type instance can be passed as a first positional argument, as the -`category` field does in the example above. - -Static type checkers such as https://mypy-lang.org/[mypy] and -https://github.com/microsoft/pyright[pyright] can use the type hints and -the dataclass-specific options added to the `mapped_field()` -function to improve type inference and provide better real-time -suggestions in IDEs. - -One situation in which type checkers can't infer the correct type is -when using fields as class attributes. 
Consider the following example: - -[source,python] ---- -class MyDocument(Document): - title: str - -doc = MyDocument() -# doc.title is typed as "str" (correct) -# MyDocument.title is also typed as "str" (incorrect) ---- - -To help type checkers correctly identify class attributes as such, the -`M` generic must be used as a wrapper to the type hint, as shown in the -next examples: - -[source,python] ---- -from elasticsearch.dsl import M - -class MyDocument(Document): - title: M[str] - created_at: M[datetime] = mapped_field(default_factory=datetime.now) - -doc = MyDocument() -# doc.title is typed as "str" -# doc.created_at is typed as "datetime" -# MyDocument.title is typed as "InstrumentedField" -# MyDocument.created_at is typed as "InstrumentedField" ---- - -Note that the `M` type hint does not provide any runtime behavior and -its use is not required, but it can be useful to eliminate spurious type -errors in IDEs or type checking builds. - -The `InstrumentedField` objects returned when fields are accessed as -class attributes are proxies for the field instances that can be used -anywhere a field needs to be referenced, such as when specifying sort -options in a `Search` object: - -[source,python] ---- -# sort by creation date descending, and title ascending -s = MyDocument.search().sort(-MyDocument.created_at, MyDocument.title) ---- - -When specifying sorting order, the `{plus}` and `-` unary operators can -be used on the class field attributes to indicate ascending and -descending order. - -Finally, the `ClassVar` annotation can be used to define a regular class -attribute that should not be mapped to the Elasticsearch index: - -[source,python] ---- -from typing import ClassVar - -class MyDoc(Document): - title: M[str] - created_at: M[datetime] = mapped_field(default_factory=datetime.now) - my_var: ClassVar[str] # regular class variable, ignored by Elasticsearch ---- - -====== Note on dates - -The DSL module will always respect the timezone information (or -lack thereof) on the `datetime` objects passed in or stored in -Elasticsearch. Elasticsearch itself interprets all datetimes with no -timezone information as `UTC`. If you wish to reflect this in your -Python code, you can specify `default_timezone` when instantiating a -`Date` field: - -[source,python] ---- -class Post(Document): - created_at = Date(default_timezone='UTC') ---- - -In that case any `datetime` object passed in (or parsed from -elasticsearch) will be treated as if it were in the `UTC` timezone. - -[[life-cycle]] -====== Document life cycle - -Before you first use the `Post` document type, you need to create the -mappings in Elasticsearch. For that you can either use the `index` -object or create the mappings directly by calling the `init` class -method: - -[source,python] ---- -# create the mappings in Elasticsearch -Post.init() ---- - -This code will typically be run in the setup for your application during -a code deploy, similar to running database migrations. - -To create a new `Post` document just instantiate the class and pass in -any fields you wish to set. You can then use standard attribute setting -to change or add more fields.
Note that you are not limited to the fields -defined explicitly: - -[source,python] ---- -# instantiate the document -first = Post(title='My First Blog Post, yay!', published=True) -# assign some field values, can be values or lists of values -first.category = ['everything', 'nothing'] -# every document has an id in meta -first.meta.id = 47 - - -# save the document into the cluster -first.save() ---- - -All the metadata fields (`id`, `routing`, `index`, etc.) can be accessed -(and set) via a `meta` attribute or directly using the underscored -variant: - -[source,python] ---- -post = Post(meta={'id': 42}) - -# prints 42 -print(post.meta.id) - -# override default index -post.meta.index = 'my-blog' ---- - -[NOTE] -==== -Having all metadata accessible through `meta` means that this name is -reserved and you shouldn't have a field called `meta` on your document. -If you, however, need it you can still access the data using the get -item (as opposed to attribute) syntax: `post['meta']`. -==== - -To retrieve an existing document use the `get` class method: - -[source,python] ---- -# retrieve the document -first = Post.get(id=42) -# now we can call methods, change fields, ... -first.add_comment('me', 'This is nice!') -# and save the changes into the cluster again -first.save() ---- - -The -https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update.html[Update -API] can also be used via the `update` method. By default any keyword -arguments, beyond the parameters of the API, will be considered fields -with new values. Those fields will be updated on the local copy of the -document and then sent over as a partial document to be updated: - -[source,python] ---- -# retrieve the document -first = Post.get(id=42) -# you can update just individual fields which will call the update API -# and also update the document in place -first.update(published=True, published_by='me') ---- - -In case you wish to use a `painless` script to perform the update you -can pass in the script string as `script` or the `id` of a -https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html#script-stored-scripts[stored script] -via `script_id`. All additional keyword arguments to the -`update` method will then be passed in as parameters of the script. The -document will not be updated in place. - -[source,python] ---- -# retrieve the document -first = Post.get(id=42) -# we execute a script in elasticsearch with additional kwargs being passed -# as params into the script -first.update(script='ctx._source.category.add(params.new_category)', - new_category='testing') ---- - -If the document is not found in elasticsearch an exception -(`elasticsearch.NotFoundError`) will be raised. If you wish to return -`None` instead just pass in `ignore=404` to suppress the exception: - -[source,python] ---- -p = Post.get(id='not-in-es', ignore=404) -p is None ---- - -When you wish to retrieve multiple documents at the same time by their -`id` you can use the `mget` method: - -[source,python] ---- -posts = Post.mget([42, 47, 256]) ---- - -`mget` will, by default, raise a `NotFoundError` if any of the documents -was not found and a `RequestError` if any of the documents resulted in an -error. You can control this behavior by setting parameters: - -- `raise_on_error`: - If `True` (default) then any error will cause an exception to be - raised. Otherwise all documents containing errors will be treated as - missing.
-- `missing`: - Can have three possible values: `'none'` (default), `'raise'` and - `'skip'`. If a document is missing or errored, it will either be - replaced with `None`, have an exception raised, or be skipped in the - output list entirely. - -The index associated with the `Document` is accessible via the -`_index` class property, which gives you access to the `Index` object. - -The `_index` attribute is also home to the `load_mappings` -method which will update the mapping on the `Index` from elasticsearch. -This is very useful if you use dynamic mappings and want the class to be -aware of those fields (for example if you wish the `Date` fields to be -properly (de)serialized): - -[source,python] ---- -Post._index.load_mappings() ---- - -To delete a document just call its `delete` method: - -[source,python] ---- -first = Post.get(id=42) -first.delete() ---- - -====== Analysis - -To specify `analyzer` values for `Text` fields you can just use the name -of the analyzer (as a string) and either rely on the analyzer being -defined (like built-in analyzers) or define the analyzer yourself. - -Alternatively you can create your own analyzer and have the persistence -layer handle its creation; from our example earlier: - -[source,python] ---- -from elasticsearch.dsl import analyzer, tokenizer - -my_analyzer = analyzer('my_analyzer', - tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), - filter=['lowercase'] -) ---- - -Each analysis object needs to have a name (`my_analyzer` and -`trigram` in our example) and tokenizers, token filters and char filters -also need to specify a type (`nGram` in our example). - -Once you have an instance of a custom `analyzer` you can also call the -https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-analyze.html[analyze -API] on it by using the `simulate` method: - -[source,python] ---- -response = my_analyzer.simulate('Hello World!') - -# ['hel', 'ell', 'llo', 'lo ', 'o w', ' wo', 'wor', 'orl', 'rld', 'ld!'] -tokens = [t.token for t in response.tokens] ---- - -[NOTE] -==== -When creating a mapping which relies on a custom analyzer the index must -either not exist or be closed. To create multiple `Document`-defined -mappings you can use the `index` object. -==== - -====== Search - -To search for this document type, use the `search` class method: - -[source,python] ---- -# by calling .search we get back a standard Search object -s = Post.search() -# the search is already limited to the index and doc_type of our document -s = s.filter('term', published=True).query('match', title='first') - - -results = s.execute() - -# when you execute the search the results are wrapped in your document class (Post) -for post in results: - print(post.meta.score, post.title) ---- - -Alternatively you can just take a `Search` object and restrict it to -return our document type, wrapped in the correct class: - -[source,python] ---- -s = Search() -s = s.doc_type(Post) ---- - -You can also combine document classes with standard doc types (just -strings), which will be treated as before. You can also pass in multiple -`Document` subclasses and each document in the response will be wrapped -in its class, as in the sketch below.
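For instance, a minimal sketch of a multi-class search (assuming a second, hypothetical `Document` subclass `Author` registered against its own index; the index names here are illustrative):

[source,python]
----
# search two indices at once; each hit in the response is wrapped
# in whichever Document subclass it corresponds to
s = Search(index='blog,authors').doc_type(Post, Author)

for hit in s.execute():
    print(type(hit).__name__, hit.meta.id)
----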
- -If you want to run suggestions, just use the `suggest` method on the -`Search` object: - -[source,python] ---- -s = Post.search() -s = s.suggest('title_suggestions', 'pyth', completion={'field': 'title_suggest'}) - -response = s.execute() - -for result in response.suggest.title_suggestions: - print('Suggestions for %s:' % result.text) - for option in result.options: - print(' %s (%r)' % (option.text, option.payload)) ---- - -====== `class Meta` options - -In the `Meta` class inside your document definition you can define -various metadata for your document: - -- `mapping`: - optional instance of the `Mapping` class to use as a base for the mappings - created from the fields on the document class itself. - -Any attributes on the `Meta` class that are instances of `MetaField` will -be used to control the mapping of the meta fields (`_all`, `dynamic`, -etc.). Just name the parameter after the field you wish to map (without -the leading underscore) and pass any parameters to the `MetaField` class: - -[source,python] ---- -class Post(Document): - title = Text() - - class Meta: - all = MetaField(enabled=False) - dynamic = MetaField('strict') ---- - -====== `class Index` options - -This section of the `Document` definition can contain any information -about the index, such as its name, settings and other attributes: - -- `name`: - name of the index to use; if it contains a wildcard (`*`) then it - cannot be used for any write operations and an `index` kwarg will have - to be passed explicitly when calling methods like `.save()`. -- `using`: - default connection alias to use, defaults to `'default'` -- `settings`: - dictionary containing any settings for the `Index` object like - `number_of_shards`. -- `analyzers`: - additional list of analyzers that should be defined on an index (see - `analysis` for details). -- `aliases`: - dictionary with any alias definitions - -====== Document Inheritance - -You can use standard Python inheritance to extend models; this can be -useful in a few scenarios. For example if you want to have a -`BaseDocument` defining some common fields that several different -`Document` classes should share: - -[source,python] ---- -class User(InnerDoc): - username = Text(fields={'keyword': Keyword()}) - email = Text() - -class BaseDocument(Document): - created_by = Object(User) - created_date = Date() - last_updated = Date() - - def save(self, **kwargs): - if not self.created_date: - self.created_date = datetime.now() - self.last_updated = datetime.now() - return super(BaseDocument, self).save(**kwargs) - -class BlogPost(BaseDocument): - class Index: - name = 'blog' ---- - -Another use case would be using the -https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html[join -type] to have multiple different entities in a single index. You can see -an -https://github.com/elastic/elasticsearch-py/blob/main/examples/dsl/parent_child.py[example] -of this approach. Note that in this case, if the subclasses don't define -their own `Index` classes, the mappings are merged and -shared between all the subclasses. - -===== Index - -In a typical scenario, using `class Index` on a `Document` class is -sufficient to perform any action. In a few cases though it can be useful -to manipulate an `Index` object directly. - -`Index` is a class responsible for holding all the metadata related to -an index in elasticsearch: mappings and settings. It is most useful -when defining your mappings since it allows for easy creation of -multiple mappings at the same time.
This is especially useful when -setting up your elasticsearch objects in a migration: - -[source,python] ---- -from elasticsearch.dsl import Index, Document, Text, analyzer - -blogs = Index('blogs') - -# define custom settings -blogs.settings( - number_of_shards=1, - number_of_replicas=0 -) - -# define aliases -blogs.aliases( - old_blogs={} -) - -# register a document with the index -blogs.document(Post) - -# can also be used as a class decorator when defining the Document -@blogs.document -class Post(Document): - title = Text() - -# You can attach custom analyzers to the index - -html_strip = analyzer('html_strip', - tokenizer="standard", - filter=["standard", "lowercase", "stop", "snowball"], - char_filter=["html_strip"] -) - -blogs.analyzer(html_strip) - -# delete the index, ignore if it doesn't exist -blogs.delete(ignore=404) - -# create the index in elasticsearch -blogs.create() ---- - -You can also set up a template for your indices and use the `clone` -method to create specific copies: - -[source,python] ---- -blogs = Index('blogs', using='production') -blogs.settings(number_of_shards=2) -blogs.document(Post) - -# create a copy of the index with a different name -company_blogs = blogs.clone('company-blogs') - -# create a different copy on a different cluster -dev_blogs = blogs.clone('blogs', using='dev') -# and change its settings -dev_blogs.settings(number_of_shards=1) ---- - -[[index-template]] -====== IndexTemplate - -The DSL module also exposes an option to manage -https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html[index -templates] in elasticsearch using the `IndexTemplate` class, which has a -very similar API to `Index`. - -Once an index template is saved in elasticsearch its contents will be -automatically applied to new indices (existing indices are completely -unaffected by templates) that match the template pattern (any index -starting with `blogs-` in our example), even if the index is created -automatically upon indexing a document into that index. - -A potential workflow for a set of time-based indices governed by a single -template: - -[source,python] ---- -from datetime import datetime - -from elasticsearch.dsl import Document, Date, Text - - -class Log(Document): - content = Text() - timestamp = Date() - - class Index: - name = "logs-*" - settings = { - "number_of_shards": 2 - } - - def save(self, **kwargs): - # assign now if no timestamp given - if not self.timestamp: - self.timestamp = datetime.now() - - # override the index to go to the proper timeslot - kwargs['index'] = self.timestamp.strftime('logs-%Y%m%d') - return super().save(**kwargs) - -# once, as part of application setup, during deploy/migrations: -logs = Log._index.as_template('logs', order=0) -logs.save() - -# to perform search across all logs: -search = Log.search() ---- - diff --git a/docs/guide/dsl/search_dsl.asciidoc b/docs/guide/dsl/search_dsl.asciidoc deleted file mode 100644 index dfc3b256a..000000000 --- a/docs/guide/dsl/search_dsl.asciidoc +++ /dev/null @@ -1,704 +0,0 @@ -[[search_dsl]] -==== Search DSL - -===== The `Search` object - -The `Search` object represents the entire search request: - -* queries -* filters -* aggregations -* k-nearest neighbor searches -* sort -* pagination -* highlighting -* suggestions -* collapsing -* additional parameters -* associated client - -The API is designed to be chainable.
With the exception of the -aggregations functionality this means that the `Search` object is -immutable -all changes to the object will result in a shallow copy being -created which contains the changes. This means you can safely pass the -`Search` object to foreign code without fear of it modifying your -objects as long as it sticks to the `Search` object APIs. - -You can pass an instance of the -https://elasticsearch-py.readthedocs.io/[elasticsearch client] when -instantiating the `Search` object: - -[source,python] ----- -from elasticsearch import Elasticsearch -from elasticsearch.dsl import Search - -client = Elasticsearch() - -s = Search(using=client) ----- - -You can also define the client at a later time (for more options see the -`configuration` chapter): - -[source,python] ----- -s = s.using(client) ----- - -[NOTE] -==== -All methods return a _copy_ of the object, making it safe to pass to -outside code. -==== - -The API is chainable, allowing you to combine multiple method calls in -one statement: - -[source,python] ----- -s = Search().using(client).query("match", title="python") ----- - -To send the request to Elasticsearch: - -[source,python] ----- -response = s.execute() ----- - -If you just want to iterate over the hits returned by your search you -can iterate over the `Search` object: - -[source,python] ----- -for hit in s: - print(hit.title) ----- - -Search results will be cached. Subsequent calls to `execute` or trying -to iterate over an already executed `Search` object will not trigger -additional requests being sent to Elasticsearch. To force a request -specify `ignore_cache=True` when calling `execute`. - -For debugging purposes you can serialize the `Search` object to a `dict` -explicitly: - -[source,python] ----- -print(s.to_dict()) ----- - -====== Delete By Query - -You can delete the documents matching a search by calling `delete` on -the `Search` object instead of `execute` like this: - -[source,python] ----- -s = Search(index='i').query("match", title="python") -response = s.delete() ----- - -====== Queries - -The library provides classes for all Elasticsearch query types. Pass all -the parameters as keyword arguments. The classes accept any keyword -arguments, the dsl then takes all arguments passed to the constructor -and serializes them as top-level keys in the resulting dictionary (and -thus the resulting json being sent to elasticsearch). This means that -there is a clear one-to-one mapping between the raw query and its -equivalent in the DSL: - -[source,python] ----- -from elasticsearch.dsl.query import MultiMatch, Match - -# {"multi_match": {"query": "python django", "fields": ["title", "body"]}} -MultiMatch(query='python django', fields=['title', 'body']) - -# {"match": {"title": {"query": "web framework", "type": "phrase"}}} -Match(title={"query": "web framework", "type": "phrase"}) ----- - -[NOTE] -==== -In some cases this approach is not possible due to python's restriction -on identifiers - for example if your field is called `@timestamp`. 
In -that case you have to fall back to unpacking a dictionary: -`Range(**{'@timestamp': {'lt': 'now'}})` -==== - -You can use the `Q` shortcut to construct the instance using a name with -parameters or the raw `dict`: - -[source,python] ---- -from elasticsearch.dsl import Q - -Q("multi_match", query='python django', fields=['title', 'body']) -Q({"multi_match": {"query": "python django", "fields": ["title", "body"]}}) ---- - -To add the query to the `Search` object, use the `.query()` method: - -[source,python] ---- -q = Q("multi_match", query='python django', fields=['title', 'body']) -s = s.query(q) ---- - -The method also accepts all the parameters that the `Q` shortcut does: - -[source,python] ---- -s = s.query("multi_match", query='python django', fields=['title', 'body']) ---- - -If you already have a query object, or a `dict` representing one, you -can just override the query used in the `Search` object: - -[source,python] ---- -s.query = Q('bool', must=[Q('match', title='python'), Q('match', body='best')]) ---- - -====== Dotted fields - -Sometimes you want to refer to a field within another field, either as a -multi-field (`title.keyword`) or in a structured `json` document like -`address.city`. To make it easier, the `Q` shortcut (as well as the -`query`, `filter`, and `exclude` methods on the `Search` class) allows you -to use `__` (double underscore) in place of a dot in a keyword -argument: - -[source,python] ---- -s = Search() -s = s.filter('term', category__keyword='Python') -s = s.query('match', address__city='prague') ---- - -Alternatively you can always fall back to python's kwarg unpacking if -you prefer: - -[source,python] ---- -s = Search() -s = s.filter('term', **{'category.keyword': 'Python'}) -s = s.query('match', **{'address.city': 'prague'}) ---- - -====== Query combination - -Query objects can be combined using logical operators: - -[source,python] ---- -Q("match", title='python') | Q("match", title='django') -# {"bool": {"should": [...]}} - -Q("match", title='python') & Q("match", title='django') -# {"bool": {"must": [...]}} - -~Q("match", title="python") -# {"bool": {"must_not": [...]}} ---- - -When you call the `.query()` method multiple times, the `&` operator -will be used internally: - -[source,python] ---- -s = s.query().query() -print(s.to_dict()) -# {"query": {"bool": {...}}} ---- - -If you want to have precise control over the query form, use the `Q` -shortcut to directly construct the combined query: - -[source,python] ---- -q = Q('bool', - must=[Q('match', title='python')], - should=[Q(...), Q(...)], - minimum_should_match=1 -) -s = Search().query(q) ---- - -====== Filters - -If you want to add a query in a -https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html[filter -context] you can use the `filter()` method to make things easier: - -[source,python] ---- -s = Search() -s = s.filter('terms', tags=['search', 'python']) ---- - -Behind the scenes this will produce a `Bool` query and place the -specified `terms` query into its `filter` branch, making it equivalent -to: - -[source,python] ---- -s = Search() -s = s.query('bool', filter=[Q('terms', tags=['search', 'python'])]) ---- - -If you want to use the post_filter element for faceted navigation, -use the `.post_filter()` method, as in the sketch below.
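For example, a minimal sketch contrasting the two (the field names are illustrative):

[source,python]
----
s = Search()
# a regular filter narrows the hits and the aggregation results alike
s = s.filter('terms', tags=['search', 'python'])
# a post_filter is applied after aggregations are computed, so it
# narrows only the hits that are returned
s = s.post_filter('term', category='python')
----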
- -You can also `exclude()` items from your query like this: - -[source,python] ----- -s = Search() -s = s.exclude('terms', tags=['search', 'python']) ----- - -which is shorthand for: -`s = s.query('bool', filter=[~Q('terms', tags=['search', 'python'])])` - -====== Aggregations - -To define an aggregation, you can use the `A` shortcut: - -[source,python] ----- -from elasticsearch.dsl import A - -A('terms', field='tags') -# {"terms": {"field": "tags"}} ----- - -To nest aggregations, you can use the `.bucket()`, `.metric()` and -`.pipeline()` methods: - -[source,python] ----- -a = A('terms', field='category') -# {'terms': {'field': 'category'}} - -a.metric('clicks_per_category', 'sum', field='clicks')\ - .bucket('tags_per_category', 'terms', field='tags') -# { -# 'terms': {'field': 'category'}, -# 'aggs': { -# 'clicks_per_category': {'sum': {'field': 'clicks'}}, -# 'tags_per_category': {'terms': {'field': 'tags'}} -# } -# } ----- - -To add aggregations to the `Search` object, use the `.aggs` property, -which acts as a top-level aggregation: - -[source,python] ----- -s = Search() -a = A('terms', field='category') -s.aggs.bucket('category_terms', a) -# { -# 'aggs': { -# 'category_terms': { -# 'terms': { -# 'field': 'category' -# } -# } -# } -# } ----- - -or - -[source,python] ----- -s = Search() -s.aggs.bucket('articles_per_day', 'date_histogram', field='publish_date', interval='day')\ - .metric('clicks_per_day', 'sum', field='clicks')\ - .pipeline('moving_click_average', 'moving_avg', buckets_path='clicks_per_day')\ - .bucket('tags_per_day', 'terms', field='tags') - -s.to_dict() -# { -# "aggs": { -# "articles_per_day": { -# "date_histogram": { "interval": "day", "field": "publish_date" }, -# "aggs": { -# "clicks_per_day": { "sum": { "field": "clicks" } }, -# "moving_click_average": { "moving_avg": { "buckets_path": "clicks_per_day" } }, -# "tags_per_day": { "terms": { "field": "tags" } } -# } -# } -# } -# } ----- - -You can access an existing bucket by its name: - -[source,python] ----- -s = Search() - -s.aggs.bucket('per_category', 'terms', field='category') -s.aggs['per_category'].metric('clicks_per_category', 'sum', field='clicks') -s.aggs['per_category'].bucket('tags_per_category', 'terms', field='tags') ----- - -[NOTE] -==== -When chaining multiple aggregations, there is a difference between what -`.bucket()` and `.metric()` methods return - `.bucket()` returns the -newly defined bucket while `.metric()` returns its parent bucket to -allow further chaining. -==== - -As opposed to other methods on the `Search` objects, defining -aggregations is done in-place (does not return a copy). - -====== K-Nearest Neighbor Searches - -To issue a kNN search, use the `.knn()` method: - -[source,python] ----- -s = Search() -vector = get_embedding("search text") - -s = s.knn( - field="embedding", - k=5, - num_candidates=10, - query_vector=vector -) ----- - -The `field`, `k` and `num_candidates` arguments can be given as -positional or keyword arguments and are required. In addition to these, -`query_vector` or `query_vector_builder` must be given as -well. - -The `.knn()` method can be invoked multiple times to include multiple -kNN searches in the request. - -====== Sorting - -To specify sorting order, use the `.sort()` method: - -[source,python] ----- -s = Search().sort( - 'category', - '-title', - {"lines" : {"order" : "asc", "mode" : "avg"}} -) ----- - -It accepts positional arguments which can be either strings or -dictionaries. 
String value is a field name, optionally prefixed by the -`-` sign to specify a descending order. - -To reset the sorting, just call the method with no arguments: - -[source,python] ----- -s = s.sort() ----- - -====== Pagination - -To specify the from/size parameters, use the Python slicing API: - -[source,python] ----- -s = s[10:20] -# {"from": 10, "size": 10} - -s = s[:20] -# {"size": 20} - -s = s[10:] -# {"from": 10} - -s = s[10:20][2:] -# {"from": 12, "size": 8} ----- - -If you want to access all the documents matched by your query you can -use the `scan` method which uses the scan/scroll elasticsearch API: - -[source,python] ----- -for hit in s.scan(): - print(hit.title) ----- - -Note that in this case the results won't be sorted. - -====== Highlighting - -To set common attributes for highlighting use the -`highlight_options` method: - -[source,python] ----- -s = s.highlight_options(order='score') ----- - -Enabling highlighting for individual fields is done using the -`highlight` method: - -[source,python] ----- -s = s.highlight('title') -# or, including parameters: -s = s.highlight('title', fragment_size=50) ----- - -The fragments in the response will then be available on each `Result` -object as `.meta.highlight.FIELD` which will contain the list of -fragments: - -[source,python] ----- -response = s.execute() -for hit in response: - for fragment in hit.meta.highlight.title: - print(fragment) ----- - -====== Suggestions - -To specify a suggest request on your `Search` object use the `suggest` -method: - -[source,python] ----- -# check for correct spelling -s = s.suggest('my_suggestion', 'pyhton', term={'field': 'title'}) ----- - -The first argument is the name of the suggestions (name under which it -will be returned), second is the actual text you wish the suggester to -work on and the keyword arguments will be added to the suggest's json -as-is which means that it should be one of `term`, `phrase` or -`completion` to indicate which type of suggester should be used. - -====== Collapsing - -To collapse search results use the `collapse` method on your `Search` -object: - -[source,python] ----- -s = Search().query("match", message="GET /search") -# collapse results by user_id -s = s.collapse("user_id") ----- - -The top hits will only include one result per `user_id`. You can -also expand each collapsed top hit with the `inner_hits` parameter, -`max_concurrent_group_searches` being the number of -concurrent requests allowed to retrieve the inner hits per group: - -[source,python] ----- -inner_hits = {"name": "recent_search", "size": 5, "sort": [{"@timestamp": "desc"}]} -s = s.collapse("user_id", inner_hits=inner_hits, max_concurrent_group_searches=4) ----- - -====== More Like This Query - -To use Elasticsearch's `more_like_this` functionality, you can use -the MoreLikeThis query type. - -A simple example is below - -[source,python] ----- -from elasticsearch.dsl.query import MoreLikeThis -from elasticsearch.dsl import Search - -my_text = 'I want to find something similar' - -s = Search() -# We're going to match based only on two fields, in this case text and title -s = s.query(MoreLikeThis(like=my_text, fields=['text', 'title'])) -# You can also exclude fields from the result to make the response quicker in the normal way -s = s.source(exclude=["text"]) -response = s.execute() - -for hit in response: - print(hit.title) ----- - -====== Extra properties and parameters - -To set extra properties of the search request, use the `.extra()` -method. 
This can be used to define keys in the body that cannot be -defined via a specific API method like `explain` or `search_after`: - -[source,python] ---- -s = s.extra(explain=True) ---- - -To set query parameters, use the `.params()` method: - -[source,python] ---- -s = s.params(routing="42") ---- - -If you need to limit the fields being returned by elasticsearch, use the -`source()` method: - -[source,python] ---- -# only return the selected fields -s = s.source(['title', 'body']) -# don't return any fields, just the metadata -s = s.source(False) -# explicitly include/exclude fields -s = s.source(includes=["title"], excludes=["user.*"]) -# reset the field selection -s = s.source(None) ---- - -====== Serialization and Deserialization - -The search object can be serialized into a dictionary by using the -`.to_dict()` method. - -You can also create a `Search` object from a `dict` using the -`from_dict` class method. This will create a new `Search` object and -populate it using the data from the dict: - -[source,python] ---- -s = Search.from_dict({"query": {"match": {"title": "python"}}}) ---- - -If you wish to modify an existing `Search` object, overriding its -properties, instead use the `update_from_dict` method that -alters an instance *in-place*: - -[source,python] ---- -s = Search(index='i') -s.update_from_dict({"query": {"match": {"title": "python"}}, "size": 42}) ---- - -===== Response - -You can execute your search by calling the `.execute()` method that will -return a `Response` object. The `Response` object allows you access to -any key from the response dictionary via attribute access. It also -provides some convenient helpers: - -[source,python] ---- -response = s.execute() - -print(response.success()) -# True - -print(response.took) -# 12 - -print(response.hits.total.relation) -# eq -print(response.hits.total.value) -# 142 - -print(response.suggest.my_suggestions) ---- - -If you want to inspect the contents of the `response` object, just use -its `to_dict` method to get access to the raw data for pretty -printing. - -====== Hits - -To access the hits returned by the search, access the `hits` property -or just iterate over the `Response` object: - -[source,python] ---- -response = s.execute() -print('Total %d hits found.' % response.hits.total) -for h in response: - print(h.title, h.body) ---- - -[NOTE] -==== -If you are only seeing partial results (e.g. 10000 or even 10 results), -consider using the option `s.extra(track_total_hits=True)` to -get a full hit count. -==== - -====== Result - -Each individual hit is wrapped in a convenience class that allows -attribute access to the keys in the returned dictionary. All the -metadata for the results is accessible via `meta` (without the leading -`_`): - -[source,python] ---- -response = s.execute() -h = response.hits[0] -print('/%s/%s/%s returned with score %f' % ( - h.meta.index, h.meta.doc_type, h.meta.id, h.meta.score)) ---- - -[NOTE] -==== -If your document has a field called `meta` you have to access it using -the get item syntax: `hit['meta']`.
-==== - -====== Aggregations - -Aggregations are available through the `aggregations` property: - -[source,python] ----- -for tag in response.aggregations.per_tag.buckets: - print(tag.key, tag.max_lines.value) ----- - -===== `MultiSearch` - -If you need to execute multiple searches at the same time you can use -the `MultiSearch` class which will use the `_msearch` API: - -[source,python] ----- -from elasticsearch.dsl import MultiSearch, Search - -ms = MultiSearch(index='blogs') - -ms = ms.add(Search().filter('term', tags='python')) -ms = ms.add(Search().filter('term', tags='elasticsearch')) - -responses = ms.execute() - -for response in responses: - print("Results for query %r." % response._search.query) - for hit in response: - print(hit.title) ----- - -===== `EmptySearch` - -The `EmptySearch` class can be used as a fully compatible version of -`Search` that will return no results, regardless of any queries -configured. - diff --git a/docs/guide/dsl/update_by_query.asciidoc b/docs/guide/dsl/update_by_query.asciidoc deleted file mode 100644 index b4a550a77..000000000 --- a/docs/guide/dsl/update_by_query.asciidoc +++ /dev/null @@ -1,168 +0,0 @@ -[[update_by_query]] -==== Update By Query - -===== The `Update By Query` object - -The `Update By Query` object enables the use of the -https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html[_update_by_query] -endpoint to perform an update on documents that match a search query. - -The object is implemented as a modification of the `Search` object, -containing a subset of its query methods, as well as a script method, -which is used to make updates. - -The `Update By Query` object implements the following `Search` query -types: - -* queries -* filters -* excludes - -For more information on queries, see the `search_dsl` chapter. - -Like the `Search` object, the API is designed to be chainable. This -means that the `Update By Query` object is immutable: all changes to the -object will result in a shallow copy being created which contains the -changes. This means you can safely pass the `Update By Query` object to -foreign code without fear of it modifying your objects as long as it -sticks to the `Update By Query` object APIs. - -You can define your client in a number of ways, but the preferred method -is to use a global configuration. For more information on defining a -client, see the `configuration` chapter. - -Once your client is defined, you can instantiate a copy of the -`Update By Query` object as seen below: - -[source,python] ----- -from elasticsearch.dsl import UpdateByQuery - -ubq = UpdateByQuery().using(client) -# or -ubq = UpdateByQuery(using=client) ----- - -[NOTE] -==== -All methods return a _copy_ of the object, making it safe to pass to -outside code. -==== - -The API is chainable, allowing you to combine multiple method calls in -one statement: - -[source,python] ----- -ubq = UpdateByQuery().using(client).query("match", title="python") ----- - -To send the request to Elasticsearch: - -[source,python] ----- -response = ubq.execute() ----- - -It should be noted, that there are limits to the chaining using the -script method: calling script multiple times will overwrite the previous -value. That is, only a single script can be sent with a call. An attempt -to use two scripts will result in only the second script being stored. 
- -Consider the example below: - -[source,python] ---- -ubq = UpdateByQuery() \ - .using(client) \ - .script(source="ctx._source.likes++") \ - .script(source="ctx._source.likes+=2") ---- - -Here the script stored by this client will be -`'source': 'ctx._source.likes{plus}=2'` and the previous call will -not be stored. - -For debugging purposes you can serialize the `Update By Query` object to -a `dict` explicitly: - -[source,python] ---- -print(ubq.to_dict()) ---- - -To use variables in your script, see the example below: - -[source,python] ---- -ubq.script( - source="ctx._source.messages.removeIf(x -> x.somefield == params.some_var)", - params={ - 'some_var': 'some_string_val' - } -) ---- - -====== Serialization and Deserialization - -The search object can be serialized into a dictionary by using the -`.to_dict()` method. - -You can also create an `Update By Query` object from a `dict` using the -`from_dict` class method. This will create a new `Update By Query` -object and populate it using the data from the dict: - -[source,python] ---- -ubq = UpdateByQuery.from_dict({"query": {"match": {"title": "python"}}}) ---- - -If you wish to modify an existing `Update By Query` object, overriding -its properties, instead use the `update_from_dict` method that -alters an instance *in-place*: - -[source,python] ---- -ubq = UpdateByQuery(index='i') -ubq.update_from_dict({"query": {"match": {"title": "python"}}, "size": 42}) ---- - -====== Extra properties and parameters - -To set extra properties of the search request, use the `.extra()` -method. This can be used to define keys in the body that cannot be -defined via a specific API method like `explain`: - -[source,python] ---- -ubq = ubq.extra(explain=True) ---- - -To set query parameters, use the `.params()` method: - -[source,python] ---- -ubq = ubq.params(routing="42") ---- - -===== Response - -You can execute your search by calling the `.execute()` method that will -return a `Response` object. The `Response` object allows you access to -any key from the response dictionary via attribute access. It also -provides some convenient helpers: - -[source,python] ---- -response = ubq.execute() - -print(response.success()) -# True - -print(response.took) -# 12 ---- - -If you want to inspect the contents of the `response` object, just use -its `to_dict` method to get access to the raw data for pretty -printing. diff --git a/docs/guide/examples.asciidoc b/docs/guide/examples.asciidoc deleted file mode 100644 index 575f43bbe..000000000 --- a/docs/guide/examples.asciidoc +++ /dev/null @@ -1,287 +0,0 @@ -[[examples]] -== Examples - -Below you can find examples of how to use the most frequently called APIs with -the Python client.
- -* <<ex-index>> -* <<ex-get>> -* <<ex-refresh>> -* <<ex-search>> -* <<ex-update>> -* <<ex-delete>> - -[discrete] -[[ex-index]] -=== Indexing a document - -To index a document, you need to specify three pieces of information: `index`, -`id`, and a `document`: - -[source,py] ---------------------------- -from datetime import datetime -from elasticsearch import Elasticsearch -client = Elasticsearch('https://localhost:9200') - -doc = { - 'author': 'author_name', - 'text': 'Interesting content...', - 'timestamp': datetime.now(), -} -resp = client.index(index="test-index", id=1, document=doc) -print(resp['result']) ---------------------------- - - -[discrete] -[[ex-get]] -=== Getting a document - -To get a document, you need to specify its `index` and `id`: - -[source,py] ---------------------------- -resp = client.get(index="test-index", id=1) -print(resp['_source']) ---------------------------- - - -[discrete] -[[ex-refresh]] -=== Refreshing an index - -You can perform the refresh operation on an index: - -[source,py] ---------------------------- -client.indices.refresh(index="test-index") ---------------------------- - - -[discrete] -[[ex-search]] -=== Searching for a document - -The `search()` method returns results that match a query: - -[source,py] ---------------------------- -resp = client.search(index="test-index", query={"match_all": {}}) -print("Got %d Hits:" % resp['hits']['total']['value']) -for hit in resp['hits']['hits']: - print("%(timestamp)s %(author)s: %(text)s" % hit["_source"]) ---------------------------- - - -[discrete] -[[ex-update]] -=== Updating a document - -To update a document, you need to specify three pieces of information: `index`, -`id`, and a `doc`: - -[source,py] ---------------------------- -from datetime import datetime -from elasticsearch import Elasticsearch - -client = Elasticsearch('https://localhost:9200') - -doc = { - 'author': 'author_name', - 'text': 'Interesting modified content...', - 'timestamp': datetime.now(), -} -resp = client.update(index="test-index", id=1, doc=doc) -print(resp['result']) ---------------------------- - - -[discrete] -[[ex-delete]] -=== Deleting a document - -You can delete a document by specifying its `index` and `id` in the `delete()` -method: - -[source,py] ---------------------------- -client.delete(index="test-index", id=1) ---------------------------- - -[discrete] -[[ex-interactive]] -=== Interactive examples - -The https://github.com/elastic/elasticsearch-labs[elasticsearch-labs] -repo contains interactive and executable -https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[Python -notebooks], sample apps, and resources for testing out Elasticsearch, -using the Python client. These examples are mainly focused on vector -search, hybrid search and generative AI use cases, but you'll also find -examples of basic operations like creating index mappings and performing -lexical search. - -[discrete] -==== Search notebooks - -The -https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search[Search] -folder is a good place to start if you're new to Elasticsearch. This -folder contains a number of notebooks that demonstrate the fundamentals -of Elasticsearch, like indexing vectors, running lexical, semantic and -_hybrid_ searches, and more.
- -The following notebooks are available: - -[arabic, start=0] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/00-quick-start.ipynb[Quick -start] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/01-keyword-querying-filtering.ipynb[Keyword, -querying, filtering] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/02-hybrid-search.ipynb[Hybrid -search] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb[Semantic -search with ELSER] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/04-multilingual.ipynb[Multilingual -semantic search] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/05-query-rules.ipynb[Query -rules] -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/06-synonyms-api.ipynb[Synonyms -API quick start] - -Here's a brief overview of what you'll learn in each notebook. - -[discrete] -===== Quick start - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/00-quick-start.ipynb[00-quick-start.ipynb] -notebook you'll learn how to: - -* Use the Elasticsearch Python client for various operations. -* Create and define an index for a sample dataset with -`dense_vector` fields. -* Transform book titles into embeddings using -https://www.sbert.net[Sentence Transformers] and index them into -Elasticsearch. -* Perform k-nearest neighbors (knn) semantic searches. -* Integrate traditional text-based search with semantic search, for a -hybrid search system. -* Use reciprocal rank fusion (RRF) to intelligently combine search -results from different retrieval systems. - -[discrete] -===== Keyword, querying, filtering - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/01-keyword-querying-filtering.ipynb[01-keyword-querying-filtering.ipynb] -notebook, you'll learn how to: - -* Use -https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html[query -and filter contexts] to search and filter documents in Elasticsearch. -* Execute full-text searches with `match` and `multi-match` queries. -* Query and filter documents based on `text`, `number`, `date`, or -`boolean` values. -* Run multi-field searches using the `multi-match` query. -* Prioritize specific fields in the `multi-match` query for tailored -results. - -[discrete] -===== Hybrid search - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/02-hybrid-search.ipynb[02-hybrid-search.ipynb] -notebook, you'll learn how to: - -* Combine results of traditional text-based search with semantic search, -for a hybrid search system. -* Transform fields in the sample dataset into embeddings using the -Sentence Transformer model and index them into Elasticsearch. -* Use the -https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html#rrf-api[RRF -API] to combine the results of a `match` query and a `kNN` semantic -search. -* Walk through a super simple toy example that demonstrates, step by -step, how RRF ranking works. 
- -[discrete] -===== Semantic search with ELSER - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb[03-ELSER.ipynb] -notebook, you'll learn how to: - -* Use the Elastic Learned Sparse Encoder (ELSER) for text -expansion-powered semantic search, out of the box — without training, -fine-tuning, or embeddings generation. -* Download and deploy the ELSER model in your Elastic environment. -* Create an Elasticsearch index named [.title-ref]#search-movies# with -specific mappings and index a dataset of movie descriptions. -* Create an ingest pipeline containing an inference processor for ELSER -model execution. -* Reindex the data from [.title-ref]#search-movies# into another index, -[.title-ref]#elser-movies#, using the ELSER pipeline for text expansion. -* Observe the results of running the documents through the model by -inspecting the additional terms it adds to documents, which enhance -searchability. -* Perform simple keyword searches on the [.title-ref]#elser-movies# -index to assess the impact of ELSER's text expansion. -* Execute ELSER-powered semantic searches using the `text_expansion` -query. - -[discrete] -===== Multilingual semantic search - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/04-multilingual.ipynb[04-multilingual.ipynb] -notebook, you'll learn how to: - -* Use a multilingual embedding model for semantic search across -languages. -* Transform fields in the sample dataset into embeddings using the -Sentence Transformer model and index them into Elasticsearch. -* Use filtering with a `kNN` semantic search. -* Walk through a super simple toy example that demonstrates, step by -step, how multilingual search works across languages, and within -non-English languages. - -[discrete] -===== Query rules - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/05-query-rules.ipynb[05-query-rules.ipynb] -notebook, you'll learn how to: - -* Use the query rules management APIs to create and edit promotional -rules based on contextual queries. -* Apply these query rules by using the `rule_query` in Query DSL. - -[discrete] -===== Synonyms API quick start - -In the -https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/06-synonyms-api.ipynb[06-synonyms-api.ipynb] -notebook, you'll learn how to: - -* Use the synonyms management API to create a synonyms set to enhance -your search recall. -* Configure an index to use search-time synonyms. -* Update synonyms in real time. -* Run queries that are enhanced by synonyms. - -[discrete] -==== Other notebooks - -* https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/generative-ai[Generative -AI]. Notebooks that demonstrate various use cases for Elasticsearch as -the retrieval engine and vector store for LLM-powered applications. -* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations[Integrations]. -Notebooks that demonstrate how to integrate popular services and -projects with Elasticsearch, including OpenAI, Hugging Face, and -LlamaIndex -* https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/langchain[Langchain]. -Notebooks that demonstrate how to integrate Elastic with LangChain, a -framework for developing applications powered by language models. 
diff --git a/docs/guide/helpers.asciidoc b/docs/guide/helpers.asciidoc deleted file mode 100644 index 8157904c4..000000000 --- a/docs/guide/helpers.asciidoc +++ /dev/null @@ -1,90 +0,0 @@ -[[client-helpers]] -== Client helpers - -You can find here a collection of simple helper functions that abstract some -specifics of the raw API. For detailed examples, refer to -https://elasticsearch-py.readthedocs.io/en/stable/helpers.html[this page]. - - -[discrete] -[[bulk-helpers]] -=== Bulk helpers - -There are several helpers for the bulk API since its requirement for specific -formatting and other considerations can make it cumbersome if used directly. - -All bulk helpers accept an instance of `{es}` class and an iterable `action` -(any iterable, can also be a generator, which is ideal in most cases since it -allows you to index large datasets without the need of loading them into -memory). - -The items in the iterable `action` should be the documents we wish to index in -several formats. The most common one is the same as returned by `search()`, for -example: - -[source,yml] ----------------------------- -{ - '_index': 'index-name', - '_id': 42, - '_routing': 5, - 'pipeline': 'my-ingest-pipeline', - '_source': { - "title": "Hello World!", - "body": "..." - } -} ----------------------------- - -Alternatively, if `_source` is not present, it pops all metadata fields from -the doc and use the rest as the document data: - -[source,yml] ----------------------------- -{ - "_id": 42, - "_routing": 5, - "title": "Hello World!", - "body": "..." -} ----------------------------- - -The `bulk()` api accepts `index`, `create`, `delete`, and `update` actions. Use -the `_op_type` field to specify an action (`_op_type` defaults to `index`): - -[source,yml] ----------------------------- -{ - '_op_type': 'delete', - '_index': 'index-name', - '_id': 42, -} -{ - '_op_type': 'update', - '_index': 'index-name', - '_id': 42, - 'doc': {'question': 'The life, universe and everything.'} -} ----------------------------- - - -[discrete] -[[scan]] -=== Scan - -Simple abstraction on top of the `scroll()` API - a simple iterator that yields -all hits as returned by underlining scroll requests. - -By default scan does not return results in any pre-determined order. To have a -standard order in the returned documents (either by score or explicit sort -definition) when scrolling, use `preserve_order=True`. This may be an expensive -operation and will negate the performance benefits of using `scan`. 
- - -[source,py] ----------------------------- -scan(es, - query={"query": {"match": {"title": "python"}}}, - index="orders-*" -) ----------------------------- \ No newline at end of file diff --git a/docs/guide/index.asciidoc b/docs/guide/index.asciidoc deleted file mode 100644 index 687710626..000000000 --- a/docs/guide/index.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -= Elasticsearch Python Client - -:doctype: book - -include::{asciidoc-dir}/../../shared/versions/stack/{source_branch}.asciidoc[] - -include::{asciidoc-dir}/../../shared/attributes.asciidoc[] - -include::overview.asciidoc[] - -include::getting-started.asciidoc[] - -include::installation.asciidoc[] - -include::connecting.asciidoc[] - -include::configuration.asciidoc[] - -include::async.asciidoc[] - -include::migration.asciidoc[] - -include::integrations.asciidoc[] - -include::examples.asciidoc[] - -include::elasticsearch-dsl.asciidoc[] - -include::helpers.asciidoc[] - -include::release-notes.asciidoc[] diff --git a/docs/guide/installation.asciidoc b/docs/guide/installation.asciidoc deleted file mode 100644 index f96265d1d..000000000 --- a/docs/guide/installation.asciidoc +++ /dev/null @@ -1,25 +0,0 @@ -[[installation]] -== Installation - -**https://www.elastic.co/downloads/elasticsearch[Download the latest version of Elasticsearch]** -or -**https://cloud.elastic.co/registration?elektra=en-ess-sign-up-page[sign-up]** -**for a free trial of Elastic Cloud**. - -The Python client for {es} can be installed with pip: - -[source,sh] -------------------------------------- -$ python -m pip install elasticsearch -------------------------------------- - -If your application uses async/await in Python you can install with the `async` -extra: - -[source,sh] --------------------------------------------- -$ python -m pip install elasticsearch[async] --------------------------------------------- - -Read more about -https://elasticsearch-py.readthedocs.io/en/master/async.html[how to use asyncio with this project]. diff --git a/docs/guide/integrations.asciidoc b/docs/guide/integrations.asciidoc deleted file mode 100644 index d82e67498..000000000 --- a/docs/guide/integrations.asciidoc +++ /dev/null @@ -1,75 +0,0 @@ -[[integrations]] -== Integrations - -You can find integration options and information on this page. - - -[discrete] -[[opentelemetry-intro]] -=== OpenTelemetry instrumentation - -The Python Elasticsearch client supports native OpenTelemetry instrumentation following the https://opentelemetry.io/docs/specs/semconv/database/elasticsearch/[OpenTelemetry Semantic Conventions for Elasticsearch]. -Refer to the <> page for details. - - -[discrete] -[[esql-intro]] -=== ES|QL - -{ref}/esql.html[ES|QL] is available through the Python Elasticsearch client. -Refer to the <> page to learn more about using ES|QL and Pandas together with dataframes. - - -[discrete] -[[transport]] -=== Transport - -The handling of connections, retries, and pooling is handled by the https://github.com/elastic/elastic-transport-python[Elastic Transport Python] library. -Documentation on the low-level classes is available on https://elastic-transport-python.readthedocs.io[Read the Docs]. 
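-
-For example, transport-level retry behavior can be tuned either on the client
-constructor or per request with `.options()`. The following is a minimal
-sketch; `request_timeout`, `max_retries`, and `retry_on_timeout` are standard
-client transport options, and the node URL is a placeholder:
-
-[source,python]
-------------------------------------
-from elasticsearch import Elasticsearch
-
-client = Elasticsearch(
-    "http://localhost:9200",
-    request_timeout=10,     # per-request timeout, in seconds
-    max_retries=3,          # retry transient failures on other nodes
-    retry_on_timeout=True,  # also retry requests that timed out
-)
-
-# The same options can be overridden for a single request:
-client.options(max_retries=5, retry_on_timeout=True).search(index="orders-*")
-------------------------------------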
-
-
-[discrete]
-[[opaque-id]]
-=== Tracking requests with Opaque ID
-
-You can enrich your requests against Elasticsearch with an identifier string that allows you to discover this identifier in https://www.elastic.co/guide/en/elasticsearch/reference/current/logging.html#deprecation-logging[deprecation logs], to support you with https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-slowlog.html#_identifying_search_slow_log_origin[identifying search slow log origin],
-or to help with https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html#_identifying_running_tasks[identifying running tasks].
-
-The opaque ID can be set with the `opaque_id` parameter of the client's `.options()` method:
-
-[source,python]
-------------------------------------
-client = Elasticsearch(...)
-client.options(opaque_id="request-id-...").search(...)
-------------------------------------
-
-
-[discrete]
-[[type-hints]]
-=== Type Hints
-
-Starting in `elasticsearch-py` v7.10.0 the library ships with https://www.python.org/dev/peps/pep-0484[type hints] and supports basic static type analysis with tools like http://mypy-lang.org[Mypy] and https://github.com/microsoft/pyright[Pyright].
-
-If we write a script that has a type error, like using `request_timeout` with a `str` argument instead of `float`, and then run Mypy on the script:
-
-[source,python]
-------------------------------------
-# script.py
-from elasticsearch import Elasticsearch
-
-client = Elasticsearch(...)
-client.options(
-    request_timeout="5"  # type error!
-).search(...)
-
-# $ mypy script.py
-# script.py:5: error: Argument "request_timeout" to "search" of "Elasticsearch" has
-#                     incompatible type "str"; expected "Union[int, float, None]"
-# Found 1 error in 1 file (checked 1 source file)
-------------------------------------
-
-Type hints also allow tools like your IDE to check types and provide better auto-complete functionality.
-
-
-include::open-telemetry.asciidoc[]
-include::esql-pandas.asciidoc[]
\ No newline at end of file
diff --git a/docs/guide/migration.asciidoc b/docs/guide/migration.asciidoc
deleted file mode 100644
index 1230399d7..000000000
--- a/docs/guide/migration.asciidoc
+++ /dev/null
@@ -1,377 +0,0 @@
-[[migration]]
-== Migrating to 8.0
-
-Version 8.0 of the client introduces major changes that affect how you use it.
-The sections below outline everything you'll have to take into account when
-upgrading from 7.x to 8.0.
-
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-
-[discrete]
-[[migration-compat-mode]]
-=== Enable compatibility mode and upgrade Elasticsearch
-
-Upgrade your Elasticsearch client to 7.16:
-
-[source,bash]
-------------------------------------
-$ python -m pip install --upgrade 'elasticsearch>=7.16,<8'
-------------------------------------
-
-If you have an existing application, enable compatibility mode
-by setting the `ELASTIC_CLIENT_APIVERSIONING=1` environment variable.
-This instructs the Elasticsearch server to accept and respond
-with 7.x-compatible requests and responses.
-
-After you've done this you can https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-upgrade.html[upgrade the Elasticsearch server to 8.0.0].
-
-[discrete]
-[[migration-upgrade-client]]
-=== Upgrading the client
-
-After you've deployed your application with the 7.16 client against an
-8.0.0 Elasticsearch server, you can upgrade your client to 8.0.
-
-[source,bash]
------------------------------------- 
-$ python -m pip install --upgrade 'elasticsearch>=8,<9'
------------------------------------- 
-
-[discrete]
-[[migration-remove-deprecations]]
-=== Removing deprecation warnings
-
-After upgrading the client to 8.0, you'll likely notice that your code either
-raises errors or emits a `DeprecationWarning` to signal where you need to
-change your code before using the 8.0 client.
-
-
-[discrete]
-[[migration-strict-client-config]]
-==== Strict client configuration
-
-Previously the client would use `scheme="http"`, `host="localhost"`, and `port=9200` defaults
-when specifying which node(s) to connect to. Starting in 8.0 these defaults have been removed;
-you must instead configure the scheme, host, and port explicitly, or use `cloud_id`,
-to avoid confusion about which Elasticsearch instance is being connected to.
-
-This choice was made because starting with 8.0.0 Elasticsearch enables HTTPS by default, so it's no
-longer a good assumption that `http://localhost:9200` is the locally running cluster.
-
-See documentation on <> and <>.
-
-For quick examples, using a configuration like one of the two below works best:
-
-[source,python]
------------------------------------- 
-from elasticsearch import Elasticsearch
-
-# If you're connecting to an instance on Elastic Cloud:
-client = Elasticsearch(
-    cloud_id="cluster-1:dXMa5Fx...",
-
-    # Include your authentication like 'api_key'
-    # 'basic_auth', or 'bearer_auth' here.
-    basic_auth=("elastic", "")
-)
-
-# If you're connecting to an instance hosted elsewhere:
-client = Elasticsearch(
-    # Notice that the scheme (http://), host (localhost),
-    # and port (9200) are explicit here:
-    "http://localhost:9200",
-
-    # Include your authentication like 'api_key'
-    # 'basic_auth', or 'bearer_auth' here:
-    api_key="api_key"
-)
------------------------------------- 
-
-[discrete]
-[[migration-keyword-only-args]]
-==== Keyword-only arguments for APIs
-
-APIs used to support both positional and keyword arguments; however,
-using **keyword-only arguments was always recommended** in the documentation.
-Starting in 7.14, using positional arguments would raise a `DeprecationWarning` but would still work.
-
-Starting in 8.0, keyword-only arguments are required for APIs, for better forwards-compatibility
-with new API options. When attempting to use positional arguments a `TypeError` will be raised.
-
-[source,python]
------------------------------------- 
-# 8.0+ SUPPORTED USAGE:
-client.indices.get(index="*")
-
-# 7.x UNSUPPORTED USAGE (Don't do this!):
-client.indices.get("*")
------------------------------------- 
-
-[discrete]
-[[migration-options]]
-==== Start using .options() for transport parameters
-
-Previously some per-request options like `api_key` and `ignore` were allowed within
-client API methods. Starting in 8.0 this is deprecated for all APIs, and for a small
-number of APIs it may break in unexpected ways if not changed.
-
-The parameters `headers`, `api_key`, `http_auth`, `opaque_id`, `request_timeout`, and `ignore`
-are affected:
-
-[source,python]
------------------------------------- 
-from elasticsearch import Elasticsearch
-
-client = Elasticsearch("http://localhost:9200")
-
-# 8.0+ SUPPORTED USAGE:
-client.options(api_key="api_key").search(index="blogs")
-
-# 7.x DEPRECATED USAGE (Don't do this!):
-client.search(index="blogs", api_key=("id", "api_key"))
------------------------------------- 
-
-Some of these parameters have been renamed to be more readable and to fit other APIs.
-`ignore` should be `ignore_status` and `http_auth` should be `basic_auth`:
-
-[source,python]
------------------------------------- 
-# 8.0+ SUPPORTED USAGES:
-client.options(basic_auth=("username", "password")).search(...)
-client.options(ignore_status=404).indices.delete(index=...)
-
-# 7.x DEPRECATED USAGES (Don't do this!):
-client.search(http_auth=("username", "password"), ...)
-client.indices.delete(index=..., ignore=404)
------------------------------------- 
-
-APIs where this change is breaking and doesn't have a deprecation period due to conflicts
-between the client API and Elasticsearch's API:
-
-- `sql.query` using `request_timeout`
-- `security.grant_api_key` using `api_key`
-- `render_search_template` using `params`
-- `search_template` using `params`
-
-You should immediately evaluate the usage of these parameters and start using `.options(...)`
-to avoid unexpected behavior. Below is an example of migrating away from using per-request `api_key`
-with the `security.grant_api_key` API:
-
-[source,python]
------------------------------------- 
-# 8.0+ SUPPORTED USAGE:
-resp = (
-    client.options(
-        # This is the API key being used for the request
-        api_key="request-api-key"
-    ).security.grant_api_key(
-        # This is the API key being granted
-        api_key={
-            "name": "granted-api-key"
-        },
-        grant_type="password",
-        username="elastic",
-        password="changeme"
-    )
-)
-
-# 7.x DEPRECATED USAGE (Don't do this!):
-resp = (
-    # This is the API key being used for the request
-    client.security.grant_api_key(
-        api_key=("request-id", "request-api-key"),
-        body={
-            # This is the API key being granted
-            "api_key": {
-                "name": "granted-api-key"
-            },
-            "grant_type": "password",
-            "username": "elastic",
-            "password": "changeme"
-        }
-    )
-)
------------------------------------- 
-
-Starting with the 8.12 client, using a body parameter is fully supported again, meaning you can also use `grant_api_key` like this:
-
-[source,python]
------------------------------------- 
-# 8.12+ SUPPORTED USAGE:
-resp = (
-    client.options(
-        # This is the API key being used for the request
-        api_key="request-api-key"
-    ).security.grant_api_key(
-        body={
-            # This is the API key being granted
-            "api_key": {
-                "name": "granted-api-key"
-            },
-            "grant_type": "password",
-            "username": "elastic",
-            "password": "changeme"
-        }
-    )
-)
------------------------------------- 
-
-[discrete]
-[[migration-response-types]]
-==== Changes to API responses
-
-In 7.x and earlier the return type for API methods was the raw deserialized response body.
-This meant that there was no way to access HTTP status codes, headers, or other information
-from the transport layer.
-
-In 8.0.0 responses are no longer the raw deserialized response body; instead they are objects
-with two properties, `meta` and `body`. Transport layer metadata about the response,
-like HTTP status, headers, version, and which node serviced the request, is available here:
-
-[source,python]
------------------------------------- 
->>> resp = client.search(...)
-
-# The response is no longer a 'dict'
->>> resp
-ObjectApiResponse({'took': 1, 'timed_out': False, ...})
-
-# But can still be used like one:
->>> resp["hits"]["total"]
-{'value': 5500, 'relation': 'eq'}
-
->>> resp.keys()
-dict_keys(['took', 'timed_out', '_shards', 'hits'])
-
-# HTTP status
->>> resp.meta.status
-200
-
-# HTTP headers
->>> resp.meta.headers['content-type']
-'application/json'
-
-# HTTP version
->>> resp.meta.http_version
-'1.1'
------------------------------------- 
-
-Because the response is no longer a dictionary, list, `str`, or `bytes` instance,
-calling `isinstance()` on the response object will return `False`. If you need
-direct access to the underlying deserialized response body you can use the `body`
-property:
-
-[source,python]
------------------------------------- 
->>> resp.body
-{'took': 1, 'timed_out': False, ...}
-
-# The response isn't a dict, but resp.body is.
->>> isinstance(resp, dict)
-False
-
->>> isinstance(resp.body, dict)
-True
------------------------------------- 
-
-Responses to requests that used the `HEAD` HTTP method can still be used within `if` conditions, but won't work with `is`.
-
-[source,python]
------------------------------------- 
->>> resp = client.indices.exists(index=...)
->>> resp.body
-True
-
->>> resp is True
-False
-
->>> resp.body is True
-True
-
->>> isinstance(resp, bool)
-False
-
->>> isinstance(resp.body, bool)
-True
------------------------------------- 
-
-[discrete]
-[[migration-error-types]]
-==== Changes to error classes
-
-Previously `elasticsearch.TransportError` was the base class for both transport layer errors (like timeouts and connection errors) and API layer errors (like "404 Not Found" when accessing an index). This was confusing when you wanted to capture API errors to inspect them for a response body without also capturing errors from the transport layer.
-
-Now in 8.0 `elasticsearch.TransportError` is a redefinition of `elastic_transport.TransportError` and will only be the base class for true transport layer errors. If you instead want to capture API layer errors you can use the new `elasticsearch.ApiError` base class.
-
-[source,python]
------------------------------------- 
-from elasticsearch import TransportError, Elasticsearch
-
-try:
-    client.indices.get(index="index-that-does-not-exist")
-
-# In elasticsearch-py v7.x this would capture the resulting
-# 'NotFoundError' that would be raised above. But in 8.0.0 this
-# 'except TransportError' won't capture 'NotFoundError'.
-except TransportError as err:
-    print(f"TransportError: {err}")
------------------------------------- 
-
-The `elasticsearch.ElasticsearchException` base class has been removed as well. If you'd like to capture all errors that can be raised from the library you can capture both `elasticsearch.ApiError` and `elasticsearch.TransportError`:
-
-[source,python]
------------------------------------- 
-from elasticsearch import TransportError, ApiError, Elasticsearch
-
-try:
-    client.search(...)
-# This is the 'except' clause you should use if you *actually* want to
-# capture both Transport errors and API errors in one clause:
-except (ApiError, TransportError) as err:
-    ...
-
-# However, we recommend you instead split each error into its own 'except'
-# clause so you can have different behavior for TransportErrors. This
-# construction wasn't possible in 7.x and earlier.
-try:
-    client.search(...)
-except ApiError as err:
-    ...  # API errors handled here
-except TransportError as err:
-    ...
-    # Transport errors handled here
------------------------------------- 
-
-`elasticsearch.helpers.errors.BulkIndexError` and `elasticsearch.helpers.errors.ScanError` now use `Exception` as a base class instead of `ElasticsearchException`.
-
-Another difference between 7.x and 8.0 errors is their properties. Previously there were `status_code`, `info`, and `error` properties that weren't super useful, as they'd be a mix of different value types depending on what the error was and which layer it'd been raised from (transport versus API). You can inspect the error and get the response metadata via `meta` and the response body via `body` from an `ApiError` instance:
-
-[source,python]
------------------------------------- 
-from elasticsearch import ApiError, Elasticsearch
-
-try:
-    client.indices.get(index="index-that-does-not-exist")
-except ApiError as err:
-    print(err.meta.status)
-    # 404
-    print(err.meta.headers)
-    # {'content-length': '200', ...}
-    print(err.body)
-    # {
-    #   'error': {
-    #     'type': 'index_not_found_exception',
-    #     'reason': 'no such index',
-    #     'resource.type': 'index_or_alias',
-    #     ...
-    #   },
-    #   'status': 404
-    # }
------------------------------------- 
diff --git a/docs/guide/open-telemetry.asciidoc b/docs/guide/open-telemetry.asciidoc
deleted file mode 100644
index 9bbc1da47..000000000
--- a/docs/guide/open-telemetry.asciidoc
+++ /dev/null
@@ -1,75 +0,0 @@
-[[opentelemetry]]
-=== Using OpenTelemetry
-
-You can use https://opentelemetry.io/[OpenTelemetry] to monitor the performance and behavior of your {es} requests through the Elasticsearch Python client.
-The Python client comes with built-in OpenTelemetry instrumentation that emits https://www.elastic.co/guide/en/apm/guide/current/apm-distributed-tracing.html[distributed tracing spans] by default.
-With that, applications using https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry[manual OpenTelemetry instrumentation] or https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry[automatic OpenTelemetry instrumentation] are enriched with additional spans that contain insightful information about the execution of the {es} requests.
-
-The native instrumentation in the Python client follows the https://opentelemetry.io/docs/specs/semconv/database/elasticsearch/[OpenTelemetry Semantic Conventions for {es}]. In particular, the instrumentation in the client covers the logical layer of {es} requests. A single span is created per {es} request processed by the service through the Python client. The following image shows a trace that records the handling of two different {es} requests: an `info` request and a `search` request.
-
-[role="screenshot"]
-image::images/otel-waterfall-without-http.png[alt="Distributed trace with Elasticsearch spans",align="center"]
-
-Usually, OpenTelemetry auto-instrumentation modules come with instrumentation support for HTTP-level communication. In this case, in addition to the logical {es} client requests, spans will be captured for the physical HTTP requests emitted by the client.
-The following image shows a trace with both {es} spans (in blue) and the corresponding HTTP-level spans (in red) after having installed the `opentelemetry-instrumentation-urllib3` package:
-
-[role="screenshot"]
-image::images/otel-waterfall-with-http.png[alt="Distributed trace with Elasticsearch spans",align="center"]
-
-Advanced Python client behavior, such as node round-robin and request retries, is revealed through the combination of logical {es} spans and the physical HTTP spans. The following example shows a `search` request in a scenario with two nodes:
-
-[role="screenshot"]
-image::images/otel-waterfall-retry.png[alt="Distributed trace with Elasticsearch spans",align="center"]
-
-The first node is unavailable and results in an HTTP error, while the retry to the second node succeeds. Both HTTP requests are subsumed by the logical {es} request span (in blue).
-
-[discrete]
-==== Set up the OpenTelemetry instrumentation
-
-When using the https://opentelemetry.io/docs/languages/python/instrumentation/[manual Python OpenTelemetry instrumentation] or the https://opentelemetry.io/docs/languages/python/automatic/[OpenTelemetry Python agent], the Python client's OpenTelemetry instrumentation is enabled by default and uses the global OpenTelemetry SDK with the global tracer provider.
-If you're getting started with OpenTelemetry instrumentation, the following blog posts have step-by-step instructions for ingesting and exploring tracing data with the Elastic stack:
-
-* https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry[Manual instrumentation with OpenTelemetry for Python applications]
-* https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry[Automatic instrumentation with OpenTelemetry for Python applications]
-
-[discrete]
-==== Comparison with community instrumentation
-
-The https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/elasticsearch/elasticsearch.html[community OpenTelemetry Elasticsearch instrumentation] also instruments the client and sends OpenTelemetry traces, but it was developed before the OpenTelemetry Semantic Conventions for {es}, so its trace attributes are inconsistent with those of other OpenTelemetry Elasticsearch client instrumentations. To avoid tracing the same requests twice, make sure to use only one instrumentation, either by uninstalling the `opentelemetry-instrumentation-elasticsearch` Python package or by <>.
-
-[discrete]
-==== Configuring the OpenTelemetry instrumentation
-
-You can configure this OpenTelemetry instrumentation through environment variables.
-The following configuration options are available.
-
-[discrete]
-[[opentelemetry-config-enable]]
-===== Enable / Disable the OpenTelemetry instrumentation
-
-With this configuration option you can enable (default) or disable the built-in OpenTelemetry instrumentation.
-
-**Default:** `true`
-
-|============
-| Environment Variable | `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED`
-|============
-
-[discrete]
-===== Capture search request bodies
-
-By default, the built-in OpenTelemetry instrumentation does not capture request bodies due to data privacy considerations. You can use this option to enable capturing of search queries from the request bodies of {es} search requests in case you wish to gather this information regardless. The options are to capture the raw search query or not to capture it at all.
-
-**Default:** `omit`
-
-**Valid Options:** `omit`, `raw`
-
-|============
-| Environment Variable | `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_CAPTURE_SEARCH_QUERY`
-|============
-
-[discrete]
-==== Overhead
-
-The OpenTelemetry instrumentation (like any other monitoring approach) may come with a slight overhead on CPU, memory, and/or latency. Overhead can only occur when the instrumentation is enabled (default) and an OpenTelemetry SDK is active in the target application. When the instrumentation is disabled, or no OpenTelemetry SDK is active within the target application, no monitoring overhead is expected when using the client.
-
-Even in cases where the instrumentation is enabled and is actively used (by an OpenTelemetry SDK), the overhead is minimal and negligible in the vast majority of cases. In edge cases where there is a noticeable overhead, the <> to eliminate any potential impact on performance.
diff --git a/docs/guide/overview.asciidoc b/docs/guide/overview.asciidoc
deleted file mode 100644
index 2b331e08c..000000000
--- a/docs/guide/overview.asciidoc
+++ /dev/null
@@ -1,89 +0,0 @@
-[[overview]]
-== Overview
-
-This is the official low-level Python client for {es}. Its goal is to provide
-common ground for all {es}-related code in Python. For this reason, the client
-is designed to be unopinionated and extensible. An API reference is available
-on https://elasticsearch-py.readthedocs.io[Read the Docs].
-
-
-[discrete]
-=== Compatibility
-
-Language clients are forward compatible, meaning that the clients support
-communicating with greater or equal minor versions of {es} without breaking.
-This does not mean that the clients automatically support new features of newer
-{es} versions; support only arrives with the release of a new client version. For
-example, an 8.12 client version won't automatically support the new features of
-the 8.13 version of {es}; the 8.13 client version is required for that. {es}
-language clients are only backward compatible with default distributions, and
-no guarantees are made.
-
-|===
-| Elasticsearch version | elasticsearch-py branch | Supported
-
-| main | main |
-| 8.x | 8.x | 8.x
-| 7.x | 7.x | 7.17
-|===
-
-If you need to have multiple versions installed at the same time, older
-versions are also released as `elasticsearch7` and `elasticsearch8` (a short
-import sketch follows the example below).
-
-
-[discrete]
-=== Example use
-
-Simple use-case:
-
-[source,python]
------------------------------------- 
->>> from datetime import datetime
->>> from elasticsearch import Elasticsearch
-
-# Connect to 'http://localhost:9200'
->>> client = Elasticsearch("http://localhost:9200")
-
-# Datetimes will be serialized:
->>> client.index(index="my-index-000001", id=42, document={"any": "data", "timestamp": datetime.now()})
-{'_index': 'my-index-000001', '_id': '42', '_version': 1, 'result': 'created', ...}
-
-# ...but not deserialized
->>> client.get(index="my-index-000001", id=42)['_source']
-{'any': 'data', 'timestamp': '2013-05-12T19:45:31.804229'}
------------------------------------- 
-
-TIP: For an elaborate example of how to ingest data into Elastic Cloud,
-refer to {cloud}/ec-getting-started-python.html[this page].
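-
-As a hedged sketch of the side-by-side install mentioned under Compatibility
-(after `python -m pip install elasticsearch7 elasticsearch8`), the versioned
-packages expose correspondingly versioned module names; the URLs below are
-placeholders:
-
-[source,python]
------------------------------------- 
-from elasticsearch7 import Elasticsearch as Elasticsearch7
-from elasticsearch8 import Elasticsearch as Elasticsearch8
-
-# For example, talk to an old and a new cluster from the same process:
-old_client = Elasticsearch7("http://localhost:9200")
-new_client = Elasticsearch8("https://localhost:9200", api_key="...")
------------------------------------- 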
-
-
-[discrete]
-=== Features
-
-The client's features include:
-
-* Translating basic Python data types to and from JSON
-
-* Configurable automatic discovery of cluster nodes
-
-* Persistent connections
-
-* Load balancing (with pluggable selection strategy) across all available nodes
-
-* Node timeouts on transient errors
-
-* Thread safety
-
-* Pluggable architecture
-
-The client also contains a convenient set of
-https://elasticsearch-py.readthedocs.org/en/master/helpers.html[helpers] for
-some of the more engaging tasks like bulk indexing and reindexing.
-
-
-[discrete]
-=== Elasticsearch Python DSL
-
-For higher-level access with a more limited scope, have a look at the DSL module,
-which provides a more convenient and idiomatic way to write and manipulate
-queries.
diff --git a/docs/guide/release-notes.asciidoc b/docs/guide/release-notes.asciidoc
deleted file mode 100644
index 1b8d3957f..000000000
--- a/docs/guide/release-notes.asciidoc
+++ /dev/null
@@ -1,714 +0,0 @@
-[[release-notes]]
-== Release notes
-
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-* <>
-
-[discrete]
-[[rn-8-17-1]]
-=== 8.17.1 (2025-01-23)
-
-- Make pyarrow dependency optional for tests (https://github.com/elastic/elasticsearch-py/pull/2733[#2733], contributed by danigm)
-- Update APIs:
- * Add Simulate ingest API
- * Add Get data stream lifecycle stats API
- * Add Update inference API
- * Add Create or update, Get and Delete IP geolocation database configuration APIs
- * Add Bulk update API keys
- * Add Get and Update Security index settings APIs
- * Add OpenID Connect prepare authentication, OpenID Connect authenticate and OpenID Connect logout APIs
- * Add Delegate PKI authentication API
- * Add Repository analysis API
- * Add Render Search Application Query API
- * Add Find field structure and Find messages structure APIs
- * Add Get Watcher index settings and Update Watcher index settings APIs
- * Add experimental Check in and Claim connector sync job APIs
- * Add experimental Set connector sync job errors and Set connector sync job stats APIs
- * Add experimental Update connector features APIs
- * Add experimental Post Event to an Analytics Collection API
- * Add `timeout` and `master_timeout` to Snapshot lifecycle management (SLM) APIs
- * Add `allow_partial_search_results` to SQL search API
- * Add `throttle_period_in_millis` to Create or update watch API
- * Fix query parameters for CAT APIs
-
-[discrete]
-[[rn-8-17-0]]
-=== 8.17.0 (2024-12-13)
-
-- Allow simsimd again on Python 3.13 (https://github.com/elastic/elasticsearch-py/pull/2722[#2722])
-- Update APIs:
- * Mark all Inference APIs as stable.
- * Add `allow_partial_search_results` to the Open Point in Time API
- * Add `keep_alive` to the Get async search status API
- * Remove the `keep_alive`, `pre_filter_shard_size` and `scroll` parameters from the Submit async search API. They were never supported.
- * Add `master_timeout` and `timeout` to all autoscaling policy APIs
- * Add `master_timeout` to the Alias exists and Get alias APIs
- * Add `list_executed_pipelines` and `require_data_stream` to Bulk API
- * Add `include_model_definition` to Get trained models API
- * Add `meta` to Create data frame analytics API
- * Add `aggs` to Create datafeeds API
- * Add `allow_no_indices`, `expand_wildcards`, `ignore_throttled` and `ignore_unavailable` to Create anomaly detection jobs API
-
-[discrete]
-[[rn-8-16-0]]
-=== 8.16.0 (2024-11-12)
-
-- Support Python 3.13 (https://github.com/elastic/elasticsearch-py/pull/2689[#2689])
-- Emit Python warnings for beta and tech preview APIs (https://github.com/elastic/elasticsearch-py/pull/2685[#2685])
-- Vectorstore: use a retriever query for hybrid search (https://github.com/elastic/elasticsearch-py/pull/2682[#2682])
-- Allow retries for statuses other than 429 in streaming bulk (https://github.com/elastic/elasticsearch-py/pull/2702[#2702])
-- Make `BulkIndexError` and `ScanError` serializable (https://github.com/elastic/elasticsearch-py/pull/2700[#2700])
-- Fix import when `trace` is missing from `opentelemetry` package (https://github.com/elastic/elasticsearch-py/pull/2705[#2705])
-- Update APIs:
- * Fix `nodes` parameter in Task management API
- * Add Test query rule API
- * Add Create Cross-Cluster API key and Update Cross-Cluster API key APIs
- * Add Verify snapshot repository API
- * Add `data_stream_name` and `settings` to Delete auto-follow pattern API
- * Add `max_samples_per_key` to Get async EQL status API
- * Add `lifecycle` and remove unused `data_retention` and `downsampling` parameters from Put data stream lifecycle API
- * Add `include_remotes` and remove `flat_settings` from Cluster stats API
- * Add `remote_indices` to Create or update application privileges and Create or update roles APIs
-
-Note that the new Python warnings can be disabled as follows:
-
-[source,python]
-----
-import warnings
-from elasticsearch.exceptions import GeneralAvailabilityWarning
-
-warnings.filterwarnings("ignore", category=GeneralAvailabilityWarning)
----- 
-
-[discrete]
-[[rn-8-15-1]]
-=== 8.15.1 (2024-09-08)
-
-- Fix OTel context loss in parallel bulk helper (https://github.com/elastic/elasticsearch-py/pull/2616[#2616])
-- Use request converter to generate python examples (https://github.com/elastic/elasticsearch-py/pull/2645[#2645])
-- Add Geoip database configuration APIs: Create or update, Get and Delete
-- Add `q` parameter to Update by Query API
-- Add `allow_no_indices` and `ignore_unavailable` parameters to Resolve index API
-
-[discrete]
-[[rn-8-15-0]]
-=== 8.15.0 (2024-08-09)
-
-- Added the Connector API (https://github.com/elastic/elasticsearch-py/pull/2623[#2623])
-- Added support for https://www.elastic.co/guide/en/elasticsearch/reference/master/semantic-text.html[semantic_text] and https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-semantic-query.html[semantic query].
-- Added support for sequences of job id, model id and tags where applicable in ML APIs
-- Added `dry_run` and `force` parameters to the Perform inference API
-- Added optional Arrow deserialization support (https://github.com/elastic/elasticsearch-py/pull/2632[#2632])
-- Merged Query Ruleset API into new Query Rules API (https://github.com/elastic/elasticsearch-py/pull/2607[#2607])
-- Added mapping code examples (https://github.com/elastic/elasticsearch-py/pull/2596[#2596])
-- Fixed reference docs (https://github.com/elastic/elasticsearch-py/pull/2629[#2629])
-- Dropped Python 3.7 support (https://github.com/elastic/elasticsearch-py/pull/2618[#2618])
-
-[discrete]
-[[rn-8-14-0]]
-=== 8.14.0 (2024-06-06)
-
-- Fixed `node_pool_class` override (https://github.com/elastic/elasticsearch-py/pull/2581[#2581], contributed by Tallak Hellebust)
-- Added `retriever` to the Search API
-- Added `deprecated` and removed `allow_auto_create` from the Create or update component template API
-- Added `allow_auto_create`, `cause`, `deprecated`, `ignore_missing_component_templates` and `master_timeout` to the Create or update index template API
-- Added `cause`, removed `flat_settings` and `timeout` from the Create or update index template legacy API
-- Removed various unsupported parameters from the Simulate index API
-- Added various supported parameters to the Simulate index template API
-- Added the `completion` and `rerank` task types to the Inference APIs
-- Added the `query` and `timeout` parameters to the Perform inference API
-- Added `typed_keys` to the Search Application Search API
-- Added `with_profile_uid` to the Get API key information and Query API key information APIs
-
-
-[discrete]
-[[rn-8-13-2]]
-=== 8.13.2 (2024-05-24)
-
-- Added the `ml.update_trained_model_deployment` API
-- Marked Requests 2.32.2 as incompatible with the Elasticsearch client
-
-[discrete]
-[[rn-8-13-1]]
-=== 8.13.1 (2024-05-03)
-
-- Added `force_synthetic_source` to the Get API
-- Added `wait_for_completion` to the Create trained model API
-- Added `typed_keys` to the Query API key information API
-
-[discrete]
-[[rn-8-13-0]]
-=== 8.13.0 (2024-03-22)
-
-- Added native OpenTelemetry support
-- Added optional `orjson` (a fast, correct JSON library) serialization support
-- Added the `text_structure.test_grok_pattern` API
-- Added the `indices.resolve_cluster` API
-- Renamed the `model_id` parameter to `inference_id` in the `inference` APIs
-- Changed all `synonyms` APIs from **experimental** to **stable**.
-- Fixed API key documentation
-
-[discrete]
-[[rn-8-12-1]]
-=== 8.12.1 (2024-02-22)
-
-- Fixed, but deprecated, parameter aliases in the `body` parameter
-- Added mappings and bulk to quickstart page
-
-[discrete]
-[[rn-8-12-0]]
-=== 8.12.0 (2024-01-19)
-
-- Dropped support for Python 3.6
-- Allowed unrestricted `body` parameter again
-- Added the Inference APIs
-- Added the ES|QL API
-- Added `active_only` parameter to `security.get_api_key` API
-- Added `expiration` parameter to `security.update_api_key` API
-
-[discrete]
-[[rn-8-11-1]]
-=== 8.11.1 (2023-12-08)
-
-- Added missing `role_templates` to `security.put_role_mapping` API
-- Added interactive examples page to documentation
-- Changed API reference to have one page per sub-client
-
-[discrete]
-[[rn-8-11-0]]
-=== 8.11.0 (2023-11-13)
-
-- Support Python 3.12
-- Added missing `scores` parameter to create trained model vocabulary API
-- Added missing `delete_dest_index` parameter to delete transform API
-
-[discrete]
-[[rn-8-10-1]]
-=== 8.10.1 (2023-10-13)
-
-- Removed deprecation warnings when using `body` parameter
-- Fixed some type hints to use covariant Sequence instead of invariant List
-
-[discrete]
-[[rn-8-10-0]]
-=== 8.10.0 (2023-09-22)
-
-- Added the Query rules APIs
-- Added the Synonyms APIs
-
-[discrete]
-[[rn-8-9-0]]
-=== 8.9.0 (2023-08-10)
-
-- Added the `cluster.info` API
-- Updated the `inference_config` argument in `ml.put_trained_model` API to reflect an improvement in the specification
-
-[discrete]
-[[rn-8-8-1]]
-=== 8.8.1 (2023-07-06)
-
-* Added the `rank` parameter to the `search` API
-
-[discrete]
-[[rn-8-8-0]]
-=== 8.8.0 (2023-05-25)
-
-* Added the `include_defaults` parameter to the `cluster.get_component_template`, `indices.get_data_stream`, and `indices.get_index_template` APIs
-* Added the `indices.delete_data_lifecycle`, `indices.explain_data_lifecycle`, `indices.get_data_lifecycle`, and `indices.put_data_lifecycle` APIs
-* Added the **experimental** `search_application.delete`, `search_application.delete_behavioral_analytics`, `search_application.get`, `search_application.get_behavioral_analytics`, `search_application.list`, `search_application.put`, `search_application.put_behavioral_analytics`, and `search_application.search` APIs.
-
-[discrete]
-[[rn-8-7-0]]
-=== 8.7.0 (2023-04-06)
-
-* Added the `health_report` API
-* Added the `transform.schedule_now_transform` API
-* Added the `from_` request parameter to the `transform.start_transform` API
-* Added the `buffer`, `grid_agg`, and `with_labels` parameters to the `search_mvt` API
-* Added the `allow_auto_create` parameter to the `cluster.create_component_template` API
-* Added the `delete_user_annotations` parameter to the `ml.delete_job` and `ml.reset_job` APIs
-* Added the `start` and `end` parameters to the `ml.preview_datafeed` API
-* Added the `priority` parameter to the `ml.start_datafeed` API
-* Added the `job_id` parameter to the `ml.update_datafeed` API
-* Added the `model_prune_window` parameter to the `ml.update_job` API
-* Added the `feature_states` parameter to the `snapshot.restore_snapshot` API
-* Added the `timeout` parameter to the `transform.get_transform_stats` API
-* Changed the `input` parameter of the `ml.put_trained_model` API from required to optional
-* Fixed the `cluster.create_component_template` API by removing the erroneously provided `aliases`, `mappings`, and `settings` parameters. Only the `template` parameter should be used for specifying component templates.
- -[discrete] -[[rn-8-6-2]] -=== 8.6.2 (2023-02-16) - -* Client is compatible with Elasticsearch 8.6.2 - -[discrete] -[[rn-8-6-1]] -=== 8.6.1 (2023-01-27) - -* Client is compatible with Elasticsearch 8.6.1 - -[discrete] -==== Core - -* Added the `expand_wildcards`, `preference`, and `routing` parameters to the `open_point_in_time` API. - -[discrete] -[[rn-8-6-0]] -=== 8.6.0 (2023-01-10) - -* Client is compatible with Elasticsearch 8.6.0 - -[discrete] -==== Core - -* Changed the `fields` parameter of the `field_caps` API to be encoded in the HTTP request body. -* Changed the `index` parameter of the `rank_eval` API to be optional. -* Changed the `requests` parameter of the `rank_eval` API to be optional. - -[discrete] -==== CAT - -* Added the `time` parameter to the `cat.indices` API - -[discrete] -==== Machine Learning - -* Fixed the `model_id` parameter of the `ml.clear_trained_model_deployment_cache` API to be required. - -[discrete] -[[rn-8-5-3]] -=== 8.5.3 (2022-12-08) - -* Client is compatible with Elasticsearch 8.5.3 - -[discrete] -[[rn-8-5-2]] -=== 8.5.2 (2022-11-23) - -* Client is compatible with Elasticsearch 8.5.2 - -[discrete] -[[rn-8-5-1]] -=== 8.5.1 (2022-11-21) - -* Client is compatible with Elasticsearch 8.5.1 - -[discrete] -[[rn-8-5-0]] -=== 8.5.0 (2022-11-2) - -[discrete] -==== Indices - -* Added the **experimental** `indices.downsample` API - -[discrete] -==== Rollup - -* Removed the deprecated `rollup.rollup` API. - -[discrete] -==== Snapshot - -* Added the `index_names` parameter to the `snapshot.get` API. - -[discrete] -==== Machine Learning - -* Added the **beta** `ml.clear_trained_model_deployment_cache` API. -* Changed the `ml.put_trained_model_definition_part` API from **experimental** to **stable**. -* Changed the `ml.put_trained_model_vocabulary` API from **experimental** to **stable**. -* Changed the `ml.start_trained_model_deployment` API from **experimental** to **stable**. -* Changed the `ml.stop_trained_model_deployment` API from **experimental** to **stable**. - -[discrete] -==== Security - -* Added the `with_limited_by` parameter to the `get_api_key` API. -* Added the `with_limited_by` parameter to the `query_api_keys` API. -* Added the `with_profile_uid` parameter to the `get_user` API. -* Changed the `security.activate_user_profile` API from **beta** to **stable**. -* Changed the `security.disable_user_profile` API from **beta** to **stable**. -* Changed the `security.enable_user_profile` API from **beta** to **stable**. -* Changed the `security.get_user_profile` API from **beta** to **stable**. -* Changed the `security.suggest_user_profiles` API from **beta** to **stable**. -* Changed the `security.update_user_profile_data` API from **beta** to **stable**. -* Changed the `security.has_privileges_user_profile` API from **experimental** to **stable**. - -[discrete] -[[rn-8-4-3]] -=== 8.4.3 (2022-10-06) - -* Client is compatible with Elasticsearch 8.4.3 - -[discrete] -[[rn-8-4-2]] -=== 8.4.2 (2022-09-20) - -[discrete] -==== Documents - -* Added the `error_trace`, `filter_path`, `human` and `pretty` parameters to the `get_source` API. -* Added the `ext` parameter to the `search` API. - -[discrete] -==== Async Search - -* Added the `ext` parameter to the `async_search.submit` API. - -[discrete] -==== Fleet - -* Added the `ext` parameter to the `fleet.search` API. 
- -[discrete] -[[rn-8-4-1]] -=== 8.4.1 (2022-09-06) - -* Client is compatible with Elasticsearch 8.4.1 - -[discrete] -[[rn-8-4-0]] -=== 8.4.0 (2022-08-25) - -[discrete] -==== Search - -* Added the `knn` parameter to the `search` API. -* Added the `knn` parameter to the `async_search.submit` API. - -[discrete] -==== Machine Learning - -* Added the `cache_size` parameter to the `ml.start_trained_model_deployment` API. - -[discrete] -==== Security - -* Added the `security.update_api_key` API. - -[discrete] -[[rn-8-3-3]] -=== 8.3.3 (2022-08-01) - -* Client is compatible with Elasticsearch 8.3.3 - -[discrete] -[[rn-8-3-2]] -=== 8.3.2 (2022-08-01) - -[discrete] -==== Security - -* Added the `refresh` parameter to the `security.create_service_token` API. - -[discrete] -[[rn-8-3-1]] -=== 8.3.1 (2022-06-30) - -[discrete] -==== Security - -* Added the **experimental** `security.has_privileges_user_profile` API. -* Added the `hint` parameter to the **experimental** `security.suggest_user_profiles` API. - -[discrete] -[[rn-8-3-0]] -=== 8.3.0 (2022-06-29) - -* Client is compatible with Elasticsearch 8.3.0 - -[discrete] -[[rn-8-2-3]] -=== 8.2.3 (2022-06-15) - -[discrete] -==== Documents - -* Added the `routing` parameter to the `msearch` API. - -[discrete] -==== CAT - -* Added the `cat.component_templates` API. - -[discrete] -==== Ingest - -* Added the `if_version` parameter to the `ingest.put_pipeline` API. - -[discrete] -==== Security - -* Changed the `name` parameter for the `security.create_service_token` API from required to optional. -* Added the `refresh` parameter to the `security.create_service_token` API. -* Changed the name of `access` parameter to the `labels` parameter in the `security.update_user_profile_data` API. - -[discrete] -==== Shutdown - -* Added the `timeout` and `master_timeout` parameters to the `shutdown.get_node`, `shutdown.delete_node`, and `shutdown.put_node` APIs. -* Added the `reason`, `type`, `allocation_delay`, and `target_node_name` parameters to the `shutdown.put_node` API. - -[discrete] -[[rn-8-2-2]] -=== 8.2.2 (2022-06-01) - -* Client is compatible with Elasticsearch 8.2.2 - -[discrete] -[[rn-8-2-1]] -=== 8.2.1 (2022-06-01) - -[discrete] -==== Machine Learning - -* Added the `inference_config` parameter to the `ml.infer_trained_model_deployment` API - -[discrete] -[[rn-8-2-0]] -=== 8.2.0 (2022-05-03) - -[discrete] -==== Client - -* Re-introduced support for passing `requests.auth.BaseAuth` objects to the `http_auth` parameter which was available in 7.x. 
-
-[discrete]
-==== Search
-
-* Added the `filter` parameter to the **experimental** `knn_search` API
-
-[discrete]
-==== Documents
-
-* Changed the `source` and `dest` parameters for the `reindex` API from optional to required
-
-[discrete]
-==== Indices
-
-* Added the `indices.field_usage_stats` API
-* Added the `indices.modify_data_stream` API
-* Added the `fields` and `types` parameters to the `field_caps` API
-* Added the `ignore_unavailable` parameter to the `open_point_in_time` API
-* Added the `master_timeout` and `timeout` parameters to the `indices.delete` API
-* Added the `features` parameter to the `indices.get` API
-
-[discrete]
-==== Machine Learning
-
-* Added the `ml.get_memory_stats` API
-
-[discrete]
-==== Migrations
-
-* Added the `migrations.get_feature_upgrade_status` API
-* Added the `migrations.post_feature_upgrade` API
-
-[discrete]
-==== Nodes
-
-* Added the `nodes.clear_repositories_metering_archive` API
-* Added the `nodes.get_repositories_metering_info` API
-
-[discrete]
-==== Security
-
-* Added the **beta** `security.activate_user_profile` API
-* Added the **beta** `security.disable_user_profile` API
-* Added the **beta** `security.enable_user_profile` API
-* Added the **beta** `security.get_user_profile` API
-* Added the **beta** `security.suggest_user_profiles` API
-* Added the **beta** `security.update_user_profile_data` API
-
-[discrete]
-==== SQL
-
-* Added the `catalog`, `index_using_frozen`, `keep_alive`, `keep_on_completion`, `runtime_mappings`, and `wait_for_completion_timeout` parameters to the `sql.query` API
-
-[discrete]
-[[rn-8-1-2]]
-=== 8.1.2 (2022-03-30)
-
-* Client is compatible with Elasticsearch 8.1.2
-
-
-[discrete]
-[[rn-8-1-1]]
-=== 8.1.1 (2022-03-22)
-
-[discrete]
-==== Documents
-
-* Changed the `source` and `dest` parameters of the `reindex` API to be required.
-
-[discrete]
-==== Mappings
-
-* Changed the `fields` parameter of the `field_caps` API to be required.
-
-
-[discrete]
-[[rn-8-1-0]]
-=== 8.1.0 (2022-03-08)
-
-[discrete]
-==== Transforms
-
-* Added the `transform.reset_transform` API
-
-
-[discrete]
-[[rn-8-0-0]]
-=== 8.0.0 (2022-02-10)
-
-[discrete]
-==== Added
-
-* Added the top-level `.options()` method to `Elasticsearch` and `AsyncElasticsearch` for modifying transport options.
-* Added parameters corresponding to JSON request body fields for all APIs
-* Added the `basic_auth` parameter for specifying username and password authentication
-* Added the `bearer_auth` parameter for specifying an HTTP bearer token or service token
-* Added the `meta` property to `ApiError` to access the HTTP response metadata of an error.
-* Added a check that a compatible version of the `elastic-transport` package is installed.
-
-[discrete]
-==== Changed
-
-* Changed the transport layer to use the `elastic-transport` package
-* Changed user-defined `body` parameters to have semantic names (e.g. `index(document={...})` instead of `index(body={...})`).
-* Changed responses to be objects with two properties, `meta` for response metadata (HTTP status, headers, node, etc.) and `body` for a typed body.
-* Changed `AsyncElasticsearch` to always be available, regardless of whether `aiohttp` is installed
-* Changed the exception hierarchy; the major change is a new exception, `ApiError`, which differentiates between an error that's raised from the transport layer (previously `elasticsearch.exceptions.TransportError`, now `elastic_transport.TransportError`) and one raised from the API layer
-* Changed the name of `JSONSerializer` to `JsonSerializer` for consistency with other serializer names. Added an alias to the old name for backwards compatibility
-* Changed the default mimetypes (`application/json`) to instead use compatibility mimetypes (`application/vnd.elasticsearch+json`) which always request responses compatible with version 8.x.
-
-[discrete]
-==== Removed
-
-* Removed support for Python 2.7 and Python 3.5; the library now supports only Python 3.6+
-* Removed the `elasticsearch.connection` module, as all functionality has been moved to the `elastic-transport` package
-* Removed the default URL of `http://localhost:9200` due to Elasticsearch 8.0's default configuration being `https://localhost:9200`.
-  The client's connection to Elasticsearch now must be specified with scheme, host, and port or with the `cloud_id` parameter
-* Removed the ability to use positional arguments with API methods. Going forward all API parameters must be keyword-only parameters
-* Removed the `doc_type`, `include_type_name`, and `copy_settings` parameters from many document and index APIs
-
-[discrete]
-==== Deprecated
-
-* Deprecated the `body` and `params` parameters on all APIs
-* Deprecated setting the transport options `http_auth`, `api_key`, `ignore`, `request_timeout`, `headers`, and `opaque_id`;
-  all of these settings should instead be set via the `.options()` method
-* Deprecated the `elasticsearch.transport` and `elasticsearch.client` modules.
These modules will be removed in a future version - -[discrete] -==== CAT - -* Removed the deprecated `local` parameter from the `cat.indices`, `cat.nodes`, `cat.shards` API -* Removed the deprecated `allow_no_datafeeds` parameter from the `cat.ml_datafeeds` API -* Removed the deprecated `allow_no_jobs` parameter from the `cat.ml_jobs` API -* Removed the deprecated `size` parameter from the `cat.thread_pool` API -* Added the `time` parameter to the `cat.thread_pool` API - -[discrete] -==== Documents - -* Removed the deprecated `size` parameter from the `delete_by_query` API -* Removed the deprecated `size` parameter from the `update_by_query` API - -[discrete] -==== Indices - -* Removed the deprecated `indices.flush_synced` API -* Removed the deprecated `indices.freeze` API -* Removed the deprecated `indices.get_upgrade` API -* Removed the deprecated `indices.upgrade` API -* Removed the deprecated `indices.exist_type` API -* Removed the deprecated parameter `copy_settings` from the `indices.shrink` API -* Deprecated the `verbose` parameter of the `indices.segments` API - -[discrete] -==== License / X-Pack - -* Deprecated the `accept_enterprise` parameter of the `license.get` API -* Deprecated the `accept_enterprise` parameter of the `xpack.info` API - -[discrete] -==== Machine Learning - -* Added the **experimental** `ml.infer_trained_model_deployment` API -* Added the **experimental** `ml.put_trained_model_definition_part` API -* Added the **experimental** `ml.put_trained_model_vocabulary` API -* Added the **experimental** `ml.start_trained_model_deployment` API -* Added the **experimental** `ml.stop_trained_model_deployment` API -* Added the `timeout` parameter to the `ml.delete_trained_model` API -* Removed the deprecated `allow_no_jobs` parameter from the `ml.close_job` API -* Removed the deprecated `ml.find_text_structure` API -* Removed the deprecated `allow_no_datafeeds` parameter from the `ml.get_datafeed_stats` API -* Removed the deprecated `allow_no_datafeeds` parameter from the `ml.get_datafeeds` API -* Removed the deprecated `allow_no_jobs` parameter from the `ml.get_job_stats` API -* Removed the deprecated `allow_no_jobs` parameter from the `ml.get_jobs` API -* Removed the deprecated `allow_no_jobs` parameter from the `ml.get_overall_buckets` API - -[discrete] -==== Search - -* Added the **experimental** `knn_search` API - -[discrete] -==== Searchable Snapshots - -* Removed the deprecated `searchable_snapshots.repository_stats` API - -[discrete] -==== Snapshots - -* Changed the `snapshot.delete` API to accept multiple snapshots - -[discrete] -==== Security - -* Added the `security.enroll_kibana` API -* Added the `security.enroll_node` API diff --git a/docs/guide/images/create-api-key.png b/docs/images/create-api-key.png similarity index 100% rename from docs/guide/images/create-api-key.png rename to docs/images/create-api-key.png diff --git a/docs/guide/images/es-endpoint.jpg b/docs/images/es-endpoint.jpg similarity index 100% rename from docs/guide/images/es-endpoint.jpg rename to docs/images/es-endpoint.jpg diff --git a/docs/guide/images/otel-waterfall-retry.png b/docs/images/otel-waterfall-retry.png similarity index 100% rename from docs/guide/images/otel-waterfall-retry.png rename to docs/images/otel-waterfall-retry.png diff --git a/docs/guide/images/otel-waterfall-with-http.png b/docs/images/otel-waterfall-with-http.png similarity index 100% rename from docs/guide/images/otel-waterfall-with-http.png rename to docs/images/otel-waterfall-with-http.png diff --git 
a/docs/guide/images/otel-waterfall-without-http.png b/docs/images/otel-waterfall-without-http.png similarity index 100% rename from docs/guide/images/otel-waterfall-without-http.png rename to docs/images/otel-waterfall-without-http.png diff --git a/docs/logo-elastic-glyph-color.svg b/docs/logo-elastic-glyph-color.svg deleted file mode 100644 index 37a349291..000000000 --- a/docs/logo-elastic-glyph-color.svg +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/docs/reference/_configuration.md b/docs/reference/_configuration.md new file mode 100644 index 000000000..ea89ddd68 --- /dev/null +++ b/docs/reference/_configuration.md @@ -0,0 +1,105 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/_configuration.html +--- + +# Configuration [_configuration] + +There are several ways to configure connections for the library. The easiest and most useful approach is to define one default connection that can be used every time an API call is made without explicitly passing in other connections. + +::::{note} +Unless you want to access multiple clusters from your application, it is highly recommended that you use the `create_connection` method and all operations will use that connection automatically. + +:::: + + +## Default connection [_default_connection] + +To define a default connection that can be used globally, use the `connections` module and the `create_connection` method like this: + +```python +from elasticsearch.dsl import connections + +connections.create_connection(hosts=['localhost'], timeout=20) +``` + +### Single connection with an alias [_single_connection_with_an_alias] + +You can define the `alias` or name of a connection so you can easily refer to it later. The default value for `alias` is `default`. + +```python +from elasticsearch.dsl import connections + +connections.create_connection(alias='my_new_connection', hosts=['localhost'], timeout=60) +``` + +Additional keyword arguments (`hosts` and `timeout` in our example) will be passed to the `Elasticsearch` class from `elasticsearch-py`. + +To see all possible configuration options refer to the [documentation](https://elasticsearch-py.readthedocs.io/en/latest/api/elasticsearch.html). + + + +## Multiple clusters [_multiple_clusters] + +You can define multiple connections to multiple clusters at the same time using the `configure` method: + +```python +from elasticsearch.dsl import connections + +connections.configure( + default={'hosts': 'localhost'}, + dev={ + 'hosts': ['esdev1.example.com:9200'], + 'sniff_on_start': True + } +) +``` + +Such connections will be constructed lazily when requested for the first time. + +You can alternatively define multiple connections by adding them one by one as shown in the following example: + +```python +# if you have configuration options to be passed to Elasticsearch.__init__ +# this also shows creating a connection with the alias 'qa' +connections.create_connection('qa', hosts=['esqa1.example.com'], sniff_on_start=True) + +# if you already have an Elasticsearch instance ready +connections.add_connection('another_qa', my_client) +``` + +### Using aliases [_using_aliases] + +When using multiple connections, you can refer to them using the string alias specified when you created the connection. + +This example shows how to use an alias to a connection: + +```python +s = Search(using='qa') +``` + +A `KeyError` will be raised if there is no connection registered with that alias. 
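+
+As a hedged aside, a registered connection can also be retrieved directly by its alias with `get_connection` (the same `KeyError` is raised for unknown aliases):
+
+```python
+from elasticsearch.dsl import connections
+
+# retrieve the client registered earlier under the 'qa' alias
+es = connections.get_connection(alias="qa")
+print(es.info())
+```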
+
+
+
+## Manual [_manual]
+
+If you don't want to supply a global configuration, you can always pass in your own connection as an instance of `elasticsearch.Elasticsearch` with the parameter `using` wherever it is accepted, like this:
+
+```python
+s = Search(using=Elasticsearch('http://localhost:9200'))
+```
+
+You can even use this approach to override any connection the object might already be associated with:
+
+```python
+s = s.using(Elasticsearch('http://otherhost:9200'))
+```
+
+::::{note}
+When using the `dsl` module, it is highly recommended that you use the built-in serializer (`elasticsearch.dsl.serializer.serializer`) to ensure your objects are correctly serialized into JSON every time. The `create_connection` method that is described here (and that the `configure` method uses under the hood) will do that automatically for you, unless you explicitly specify your own serializer. The built-in serializer also allows you to serialize your own objects - just define a `to_dict()` method on your objects and that method will be automatically called when serializing your custom objects to JSON.
+
+::::
+
+
+
diff --git a/docs/reference/_examples.md b/docs/reference/_examples.md
new file mode 100644
index 000000000..ba57b7854
--- /dev/null
+++ b/docs/reference/_examples.md
@@ -0,0 +1,9 @@
+---
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/_examples.html
+---
+
+# Examples [_examples]
+
+See the [DSL examples](https://github.com/elastic/elasticsearch-py/tree/master/examples/dsl) directory for some complex examples using the DSL module.
+
diff --git a/docs/reference/_how_to_guides.md b/docs/reference/_how_to_guides.md
new file mode 100644
index 000000000..08e16b28c
--- /dev/null
+++ b/docs/reference/_how_to_guides.md
@@ -0,0 +1,1492 @@
+---
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/_how_to_guides.html
+---
+
+# How-To Guides [_how_to_guides]
+
+## Search DSL [search_dsl]
+
+### The `Search` object [_the_search_object]
+
+The `Search` object represents the entire search request:
+
+* queries
+* filters
+* aggregations
+* k-nearest neighbor searches
+* sort
+* pagination
+* highlighting
+* suggestions
+* collapsing
+* additional parameters
+* associated client
+
+The API is designed to be chainable. With the exception of the aggregations functionality, this means that the `Search` object is immutable: all changes to the object result in a shallow copy being created that contains the changes. This means you can safely pass the `Search` object to foreign code without fear of it modifying your objects, as long as it sticks to the `Search` object APIs.
+
+You can pass an instance of the [elasticsearch client](https://elasticsearch-py.readthedocs.io/) when instantiating the `Search` object:
+
+```python
+from elasticsearch import Elasticsearch
+from elasticsearch.dsl import Search
+
+client = Elasticsearch()
+
+s = Search(using=client)
+```
+
+You can also define the client at a later time (for more options see the `configuration` chapter):
+
+```python
+s = s.using(client)
+```
+
+::::{note}
+All methods return a *copy* of the object, making it safe to pass to outside code.
+
+::::
+
+
+The API is chainable, allowing you to combine multiple method calls in one statement:
+
+```python
+s = Search().using(client).query("match", title="python")
+```
+
+To send the request to Elasticsearch:
+
+```python
+response = s.execute()
+```
+
+If you just want to iterate over the hits returned by your search, you can iterate over the `Search` object:
+
+```python
+for hit in s:
+    print(hit.title)
+```
+
+Search results will be cached. Subsequent calls to `execute`, or trying to iterate over an already executed `Search` object, will not trigger additional requests being sent to Elasticsearch. To force a request, specify `ignore_cache=True` when calling `execute`.
+
+For debugging purposes you can serialize the `Search` object to a `dict` explicitly:
+
+```python
+print(s.to_dict())
+```
+
+#### Delete By Query [_delete_by_query]
+
+You can delete the documents matching a search by calling `delete` on the `Search` object instead of `execute`, like this:
+
+```python
+s = Search(index='i').query("match", title="python")
+response = s.delete()
+```
+
+
+#### Queries [_queries]
+
+The library provides classes for all Elasticsearch query types. Pass all the parameters as keyword arguments. The classes accept any keyword arguments; the DSL then takes all arguments passed to the constructor and serializes them as top-level keys in the resulting dictionary (and thus in the resulting JSON sent to Elasticsearch). This means that there is a clear one-to-one mapping between the raw query and its equivalent in the DSL:
+
+```python
+from elasticsearch.dsl.query import MultiMatch, Match
+
+# {"multi_match": {"query": "python django", "fields": ["title", "body"]}}
+MultiMatch(query='python django', fields=['title', 'body'])
+
+# {"match": {"title": {"query": "web framework", "type": "phrase"}}}
+Match(title={"query": "web framework", "type": "phrase"})
+```
+
+::::{note}
+In some cases this approach is not possible due to Python’s restrictions on identifiers - for example if your field is called `@timestamp`. In that case you have to fall back to unpacking a dictionary: `Range(**{'@timestamp': {'lt': 'now'}})`
+
+::::
+
+
+You can use the `Q` shortcut to construct the instance using a name with parameters or the raw `dict`:
+
+```python
+from elasticsearch.dsl import Q
+
+Q("multi_match", query='python django', fields=['title', 'body'])
+Q({"multi_match": {"query": "python django", "fields": ["title", "body"]}})
+```
+
+To add the query to the `Search` object, use the `.query()` method:
+
+```python
+q = Q("multi_match", query='python django', fields=['title', 'body'])
+s = s.query(q)
+```
+
+The method also accepts all the parameters that the `Q` shortcut does:
+
+```python
+s = s.query("multi_match", query='python django', fields=['title', 'body'])
+```
+
+If you already have a query object, or a `dict` representing one, you can just override the query used in the `Search` object:
+
+```python
+s.query = Q('bool', must=[Q('match', title='python'), Q('match', body='best')])
+```
+
+
+#### Dotted fields [_dotted_fields]
+
+Sometimes you want to refer to a field within another field, either as a multi-field (`title.keyword`) or in a structured `json` document like `address.city`. To make it easier, the `Q` shortcut (as well as the `query`, `filter`, and `exclude` methods on the `Search` class) allows you to use `__` (double underscore) in place of a dot in a keyword argument:
+
+```python
+s = Search()
+s = s.filter('term', category__keyword='Python')
+s = s.query('match', address__city='prague')
+```
+
+Alternatively you can always fall back to Python’s kwarg unpacking if you prefer:
+
+```python
+s = Search()
+s = s.filter('term', **{'category.keyword': 'Python'})
+s = s.query('match', **{'address.city': 'prague'})
+```
+
+
+#### Query combination [_query_combination]
+
+Query objects can be combined using logical operators:
+
+```python
+Q("match", title='python') | Q("match", title='django')
+# {"bool": {"should": [...]}}
+
+Q("match", title='python') & Q("match", title='django')
+# {"bool": {"must": [...]}}
+
+~Q("match", title="python")
+# {"bool": {"must_not": [...]}}
+```
+
+When you call the `.query()` method multiple times, the `&` operator will be used internally:
+
+```python
+s = s.query().query()
+print(s.to_dict())
+# {"query": {"bool": {...}}}
+```
+
+If you want to have precise control over the query form, use the `Q` shortcut to directly construct the combined query:
+
+```python
+q = Q('bool',
+    must=[Q('match', title='python')],
+    should=[Q(...), Q(...)],
+    minimum_should_match=1
+)
+s = Search().query(q)
+```
+
+
+#### Filters [_filters]
+
+If you want to add a query in a [filter context](docs-content://explore-analyze/query-filter/languages/querydsl.md) you can use the `filter()` method to make things easier:
+
+```python
+s = Search()
+s = s.filter('terms', tags=['search', 'python'])
+```
+
+Behind the scenes this will produce a `Bool` query and place the specified `terms` query into its `filter` branch, making it equivalent to:
+
+```python
+s = Search()
+s = s.query('bool', filter=[Q('terms', tags=['search', 'python'])])
+```
+
+If you want to use the `post_filter` element for faceted navigation, use the `.post_filter()` method.
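+
+As an illustration, here is a minimal sketch of `.post_filter()` (the `tags` field and the `per_tag` aggregation are made-up examples). Because the post filter is applied after aggregations are calculated, it narrows the hits without changing the facet counts:
+
+```python
+s = Search()
+s.aggs.bucket('per_tag', 'terms', field='tags')
+# narrow down the hits without changing the 'per_tag' facet counts
+s = s.post_filter('terms', tags=['search', 'python'])
+```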
+
+You can also `exclude()` items from your query like this:
+
+```python
+s = Search()
+s = s.exclude('terms', tags=['search', 'python'])
+```
+
+which is shorthand for: `s = s.query('bool', filter=[~Q('terms', tags=['search', 'python'])])`
+
+
+#### Aggregations [_aggregations]
+
+To define an aggregation, you can use the `A` shortcut:
+
+```python
+from elasticsearch.dsl import A
+
+A('terms', field='tags')
+# {"terms": {"field": "tags"}}
+```
+
+To nest aggregations, you can use the `.bucket()`, `.metric()` and `.pipeline()` methods:
+
+```python
+a = A('terms', field='category')
+# {'terms': {'field': 'category'}}
+
+a.metric('clicks_per_category', 'sum', field='clicks')\
+    .bucket('tags_per_category', 'terms', field='tags')
+# {
+#   'terms': {'field': 'category'},
+#   'aggs': {
+#     'clicks_per_category': {'sum': {'field': 'clicks'}},
+#     'tags_per_category': {'terms': {'field': 'tags'}}
+#   }
+# }
+```
+
+To add aggregations to the `Search` object, use the `.aggs` property, which acts as a top-level aggregation:
+
+```python
+s = Search()
+a = A('terms', field='category')
+s.aggs.bucket('category_terms', a)
+# {
+#   'aggs': {
+#     'category_terms': {
+#       'terms': {
+#         'field': 'category'
+#       }
+#     }
+#   }
+# }
+```
+
+or
+
+```python
+s = Search()
+s.aggs.bucket('articles_per_day', 'date_histogram', field='publish_date', interval='day')\
+    .metric('clicks_per_day', 'sum', field='clicks')\
+    .pipeline('moving_click_average', 'moving_avg', buckets_path='clicks_per_day')\
+    .bucket('tags_per_day', 'terms', field='tags')
+
+s.to_dict()
+# {
+#   "aggs": {
+#     "articles_per_day": {
+#       "date_histogram": { "interval": "day", "field": "publish_date" },
+#       "aggs": {
+#         "clicks_per_day": { "sum": { "field": "clicks" } },
+#         "moving_click_average": { "moving_avg": { "buckets_path": "clicks_per_day" } },
+#         "tags_per_day": { "terms": { "field": "tags" } }
+#       }
+#     }
+#   }
+# }
+```
+
+You can access an existing bucket by its name:
+
+```python
+s = Search()
+
+s.aggs.bucket('per_category', 'terms', field='category')
+s.aggs['per_category'].metric('clicks_per_category', 'sum', field='clicks')
+s.aggs['per_category'].bucket('tags_per_category', 'terms', field='tags')
+```
+
+::::{note}
+When chaining multiple aggregations, there is a difference between what the `.bucket()` and `.metric()` methods return: `.bucket()` returns the newly defined bucket, while `.metric()` returns its parent bucket to allow further chaining.
+
+::::
+
+
+As opposed to other methods on the `Search` object, defining aggregations is done in-place (it does not return a copy).
+
+
+#### K-Nearest Neighbor Searches [_k_nearest_neighbor_searches]
+
+To issue a kNN search, use the `.knn()` method:
+
+```python
+s = Search()
+vector = get_embedding("search text")
+
+s = s.knn(
+    field="embedding",
+    k=5,
+    num_candidates=10,
+    query_vector=vector
+)
+```
+
+The `field`, `k` and `num_candidates` arguments can be given as positional or keyword arguments and are required. In addition to these, `query_vector` or `query_vector_builder` must be given as well.
+
+The `.knn()` method can be invoked multiple times to include multiple kNN searches in the request.
+
+
+#### Sorting [_sorting]
+
+To specify the sorting order, use the `.sort()` method:
+
+```python
+s = Search().sort(
+    'category',
+    '-title',
+    {"lines" : {"order" : "asc", "mode" : "avg"}}
+)
+```
+
+It accepts positional arguments which can be either strings or dictionaries. A string value is a field name, optionally prefixed with a `-` sign to specify descending order.
+
+To reset the sorting, just call the method with no arguments:
+
+```python
+s = s.sort()
+```
+
+
+#### Pagination [_pagination]
+
+To specify the from/size parameters, use the Python slicing API:
+
+```python
+s = s[10:20]
+# {"from": 10, "size": 10}
+
+s = s[:20]
+# {"size": 20}
+
+s = s[10:]
+# {"from": 10}
+
+s = s[10:20][2:]
+# {"from": 12, "size": 8}
+```
+
+If you want to access all the documents matched by your query, you can use the `scan` method, which uses the scan/scroll Elasticsearch API:
+
+```python
+for hit in s.scan():
+    print(hit.title)
+```
+
+Note that in this case the results won’t be sorted.
+
+
+#### Highlighting [_highlighting]
+
+To set common attributes for highlighting, use the `highlight_options` method:
+
+```python
+s = s.highlight_options(order='score')
+```
+
+Enabling highlighting for individual fields is done using the `highlight` method:
+
+```python
+s = s.highlight('title')
+# or, including parameters:
+s = s.highlight('title', fragment_size=50)
+```
+
+The fragments in the response will then be available on each `Result` object as `.meta.highlight.FIELD`, which will contain the list of fragments:
+
+```python
+response = s.execute()
+for hit in response:
+    for fragment in hit.meta.highlight.title:
+        print(fragment)
+```
+
+
+#### Suggestions [_suggestions]
+
+To specify a suggest request on your `Search` object, use the `suggest` method:
+
+```python
+# check for correct spelling
+s = s.suggest('my_suggestion', 'pyhton', term={'field': 'title'})
+```
+
+The first argument is the name of the suggestion (the name under which it will be returned), and the second is the actual text you wish the suggester to work on. The keyword arguments will be added to the suggestion’s JSON as-is, which means that there should be exactly one of `term`, `phrase` or `completion` among them to indicate which type of suggester should be used.
+
+
+#### Collapsing [_collapsing]
+
+To collapse search results, use the `collapse` method on your `Search` object:
+
+```python
+s = Search().query("match", message="GET /search")
+# collapse results by user_id
+s = s.collapse("user_id")
+```
+
+The top hits will only include one result per `user_id`. You can also expand each collapsed top hit with the `inner_hits` parameter, `max_concurrent_group_searches` being the number of concurrent requests allowed to retrieve the inner hits per group:
+
+```python
+inner_hits = {"name": "recent_search", "size": 5, "sort": [{"@timestamp": "desc"}]}
+s = s.collapse("user_id", inner_hits=inner_hits, max_concurrent_group_searches=4)
+```
+
+
+#### More Like This Query [_more_like_this_query]
+
+To use Elasticsearch’s `more_like_this` functionality, you can use the `MoreLikeThis` query type.
+
+A simple example is shown below:
+
+```python
+from elasticsearch.dsl.query import MoreLikeThis
+from elasticsearch.dsl import Search
+
+my_text = 'I want to find something similar'
+
+s = Search()
+# We're going to match based only on two fields, in this case text and title
+s = s.query(MoreLikeThis(like=my_text, fields=['text', 'title']))
+# You can also exclude fields from the result, in the normal way, to make the response quicker
+s = s.source(exclude=["text"])
+response = s.execute()
+
+for hit in response:
+    print(hit.title)
+```
+
+
+#### Extra properties and parameters [_extra_properties_and_parameters]
+
+To set extra properties of the search request, use the `.extra()` method. This can be used to define keys in the body that cannot be defined via a specific API method, like `explain` or `search_after`:
+
+```python
+s = s.extra(explain=True)
+```
+
+To set query parameters, use the `.params()` method:
+
+```python
+s = s.params(routing="42")
+```
+
+If you need to limit the fields returned by Elasticsearch, use the `source()` method:
+
+```python
+# only return the selected fields
+s = s.source(['title', 'body'])
+# don't return any fields, just the metadata
+s = s.source(False)
+# explicitly include/exclude fields
+s = s.source(includes=["title"], excludes=["user.*"])
+# reset the field selection
+s = s.source(None)
+```
+
+
+#### Serialization and Deserialization [_serialization_and_deserialization]
+
+The search object can be serialized into a dictionary by using the `.to_dict()` method.
+
+You can also create a `Search` object from a `dict` using the `from_dict` class method. This will create a new `Search` object and populate it using the data from the dict:
+
+```python
+s = Search.from_dict({"query": {"match": {"title": "python"}}})
+```
+
+If you wish to modify an existing `Search` object, overriding its properties, use the `update_from_dict` method instead, which alters an instance **in-place**:
+
+```python
+s = Search(index='i')
+s.update_from_dict({"query": {"match": {"title": "python"}}, "size": 42})
+```
+
+
+
+### Response [_response]
+
+You can execute your search by calling the `.execute()` method, which will return a `Response` object. The `Response` object allows you to access any key from the response dictionary via attribute access. It also provides some convenient helpers:
+
+```python
+response = s.execute()
+
+print(response.success())
+# True
+
+print(response.took)
+# 12
+
+print(response.hits.total.relation)
+# eq
+print(response.hits.total.value)
+# 142
+
+print(response.suggest.my_suggestions)
+```
+
+If you want to inspect the contents of the `response` object, just use its `to_dict` method to get access to the raw data for pretty printing.
+
+#### Hits [_hits]
+
+To access the hits returned by the search, use the `hits` property or just iterate over the `Response` object:
+
+```python
+response = s.execute()
+print('Total %d hits found.' % response.hits.total)
+for h in response:
+    print(h.title, h.body)
+```
+
+::::{note}
+If you are only seeing partial results (e.g. 10000 or even 10 results), consider using the option `s.extra(track_total_hits=True)` to get a full hit count.
+
+::::
+
+
+
+#### Result [_result]
+
+Each individual hit is wrapped in a convenience class that allows attribute access to the keys in the returned dictionary. All the metadata for the results is accessible via `meta` (without the leading `_`):
+
+```python
+response = s.execute()
+h = response.hits[0]
+print('/%s/%s/%s returned with score %f' % (
+    h.meta.index, h.meta.doc_type, h.meta.id, h.meta.score))
+```
+
+::::{note}
+If your document has a field called `meta`, you have to access it using the get item syntax: `hit['meta']`.
+ +:::: + + + +#### Aggregations [_aggregations_2] + +Aggregations are available through the `aggregations` property: + +```python +for tag in response.aggregations.per_tag.buckets: + print(tag.key, tag.max_lines.value) +``` + + + +### `MultiSearch` [_multisearch] + +If you need to execute multiple searches at the same time you can use the `MultiSearch` class which will use the `_msearch` API: + +```python +from elasticsearch.dsl import MultiSearch, Search + +ms = MultiSearch(index='blogs') + +ms = ms.add(Search().filter('term', tags='python')) +ms = ms.add(Search().filter('term', tags='elasticsearch')) + +responses = ms.execute() + +for response in responses: + print("Results for query %r." % response._search.query) + for hit in response: + print(hit.title) +``` + + +### `EmptySearch` [_emptysearch] + +The `EmptySearch` class can be used as a fully compatible version of `Search` that will return no results, regardless of any queries configured. + + + +## Persistence [_persistence_2] + +You can use the DSL module to define your mappings and a basic persistent layer for your application. + +For more comprehensive examples have a look at the [DSL examples](https://github.com/elastic/elasticsearch-py/tree/main/examples/dsl) directory in the repository. + +### Document [doc_type] + +If you want to create a model-like wrapper around your documents, use the `Document` class. It can also be used to create all the necessary mappings and settings in elasticsearch (see `life-cycle` for details). + +```python +from datetime import datetime +from elasticsearch.dsl import Document, Date, Nested, Boolean, \ + analyzer, InnerDoc, Completion, Keyword, Text + +html_strip = analyzer('html_strip', + tokenizer="standard", + filter=["standard", "lowercase", "stop", "snowball"], + char_filter=["html_strip"] +) + +class Comment(InnerDoc): + author = Text(fields={'raw': Keyword()}) + content = Text(analyzer='snowball') + created_at = Date() + + def age(self): + return datetime.now() - self.created_at + +class Post(Document): + title = Text() + title_suggest = Completion() + created_at = Date() + published = Boolean() + category = Text( + analyzer=html_strip, + fields={'raw': Keyword()} + ) + + comments = Nested(Comment) + + class Index: + name = 'blog' + + def add_comment(self, author, content): + self.comments.append( + Comment(author=author, content=content, created_at=datetime.now())) + + def save(self, ** kwargs): + self.created_at = datetime.now() + return super().save(** kwargs) +``` + +#### Data types [_data_types] + +The `Document` instances use native python types like `str` and `datetime`. In case of `Object` or `Nested` fields an instance of the `InnerDoc` subclass is used, as in the `add_comment` method in the above example where we are creating an instance of the `Comment` class. 
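+
+For example, a short sketch (with made-up values) of how the `Post` and `Comment` classes defined above might be used together:
+
+```python
+post = Post(title='Hello DSL', published=False, created_at=datetime.now())
+# add_comment builds a Comment (an InnerDoc) and appends it to the Nested field
+post.add_comment('alice', 'Great introduction!')
+print(post.comments[0].author)
+```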
+
+There are some specific types that were created as part of this library to make working with some field types easier, for example the `Range` object used in any of the [range fields](elasticsearch://reference/elasticsearch/mapping-reference/range.md):
+
+```python
+from datetime import datetime
+from elasticsearch.dsl import Document, DateRange, Keyword, Range
+
+class RoomBooking(Document):
+    room = Keyword()
+    dates = DateRange()
+
+
+rb = RoomBooking(
+    room='Conference Room II',
+    dates=Range(
+        gte=datetime(2018, 11, 17, 9, 0, 0),
+        lt=datetime(2018, 11, 17, 10, 0, 0)
+    )
+)
+
+# Range supports the in operator correctly:
+datetime(2018, 11, 17, 9, 30, 0) in rb.dates # True
+
+# you can also get the limits and whether they are inclusive or exclusive:
+rb.dates.lower # datetime(2018, 11, 17, 9, 0, 0), True
+rb.dates.upper # datetime(2018, 11, 17, 10, 0, 0), False
+
+# empty range is unbounded
+Range().lower # None, False
+```
+
+
+#### Python Type Hints [_python_type_hints]
+
+Document fields can be defined using standard Python type hints if desired. Here are some simple examples:
+
+```python
+from typing import Optional
+
+class Post(Document):
+    title: str # same as title = Text(required=True)
+    created_at: Optional[datetime] # same as created_at = Date(required=False)
+    published: bool # same as published = Boolean(required=True)
+```
+
+It is important to note that when using `Field` subclasses such as `Text`, `Date` and `Boolean`, they must be given on the right-hand side of an assignment, as shown in the examples above. Using these classes as type hints will result in errors.
+
+Python types are mapped to their corresponding field types according to the following table:
+
+| Python type | DSL field |
+| --- | --- |
+| `str` | `Text(required=True)` |
+| `bool` | `Boolean(required=True)` |
+| `int` | `Integer(required=True)` |
+| `float` | `Float(required=True)` |
+| `bytes` | `Binary(required=True)` |
+| `datetime` | `Date(required=True)` |
+| `date` | `Date(format="yyyy-MM-dd", required=True)` |
+
+To type a field as optional, the standard `Optional` modifier from the Python `typing` package can be used. When using Python 3.10 or newer, "pipe" syntax can also be used, by adding `| None` to a type. The `List` modifier can be added to a field to convert it to an array, similar to using the `multi=True` argument on the field object.
+
+```python
+from typing import Optional, List
+
+class MyDoc(Document):
+    pub_date: Optional[datetime] # same as pub_date = Date()
+    middle_name: str | None # same as middle_name = Text()
+    authors: List[str] # same as authors = Text(multi=True, required=True)
+    comments: Optional[List[str]] # same as comments = Text(multi=True)
+```
+
+A field can also be given a type hint of an `InnerDoc` subclass, in which case it becomes an `Object` field of that class. When the `InnerDoc` subclass is wrapped with `List`, a `Nested` field is created instead.
+
+```python
+from typing import List
+
+class Address(InnerDoc):
+    ...
+
+class Comment(InnerDoc):
+    ...
+
+class Post(Document):
+    address: Address # same as address = Object(Address, required=True)
+    comments: List[Comment] # same as comments = Nested(Comment, required=True)
+```
+
+Unfortunately it is impossible to have Python type hints that uniquely identify every possible Elasticsearch field type. To choose a field type that is different from the ones in the table above, the field instance can be added explicitly on the right-hand side of the field declaration. The next example creates a field that is typed as `Optional[str]`, but is mapped to `Keyword` instead of `Text`:
+
+```python
+class MyDocument(Document):
+    category: Optional[str] = Keyword()
+```
+
+This form can also be used when additional options need to be given to initialize the field, such as when using custom analyzer settings or changing the `required` default:
+
+```python
+class Comment(InnerDoc):
+    content: str = Text(analyzer='snowball', required=True)
+```
+
+When using type hints as above, subclasses of `Document` and `InnerDoc` inherit some of the behaviors associated with Python dataclasses, as defined by [PEP 681](https://peps.python.org/pep-0681/) and the [dataclass_transform decorator](https://typing.readthedocs.io/en/latest/spec/dataclasses.html#dataclass-transform). To add per-field dataclass options such as `default` or `default_factory`, the `mapped_field()` wrapper can be used on the right side of a typed field declaration:
+
+```python
+class MyDocument(Document):
+    title: str = mapped_field(default="no title")
+    created_at: datetime = mapped_field(default_factory=datetime.now)
+    published: bool = mapped_field(default=False)
+    category: str = mapped_field(Keyword(required=True), default="general")
+```
+
+When using the `mapped_field()` wrapper function, an explicit field type instance can be passed as the first positional argument, as the `category` field does in the example above.
+
+Static type checkers such as [mypy](https://mypy-lang.org/) and [pyright](https://github.com/microsoft/pyright) can use the type hints and the dataclass-specific options added to the `mapped_field()` function to improve type inference and provide better real-time suggestions in IDEs.
+
+One situation in which type checkers can’t infer the correct type is when using fields as class attributes. Consider the following example:
+
+```python
+class MyDocument(Document):
+    title: str
+
+doc = MyDocument()
+# doc.title is typed as "str" (correct)
+# MyDocument.title is also typed as "str" (incorrect)
+```
+
+To help type checkers correctly identify class attributes as such, the `M` generic must be used as a wrapper for the type hint, as shown in the next examples:
+
+```python
+from elasticsearch.dsl import M
+
+class MyDocument(Document):
+    title: M[str]
+    created_at: M[datetime] = mapped_field(default_factory=datetime.now)
+
+doc = MyDocument()
+# doc.title is typed as "str"
+# doc.created_at is typed as "datetime"
+# MyDocument.title is typed as "InstrumentedField"
+# MyDocument.created_at is typed as "InstrumentedField"
+```
+
+Note that the `M` type hint does not provide any runtime behavior and its use is not required, but it can be useful to eliminate spurious type errors in IDEs or type checking builds.
+
+The `InstrumentedField` objects returned when fields are accessed as class attributes are proxies for the field instances that can be used anywhere a field needs to be referenced, such as when specifying sort options in a `Search` object:
+
+```python
+# sort by creation date descending, and title ascending
+s = MyDocument.search().sort(-MyDocument.created_at, MyDocument.title)
+```
+
+When specifying sorting order, the `+` and `-` unary operators can be used on the class field attributes to indicate ascending and descending order.
+
+Finally, the `ClassVar` annotation can be used to define a regular class attribute that should not be mapped to the Elasticsearch index:
+
+```python
+from typing import ClassVar
+
+class MyDoc(Document):
+    title: M[str]
+    created_at: M[datetime] = mapped_field(default_factory=datetime.now)
+    my_var: ClassVar[str]  # regular class variable, ignored by Elasticsearch
+```
+
+
+#### Note on dates [_note_on_dates]
+
+The DSL module will always respect the timezone information (or lack thereof) on the `datetime` objects passed in or stored in Elasticsearch. Elasticsearch itself interprets all datetimes with no timezone information as `UTC`. If you wish to reflect this in your python code, you can specify `default_timezone` when instantiating a `Date` field:
+
+```python
+class Post(Document):
+    created_at = Date(default_timezone='UTC')
+```
+
+In that case any `datetime` object passed in (or parsed from elasticsearch) will be treated as if it were in the `UTC` timezone.
+
+
+#### Document life cycle [life-cycle]
+
+Before you first use the `Post` document type, you need to create the mappings in Elasticsearch. For that you can either use the `index` object or create the mappings directly by calling the `init` class method:
+
+```python
+# create the mappings in Elasticsearch
+Post.init()
+```
+
+This code will typically be run in the setup for your application during a code deploy, similar to running database migrations.
+
+To create a new `Post` document, just instantiate the class and pass in any fields you wish to set. You can then use standard attribute setting to change or add more fields. Note that you are not limited to the fields defined explicitly:
+
+```python
+# instantiate the document
+first = Post(title='My First Blog Post, yay!', published=True)
+# assign some field values, can be values or lists of values
+first.category = ['everything', 'nothing']
+# every document has an id in meta
+first.meta.id = 47
+
+
+# save the document into the cluster
+first.save()
+```
+
+All the metadata fields (`id`, `routing`, `index` etc.) can be accessed (and set) via a `meta` attribute or directly using the underscored variant:
+
+```python
+post = Post(meta={'id': 42})
+
+# prints 42
+print(post.meta.id)
+
+# override default index
+post.meta.index = 'my-blog'
+```
+
+::::{note}
+Having all metadata accessible through `meta` means that this name is reserved and you shouldn’t have a field called `meta` on your document. If, however, you need it, you can still access the data using the get item (as opposed to attribute) syntax: `post['meta']`.
+
+::::
+
+
+To retrieve an existing document use the `get` class method:
+
+```python
+# retrieve the document
+first = Post.get(id=42)
+# now we can call methods, change fields, ...
+first.add_comment('me', 'This is nice!')
+# and save the changes into the cluster again
+first.save()
+```
+
+The [Update API](https://www.elastic.co/docs/api/doc/elasticsearch/v8/group/endpoint-document) can also be used via the `update` method. By default, any keyword arguments beyond the parameters of the API will be considered fields with new values. Those fields will be updated on the local copy of the document and then sent to Elasticsearch as a partial document to be updated:
+
+```python
+# retrieve the document
+first = Post.get(id=42)
+# you can update just individual fields which will call the update API
+# and also update the document in place
+first.update(published=True, published_by='me')
+```
+
+In case you wish to use a `painless` script to perform the update, you can pass in the script string as `script`, or the `id` of a [stored script](docs-content://explore-analyze/scripting/modules-scripting-using.md#script-stored-scripts) via `script_id`. All additional keyword arguments to the `update` method will then be passed in as parameters of the script. The document will not be updated in place.
+
+```python
+# retrieve the document
+first = Post.get(id=42)
+# we execute a script in elasticsearch with additional kwargs being passed
+# as params into the script
+first.update(script='ctx._source.category.add(params.new_category)',
+             new_category='testing')
+```
+
+If the document is not found in Elasticsearch, an exception (`elasticsearch.NotFoundError`) will be raised. If you wish to return `None` instead, just pass in `ignore=404` to suppress the exception:
+
+```python
+p = Post.get(id='not-in-es', ignore=404)
+p is None
+```
+
+When you wish to retrieve multiple documents at the same time by their `id`, you can use the `mget` method:
+
+```python
+posts = Post.mget([42, 47, 256])
+```
+
+`mget` will, by default, raise a `NotFoundError` if any of the documents isn’t found, and a `RequestError` if any of the documents resulted in an error. You can control this behavior by setting parameters:
+
+* `raise_on_error`: If `True` (default) then any error will cause an exception to be raised. Otherwise all documents containing errors will be treated as missing.
+* `missing`: Can have three possible values: `'none'` (default), `'raise'` and `'skip'`. If a document is missing or errored, it will either be replaced with `None`, an exception will be raised, or the document will be skipped in the output list entirely.
+
+The index associated with the `Document` is accessible via the `_index` class property, which gives you access to the `Index` class.
+
+The `_index` attribute is also home to the `load_mappings` method, which will update the mapping on the `Index` from Elasticsearch. This is very useful if you use dynamic mappings and want the class to be aware of those fields (for example if you wish the `Date` fields to be properly (de)serialized):
+
+```python
+Post._index.load_mappings()
+```
+
+To delete a document just call its `delete` method:
+
+```python
+first = Post.get(id=42)
+first.delete()
+```
+
+
+#### Analysis [_analysis]
+
+To specify `analyzer` values for `Text` fields, you can just use the name of the analyzer (as a string) and either rely on the analyzer being defined (like built-in analyzers) or define the analyzer yourself manually.
+
+Alternatively you can create your own analyzer and have the persistence layer handle its creation, from our example earlier:
+
+```python
+from elasticsearch.dsl import analyzer, tokenizer
+
+my_analyzer = analyzer('my_analyzer',
+    tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
+    filter=['lowercase']
+)
+```
+
+Each analysis object needs to have a name (`my_analyzer` and `trigram` in our example), and tokenizers, token filters and char filters also need to specify a type (`nGram` in our example).
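+
+For illustration, a custom analyzer like the one above would typically be attached to a `Text` field on a document (the `Article` class here is hypothetical), so that the persistence layer creates the analyzer together with the mapping:
+
+```python
+from elasticsearch.dsl import Document, Text
+
+class Article(Document):
+    # my_analyzer is the custom analyzer defined above; it is created
+    # in Elasticsearch when Article.init() sets up the mapping
+    title = Text(analyzer=my_analyzer)
+
+    class Index:
+        name = 'articles'
+```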
+
+Once you have an instance of a custom `analyzer`, you can also call the [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/v8/group/endpoint-indices) on it by using the `simulate` method:
+
+```python
+response = my_analyzer.simulate('Hello World!')
+
+# ['hel', 'ell', 'llo', 'lo ', 'o w', ' wo', 'wor', 'orl', 'rld', 'ld!']
+tokens = [t.token for t in response.tokens]
+```
+
+::::{note}
+When creating a mapping which relies on a custom analyzer, the index must either not exist or be closed. To create multiple `Document`-defined mappings you can use the `index` object.
+
+::::
+
+
+
+#### Search [_search_2]
+
+To search for this document type, use the `search` class method:
+
+```python
+# by calling .search we get back a standard Search object
+s = Post.search()
+# the search is already limited to the index and doc_type of our document
+s = s.filter('term', published=True).query('match', title='first')
+
+
+results = s.execute()
+
+# when you execute the search the results are wrapped in your document class (Post)
+for post in results:
+    print(post.meta.score, post.title)
+```
+
+Alternatively you can just take a `Search` object and restrict it to return our document type, wrapped in the correct class:
+
+```python
+s = Search()
+s = s.doc_type(Post)
+```
+
+You can also combine document classes with standard doc types (just strings), which will be treated as before. If you pass in multiple `Document` subclasses, each document in the response will be wrapped in its class.
+
+If you want to run suggestions, just use the `suggest` method on the `Search` object:
+
+```python
+s = Post.search()
+s = s.suggest('title_suggestions', 'pyth', completion={'field': 'title_suggest'})
+
+response = s.execute()
+
+for result in response.suggest.title_suggestions:
+    print('Suggestions for %s:' % result.text)
+    for option in result.options:
+        print('  %s (%r)' % (option.text, option.payload))
+```
+
+
+#### `class Meta` options [_class_meta_options]
+
+In the `Meta` class inside your document definition you can define various metadata for your document:
+
+* `mapping`: optional instance of the `Mapping` class to use as a base for the mappings created from the fields on the document class itself.
+
+Any attributes on the `Meta` class that are instances of `MetaField` will be used to control the mapping of the meta fields (`_all`, `dynamic` etc.). Just name the parameter (without the leading underscore) as the field you wish to map and pass any parameters to the `MetaField` class:
+
+```python
+class Post(Document):
+    title = Text()
+
+    class Meta:
+        all = MetaField(enabled=False)
+        dynamic = MetaField('strict')
+```
+
+
+#### `class Index` options [_class_index_options]
+
+This section of the `Document` definition can contain any information about the index, its name, settings and other attributes:
+
+* `name`: name of the index to use. If it contains a wildcard (`*`) then it cannot be used for any write operations and an `index` kwarg will have to be passed explicitly when calling methods like `.save()`.
+* `using`: default connection alias to use, defaults to `'default'`
+* `settings`: dictionary containing any settings for the `Index` object like `number_of_shards`.
+* `analyzers`: additional list of analyzers that should be defined on an index (see `analysis` for details).
+* `aliases`: dictionary with any alias definitions
+
+
+#### Document Inheritance [_document_inheritance]
+
+You can use standard Python inheritance to extend models. This can be useful in a few scenarios. For example, if you want to have a `BaseDocument` defining some common fields that several different `Document` classes should share:
+
+```python
+class User(InnerDoc):
+    username = Text(fields={'keyword': Keyword()})
+    email = Text()
+
+class BaseDocument(Document):
+    created_by = Object(User)
+    created_date = Date()
+    last_updated = Date()
+
+    def save(self, **kwargs):
+        if not self.created_date:
+            self.created_date = datetime.now()
+        self.last_updated = datetime.now()
+        return super(BaseDocument, self).save(**kwargs)
+
+class BlogPost(BaseDocument):
+    class Index:
+        name = 'blog'
+```
+
+Another use case would be using the [join type](elasticsearch://reference/elasticsearch/mapping-reference/parent-join.md) to have multiple different entities in a single index. You can see an [example](https://github.com/elastic/elasticsearch-py/blob/master/examples/dsl/parent_child.py) of this approach. Note that in this case, if the subclasses don’t define their own `Index` classes, the mappings are merged and shared between all the subclasses.
+
+
+
+### Index [_index]
+
+In a typical scenario, using `class Index` on a `Document` class is sufficient to perform any action. In a few cases, though, it can be useful to manipulate an `Index` object directly.
+
+`Index` is a class responsible for holding all the metadata related to an index in Elasticsearch - mappings and settings. It is most useful when defining your mappings since it allows for easy creation of multiple mappings at the same time. This is especially useful when setting up your Elasticsearch objects in a migration:
+
+```python
+from elasticsearch.dsl import Index, Document, Text, analyzer
+
+blogs = Index('blogs')
+
+# define custom settings
+blogs.settings(
+    number_of_shards=1,
+    number_of_replicas=0
+)
+
+# define aliases
+blogs.aliases(
+    old_blogs={}
+)
+
+# register a document with the index
+blogs.document(Post)
+
+# can also be used as class decorator when defining the Document
+@blogs.document
+class Post(Document):
+    title = Text()
+
+# You can attach custom analyzers to the index
+
+html_strip = analyzer('html_strip',
+    tokenizer="standard",
+    filter=["standard", "lowercase", "stop", "snowball"],
+    char_filter=["html_strip"]
+)
+
+blogs.analyzer(html_strip)
+
+# delete the index, ignore if it doesn't exist
+blogs.delete(ignore=404)
+
+# create the index in elasticsearch
+blogs.create()
+```
+
+You can also set up a template for your indices and use the `clone` method to create specific copies:
+
+```python
+blogs = Index('blogs', using='production')
+blogs.settings(number_of_shards=2)
+blogs.document(Post)
+
+# create a copy of the index with different name
+company_blogs = blogs.clone('company-blogs')
+
+# create a different copy on different cluster
+dev_blogs = blogs.clone('blogs', using='dev')
+# and change its settings
+dev_blogs.settings(number_of_shards=1)
+```
+
+#### IndexTemplate [index-template]
+
+The DSL module also exposes an option to manage [index templates](docs-content://manage-data/data-store/templates.md) in Elasticsearch using the `IndexTemplate` class, which has a very similar API to `Index`.
+
+Once an index template is saved in Elasticsearch, its contents will be automatically applied to new indices (existing indices are completely unaffected by templates) that match the template pattern (any index starting with `logs-` in our example), even if the index is created automatically upon indexing a document into that index.
+
+A potential workflow for a set of time-based indices governed by a single template:
+
+```python
+from datetime import datetime
+
+from elasticsearch.dsl import Document, Date, Text
+
+
+class Log(Document):
+    content = Text()
+    timestamp = Date()
+
+    class Index:
+        name = "logs-*"
+        settings = {
+            "number_of_shards": 2
+        }
+
+    def save(self, **kwargs):
+        # assign now if no timestamp given
+        if not self.timestamp:
+            self.timestamp = datetime.now()
+
+        # override the index to go to the proper timeslot
+        kwargs['index'] = self.timestamp.strftime('logs-%Y%m%d')
+        return super().save(**kwargs)
+
+# once, as part of application setup, during deploy/migrations:
+logs = Log._index.as_template('logs', order=0)
+logs.save()
+
+# to perform search across all logs:
+search = Log.search()
+```
+
+
+
+
+## Faceted Search [faceted_search]
+
+The library comes with a simple abstraction aimed at helping you develop faceted navigation for your data.
+
+::::{note}
+This API is experimental and will be subject to change. Any feedback is welcome.
+
+::::
+
+
+### Configuration [_configuration_2]
+
+You can provide several configuration options (as class attributes) when declaring a `FacetedSearch` subclass:
+
+* `index`: the name of the index (as a string) to search through, defaults to `'_all'`.
+* `doc_types`: list of `Document` subclasses or strings to be used, defaults to `['_all']`.
+* `fields`: list of fields on the document type to search through. The list will be passed to the `MultiMatch` query, so it can contain boost values (`'title^5'`); defaults to `['*']`.
+* `facets`: dictionary of facets to display/filter on. The key is the name displayed and the values should be instances of any `Facet` subclass, for example: `{'tags': TermsFacet(field='tags')}`
+
+#### Facets [_facets]
+
+There are several different facets available:
+
+* `TermsFacet`: provides an option to split documents into groups based on a value of a field, for example `TermsFacet(field='category')`
+* `DateHistogramFacet`: splits documents into time intervals, for example: `DateHistogramFacet(field="published_date", calendar_interval="day")`
+* `HistogramFacet`: similar to `DateHistogramFacet` but for numerical values: `HistogramFacet(field="rating", interval=2)`
+* `RangeFacet`: allows you to define your own ranges for numerical fields: `RangeFacet(field="comment_count", ranges=[("few", (None, 2)), ("lots", (2, None))])`
+* `NestedFacet`: is just a simple facet that wraps another to provide access to nested documents: `NestedFacet('variants', TermsFacet(field='variants.color'))`
+
+By default facet results will only calculate the document count. If you wish for a different metric, you can pass in any single-value metric aggregation as the `metric` kwarg (`TermsFacet(field='tags', metric=A('max', field='timestamp'))`). When specifying `metric`, the results will be, by default, sorted in descending order by that metric. To change it to ascending, specify `metric_sort="asc"`, and to just sort by document count, use `metric_sort=False`.
+
+
+#### Advanced [_advanced]
+
+If you require any custom behavior or modifications, simply override one or more of the methods responsible for the class’s functions:
+
+* `search(self)`: is responsible for constructing the `Search` object used. Override this if you want to customize the search object (for example by adding a global filter for published articles only).
+* `query(self, search)`: adds the query part of the search (if search input is specified), by default using a `MultiMatch` query. Override this if you want to modify the query type used.
+* `highlight(self, search)`: defines the highlighting on the `Search` object and returns a new one. The default behavior is to highlight on all fields specified for search.
+
+
+
+### Usage [_usage]
+
+The custom subclass can be instantiated empty to provide an empty search (matching everything) or with `query`, `filters` and `sort`.
+
+* `query`: is used to pass in the text of the query to be performed. If `None` is passed in (the default), a `MatchAll` query will be used. For example, `'python web'`.
+* `filters`: is a dictionary containing all the facet filters that you wish to apply. Use the name of the facet (from the `.facets` attribute) as the key and one of the possible values as the value. For example, `{'tags': 'python'}`.
+* `sort`: is a tuple or list of fields on which the results should be sorted. The format of the individual fields is the same as that of the fields passed to `~elasticsearch.dsl.Search.sort`.
+
+#### Response [_response_2]
+
+The response returned from the `FacetedSearch` object (by calling `.execute()`) is a subclass of the standard `Response` class that adds a property called `facets`, which contains a dictionary with lists of buckets, each represented by a tuple of key, document count, and a flag indicating whether this value has been filtered on.
+
+
+
+### Example [_example]
+
+```python
+from datetime import date
+
+from elasticsearch.dsl import FacetedSearch, TermsFacet, DateHistogramFacet
+
+class BlogSearch(FacetedSearch):
+    doc_types = [Article, ]
+    # fields that should be searched
+    fields = ['tags', 'title', 'body']
+
+    facets = {
+        # use bucket aggregations to define facets
+        'tags': TermsFacet(field='tags'),
+        'publishing_frequency': DateHistogramFacet(field='published_from', interval='month')
+    }
+
+    def search(self):
+        # override methods to add custom pieces
+        s = super().search()
+        return s.filter('range', publish_from={'lte': 'now/h'})
+
+bs = BlogSearch('python web', {'publishing_frequency': date(2015, 6, 1)})
+response = bs.execute()
+
+# access hits and other attributes as usual
+total = response.hits.total
+print('total hits', total.relation, total.value)
+for hit in response:
+    print(hit.meta.score, hit.title)
+
+for (tag, count, selected) in response.facets.tags:
+    print(tag, ' (SELECTED):' if selected else ':', count)
+
+for (month, count, selected) in response.facets.publishing_frequency:
+    print(month.strftime('%B %Y'), ' (SELECTED):' if selected else ':', count)
+```
+
+
+
+## Update By Query [update_by_query]
+
+### The `Update By Query` object [_the_update_by_query_object]
+
+The `Update By Query` object enables the use of the [_update_by_query](https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-update-by-query) endpoint to perform an update on documents that match a search query.
+
+The object is implemented as a modification of the `Search` object, containing a subset of its query methods, as well as a `script` method, which is used to make updates.
+
+The `Update By Query` object implements the following `Search` query types:
+
+* queries
+* filters
+* excludes
+
+For more information on queries, see the `search_dsl` chapter.
+
+Like the `Search` object, the API is designed to be chainable. This means that the `Update By Query` object is immutable: all changes to the object will result in a shallow copy being created which contains the changes.
This means you can safely pass the `Update By Query` object to foreign code without fear of it modifying your objects, as long as it sticks to the `Update By Query` object APIs.
+
+You can define your client in a number of ways, but the preferred method is to use a global configuration. For more information on defining a client, see the `configuration` chapter.
+
+Once your client is defined, you can instantiate a copy of the `Update By Query` object as shown below:
+
+```python
+from elasticsearch.dsl import UpdateByQuery
+
+ubq = UpdateByQuery().using(client)
+# or
+ubq = UpdateByQuery(using=client)
+```
+
+::::{note}
+All methods return a *copy* of the object, making it safe to pass to outside code.
+
+::::
+
+
+The API is chainable, allowing you to combine multiple method calls in one statement:
+
+```python
+ubq = UpdateByQuery().using(client).query("match", title="python")
+```
+
+To send the request to Elasticsearch:
+
+```python
+response = ubq.execute()
+```
+
+Note that there are limits to chaining with the `script` method: calling `script` multiple times will overwrite the previous value. That is, only a single script can be sent per call; an attempt to use two scripts will result in only the second script being stored.
+
+Given the example below:
+
+```python
+ubq = UpdateByQuery() \
+    .using(client) \
+    .script(source="ctx._source.likes++") \
+    .script(source="ctx._source.likes+=2")
+```
+
+the script stored by this client will be `'source': 'ctx._source.likes+=2'` and the previous call will not be stored.
+
+For debugging purposes you can serialize the `Update By Query` object to a `dict` explicitly:
+
+```python
+print(ubq.to_dict())
+```
+
+To use variables in a script, see the example below:
+
+```python
+ubq.script(
+    source="ctx._source.messages.removeIf(x -> x.somefield == params.some_var)",
+    params={
+        'some_var': 'some_string_val'
+    }
+)
+```
+
+#### Serialization and Deserialization [_serialization_and_deserialization_2]
+
+The search object can be serialized into a dictionary by using the `.to_dict()` method.
+
+You can also create an `Update By Query` object from a `dict` using the `from_dict` class method. This will create a new `Update By Query` object and populate it using the data from the dict:
+
+```python
+ubq = UpdateByQuery.from_dict({"query": {"match": {"title": "python"}}})
+```
+
+If you wish to modify an existing `Update By Query` object, overriding its properties, use the `update_from_dict` method instead, which alters an instance **in-place**:
+
+```python
+ubq = UpdateByQuery(index='i')
+ubq.update_from_dict({"query": {"match": {"title": "python"}}, "size": 42})
+```
+
+
+#### Extra properties and parameters [_extra_properties_and_parameters_2]
+
+To set extra properties of the search request, use the `.extra()` method. This can be used to define keys in the body that cannot be defined via a specific API method, like `explain`:
+
+```python
+ubq = ubq.extra(explain=True)
+```
+
+To set query parameters, use the `.params()` method:
+
+```python
+ubq = ubq.params(routing="42")
+```
+
+
+
+### Response [_response_3]
+
+You can execute your search by calling the `.execute()` method, which will return a `Response` object. The `Response` object allows you to access any key from the response dictionary via attribute access.
It also provides some convenient helpers: + +```python +response = ubq.execute() + +print(response.success()) +# True + +print(response.took) +# 12 +``` + +If you want to inspect the contents of the `response` objects, just use its `to_dict` method to get access to the raw data for pretty printing. + + + +## Using asyncio with Elasticsearch Python DSL [asyncio] + +The DSL module supports async/await with [asyncio](https://docs.python.org/3/library/asyncio.html). To ensure that you have all the required dependencies, install the `[async]` extra: + +```bash +$ python -m pip install "elasticsearch[async]" +``` + +### Connections [_connections] + +Use the `async_connections` module to manage your asynchronous connections. + +```python +from elasticsearch.dsl import async_connections + +async_connections.create_connection(hosts=['localhost'], timeout=20) +``` + +All the options available in the `connections` module can be used with `async_connections`. + +#### How to avoid *Unclosed client session / connector* warnings on exit [_how_to_avoid_unclosed_client_session_connector_warnings_on_exit] + +These warnings come from the `aiohttp` package, which is used internally by the `AsyncElasticsearch` client. They appear often when the application exits and are caused by HTTP connections that are open when they are garbage collected. To avoid these warnings, make sure that you close your connections. + +```python +es = async_connections.get_connection() +await es.close() +``` + + + +### Search DSL [_search_dsl] + +Use the `AsyncSearch` class to perform asynchronous searches. + +```python +from elasticsearch.dsl import AsyncSearch + +s = AsyncSearch().query("match", title="python") +async for hit in s: + print(hit.title) +``` + +Instead of using the `AsyncSearch` object as an asynchronous iterator, you can explicitly call the `execute()` method to get a `Response` object. + +```python +s = AsyncSearch().query("match", title="python") +response = await s.execute() +for hit in response: + print(hit.title) +``` + +An `AsyncMultiSearch` is available as well. + +```python +from elasticsearch.dsl import AsyncMultiSearch + +ms = AsyncMultiSearch(index='blogs') + +ms = ms.add(AsyncSearch().filter('term', tags='python')) +ms = ms.add(AsyncSearch().filter('term', tags='elasticsearch')) + +responses = await ms.execute() + +for response in responses: + print("Results for query %r." % response.search.query) + for hit in response: + print(hit.title) +``` + + +### Asynchronous Documents, Indexes, and more [_asynchronous_documents_indexes_and_more] + +The `Document`, `Index`, `IndexTemplate`, `Mapping`, `UpdateByQuery` and `FacetedSearch` classes all have asynchronous versions that use the same name with an `Async` prefix. These classes expose the same interfaces as the synchronous versions, but any methods that perform I/O are defined as coroutines. + +Auxiliary classes that do not perform I/O do not have asynchronous versions. The same classes can be used in synchronous and asynchronous applications. + +When using a custom analyzer in an asynchronous application, use the `async_simulate()` method to invoke the Analyze API on it. + +Consult the `api` section for details about each specific method. 
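+
+As an example, here is a minimal sketch using the asynchronous document class (it assumes a default async connection has been created as shown above; the `BlogPost` class is made up):
+
+```python
+from elasticsearch.dsl import AsyncDocument, Text
+
+class BlogPost(AsyncDocument):
+    title = Text()
+
+    class Index:
+        name = 'blog'
+
+async def store(title):
+    await BlogPost.init()          # performs I/O, so it is awaited
+    post = BlogPost(title=title)
+    await post.save()              # performs I/O, so it is awaited
+```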
+ + + diff --git a/docs/guide/dsl/tutorials.asciidoc b/docs/reference/_tutorials.md similarity index 76% rename from docs/guide/dsl/tutorials.asciidoc rename to docs/reference/_tutorials.md index 1b5ff0e2c..7b72bc04e 100644 --- a/docs/guide/dsl/tutorials.asciidoc +++ b/docs/reference/_tutorials.md @@ -1,11 +1,15 @@ -=== Tutorials +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/_tutorials.html +--- -==== Search +# Tutorials [_tutorials] -Let's have a typical search request written directly as a `dict`: +## Search [_search] -[source,python] ----- +Let’s have a typical search request written directly as a `dict`: + +```python from elasticsearch import Elasticsearch client = Elasticsearch("https://localhost:9200") @@ -35,16 +39,13 @@ for hit in response['hits']['hits']: for tag in response['aggregations']['per_tag']['buckets']: print(tag['key'], tag['max_lines']['value']) ----- +``` -The problem with this approach is that it is very verbose, prone to -syntax mistakes like incorrect nesting, hard to modify (eg. adding -another filter) and definitely not fun to write. +The problem with this approach is that it is very verbose, prone to syntax mistakes like incorrect nesting, hard to modify (eg. adding another filter) and definitely not fun to write. -Let's rewrite the example using the DSL module: +Let’s rewrite the example using the DSL module: -[source,python] ----- +```python from elasticsearch import Elasticsearch from elasticsearch.dsl import Search @@ -65,7 +66,7 @@ for hit in response: for tag in response.aggregations.per_tag.buckets: print(tag.key, tag.max_lines.value) ----- +``` As you see, the library took care of: @@ -75,13 +76,12 @@ As you see, the library took care of: * providing a convenient access to response data * no curly or square brackets everywhere -==== Persistence -Let's have a simple Python class representing an article in a blogging -system: +## Persistence [_persistence] + +Let’s have a simple Python class representing an article in a blogging system: -[source,python] ----- +```python from datetime import datetime from elasticsearch.dsl import Document, Date, Integer, Keyword, Text, connections @@ -122,7 +122,7 @@ print(article.is_published()) # Display cluster health print(connections.get_connection().cluster.health()) ----- +``` In this example you can see: @@ -130,25 +130,24 @@ In this example you can see: * defining fields with mapping configuration * setting index name * defining custom methods -* overriding the built-in `.save()` method to hook into the persistence -life cycle +* overriding the built-in `.save()` method to hook into the persistence life cycle * retrieving and saving the object into Elasticsearch * accessing the underlying client for other APIs You can see more in the `persistence` chapter. -==== Pre-built Faceted Search -If you have your `Document`s defined you can very easily create a -faceted search class to simplify searching and filtering. +## Pre-built Faceted Search [_pre_built_faceted_search] + +If you have your `Document`s defined you can very easily create a faceted search class to simplify searching and filtering. -[NOTE] -==== +::::{note} This feature is experimental and may be subject to change. 
-==== -[source,python] ----- +:::: + + +```python from elasticsearch.dsl import FacetedSearch, TermsFacet, DateHistogramFacet class BlogSearch(FacetedSearch): @@ -174,20 +173,16 @@ for (tag, count, selected) in response.facets.tags: for (month, count, selected) in response.facets.publishing_frequency: print(month.strftime('%B %Y'), ' (SELECTED):' if selected else ':', count) ----- +``` You can find more details in the `faceted_search` chapter. -==== Update By Query -Let's resume the simple example of articles on a blog, and let's assume -that each article has a number of likes. For this example, imagine we -want to increment the number of likes by 1 for all articles that match a -certain tag and do not match a certain description. Writing this as a -`dict`, we would have the following code: +## Update By Query [_update_by_query] -[source,python] ----- +Let’s resume the simple example of articles on a blog, and let’s assume that each article has a number of likes. For this example, imagine we want to increment the number of likes by 1 for all articles that match a certain tag and do not match a certain description. Writing this as a `dict`, we would have the following code: + +```python from elasticsearch import Elasticsearch client = Elasticsearch() @@ -206,12 +201,11 @@ response = client.update_by_query( } }, ) ----- +``` Using the DSL, we can now express this query as such: -[source,python] ----- +```python from elasticsearch import Elasticsearch from elasticsearch.dsl import Search, UpdateByQuery @@ -222,22 +216,16 @@ ubq = UpdateByQuery(using=client, index="my-index") \ .script(source="ctx._source.likes++", lang="painless") response = ubq.execute() ----- +``` + +As you can see, the `Update By Query` object provides many of the savings offered by the `Search` object, and additionally allows one to update the results of the search based on a script assigned in the same manner. -As you can see, the `Update By Query` object provides many of the -savings offered by the `Search` object, and additionally allows one to -update the results of the search based on a script assigned in the same -manner. -==== Migration from the standard client +## Migration from the standard client [_migration_from_the_standard_client] -You don't have to port your entire application to get the benefits of -the DSL module, you can start gradually by creating a `Search` object -from your existing `dict`, modifying it using the API and serializing it -back to a `dict`: +You don’t have to port your entire application to get the benefits of the DSL module, you can start gradually by creating a `Search` object from your existing `dict`, modifying it using the API and serializing it back to a `dict`: -[source,python] ----- +```python body = {...} # insert complicated query here # Convert to Search object @@ -248,4 +236,6 @@ s.filter("term", tags="python") # Convert back to dict to plug back into existing code body = s.to_dict() ----- +``` + + diff --git a/docs/reference/async.md b/docs/reference/async.md new file mode 100644 index 000000000..f205d3807 --- /dev/null +++ b/docs/reference/async.md @@ -0,0 +1,108 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/async.html +--- + +# Using with asyncio [async] + +The `elasticsearch` package supports async/await with [asyncio](https://docs.python.org/3/library/asyncio.html) and [aiohttp](https://docs.aiohttp.org). 
You can either install `aiohttp` directly or use the `[async]` extra: + +```bash +$ python -m pip install elasticsearch aiohttp + +# - OR - + +$ python -m pip install elasticsearch[async] +``` + + +## Getting Started with Async [_getting_started_with_async] + +After installation all async API endpoints are available via `AsyncElasticsearch` and are used in the same way as other APIs, with an extra `await`: + +```python +import asyncio +from elasticsearch import AsyncElasticsearch + +client = AsyncElasticsearch() + +async def main(): + resp = await client.search( + index="documents", + body={"query": {"match_all": {}}}, + size=20, + ) + print(resp) + +asyncio.run(main()) +``` + +All APIs that are available under the sync client are also available under the async client. + +[Reference documentation](https://elasticsearch-py.readthedocs.io/en/latest/async.html#api-reference) + + +## ASGI Applications and Elastic APM [_asgi_applications_and_elastic_apm] + +[ASGI](https://asgi.readthedocs.io) (Asynchronous Server Gateway Interface) is a way to serve Python web applications making use of async I/O to achieve better performance. Some examples of ASGI frameworks include FastAPI, Django 3.0+, and Starlette. If you’re using one of these frameworks along with Elasticsearch then you should use `AsyncElasticsearch` to avoid blocking the event loop with synchronous network calls. + +[Elastic APM](apm-agent-python://reference/index.md) also supports tracing of async Elasticsearch queries just the same as synchronous queries. For an example of how to configure `AsyncElasticsearch` with the popular ASGI framework [FastAPI](https://fastapi.tiangolo.com/) and APM tracing, there is a [pre-built example](https://github.com/elastic/elasticsearch-py/tree/master/examples/fastapi-apm) in the `examples/fastapi-apm` directory. + +See also the [Using OpenTelemetry](/reference/opentelemetry.md) page. + + +## Frequently Asked Questions [_frequently_asked_questions] + + +### ValueError when initializing `AsyncElasticsearch`? [_valueerror_when_initializing_asyncelasticsearch] + +If you receive `ValueError: You must have 'aiohttp' installed to use AiohttpHttpNode` when trying to use `AsyncElasticsearch`, ensure that you have `aiohttp` installed in your environment (check with `$ python -m pip freeze | grep aiohttp`). Otherwise, async support won’t be available. + + +### What about the `elasticsearch-async` package? [_what_about_the_elasticsearch_async_package] + +Previously asyncio was supported separately via the [elasticsearch-async](https://github.com/elastic/elasticsearch-py-async) package. The `elasticsearch-async` package has been deprecated in favor of `AsyncElasticsearch`, which is provided by the `elasticsearch` package in v7.8 and onwards. + + +### Receiving *Unclosed client session / connector* warning? [_receiving_unclosed_client_session_connector_warning] + +This warning is created by `aiohttp` when an open HTTP connection is garbage collected. You’ll typically run into this when closing your application. To resolve the issue ensure that `AsyncElasticsearch.close()` is called before the `AsyncElasticsearch` instance is garbage collected.
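In the simplest case this is a single explicit call when you’re done with the client. A minimal sketch, assuming a local cluster at `http://localhost:9200` (a placeholder endpoint):

```python
import asyncio
from elasticsearch import AsyncElasticsearch

async def main():
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
    try:
        print(await client.info())
    finally:
        # Close the client so aiohttp's connections are released
        # before the event loop shuts down.
        await client.close()

asyncio.run(main())
```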
+ +For example, if using FastAPI, that might look like this: + +```python +import os +from contextlib import asynccontextmanager + +from fastapi import FastAPI +from elasticsearch import AsyncElasticsearch + +ELASTICSEARCH_URL = os.environ["ELASTICSEARCH_URL"] +client = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + global client + client = AsyncElasticsearch(ELASTICSEARCH_URL) + yield + await client.close() + +app = FastAPI(lifespan=lifespan) + +@app.get("/") +async def main(): + return await client.info() +``` + +You can run this example by saving it to `main.py` and executing `ELASTICSEARCH_URL=http://localhost:9200 uvicorn main:app`. + + +## Async Helpers [_async_helpers] + +Async variants of all helpers are available in `elasticsearch.helpers` and are all prefixed with `async_*`. You’ll notice that these APIs are identical to the ones in the sync [*Client helpers*](/reference/client-helpers.md) documentation. + +All async helpers that accept an iterator or generator also accept async iterators and async generators. + +[Reference documentation](https://elasticsearch-py.readthedocs.io/en/latest/async.html#api-reference) + diff --git a/docs/reference/client-helpers.md b/docs/reference/client-helpers.md new file mode 100644 index 000000000..02ab4e026 --- /dev/null +++ b/docs/reference/client-helpers.md @@ -0,0 +1,72 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/client-helpers.html +--- + +# Client helpers [client-helpers] + +Here you can find a collection of simple helper functions that abstract some specifics of the raw API. For detailed examples, refer to [this page](https://elasticsearch-py.readthedocs.io/en/stable/helpers.html). + + +## Bulk helpers [bulk-helpers] + +There are several helpers for the bulk API since its requirement for specific formatting and other considerations can make it cumbersome if used directly. + +All bulk helpers accept an instance of the `{{es}}` class and an iterable `actions` (any iterable, can also be a generator, which is ideal in most cases since it allows you to index large datasets without the need to load them into memory). + +The items in the `actions` iterable should be the documents we wish to index, and they can be given in several formats. The most common one is the same as returned by `search()`, for example: + +```py +{ + '_index': 'index-name', + '_id': 42, + '_routing': 5, + 'pipeline': 'my-ingest-pipeline', + '_source': { + "title": "Hello World!", + "body": "..." + } +} +``` + +Alternatively, if `_source` is not present, the helper pops all metadata fields from the doc and uses the rest as the document data: + +```py +{ + "_id": 42, + "_routing": 5, + "title": "Hello World!", + "body": "..." +} +``` + +The `bulk()` API accepts `index`, `create`, `delete`, and `update` actions. Use the `_op_type` field to specify an action (`_op_type` defaults to `index`): + +```py +{ + '_op_type': 'delete', + '_index': 'index-name', + '_id': 42, +} +{ + '_op_type': 'update', + '_index': 'index-name', + '_id': 42, + 'doc': {'question': 'The life, universe and everything.'} +} +``` + + +## Scan [scan] + +A simple abstraction on top of the `scroll()` API: an iterator that yields all hits as returned by the underlying scroll requests. + +By default scan does not return results in any pre-determined order. To have a standard order in the returned documents (either by score or explicit sort definition) when scrolling, use `preserve_order=True`.
This may be an expensive operation and will negate the performance benefits of using `scan`. + +```py +from elasticsearch.helpers import scan + +for hit in scan( + es, + query={"query": {"match": {"title": "python"}}}, + index="orders-*", +): + print(hit) +``` + diff --git a/docs/guide/configuration.asciidoc b/docs/reference/configuration.md similarity index 69% rename from docs/guide/configuration.asciidoc rename to docs/reference/configuration.md index 15c3f413c..7d500a970 100644 --- a/docs/guide/configuration.asciidoc +++ b/docs/reference/configuration.md @@ -1,92 +1,86 @@ -[[config]] -== Configuration +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/config.html +--- -This page contains information about the most important configuration options of -the Python {es} client. +# Configuration [config] +This page contains information about the most important configuration options of the Python {{es}} client. -[discrete] -[[tls-and-ssl]] -=== TLS/SSL + +## TLS/SSL [tls-and-ssl] The options in this section can only be used when the node is configured for HTTPS. An error will be raised if using these options with an HTTP node. -[discrete] -==== Verifying server certificates -The typical route to verify a cluster certificate is via a "CA bundle" which can be specified via the `ca_certs` parameter. If no options are given and the https://github.com/certifi/python-certifi[certifi package] is installed then certifi's CA bundle is used by default. +### Verifying server certificates [_verifying_server_certificates] + +The typical route to verify a cluster certificate is via a "CA bundle" which can be specified via the `ca_certs` parameter. If no options are given and the [certifi package](https://github.com/certifi/python-certifi) is installed then certifi’s CA bundle is used by default. If you have your own CA bundle to use you can configure it via the `ca_certs` parameter: -[source,python] ------------------------------------- +```python client = Elasticsearch( "https://...", ca_certs="/path/to/certs.pem" ) ------------------------------------- +``` -If using a generated certificate or certificate with a known fingerprint you can use the `ssl_assert_fingerprint` to specify the fingerprint which tries to match the server's leaf certificate during the TLS handshake. If there is any matching certificate the connection is verified, otherwise a `TlsError` is raised. +If using a generated certificate or a certificate with a known fingerprint you can use the `ssl_assert_fingerprint` parameter to specify the fingerprint, which is matched against the server’s leaf certificate during the TLS handshake. If there is any matching certificate the connection is verified, otherwise a `TlsError` is raised. In Python 3.9 and earlier only the leaf certificate will be verified but in Python 3.10+ private APIs are used to verify any certificate in the certificate chain. This helps when using certificates that are generated on a multi-node cluster. -[source,python] ------------------------------------- +```python client = Elasticsearch( "https://...", ssl_assert_fingerprint=( "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3" ) ) ------------------------------------- +``` To disable certificate verification use the `verify_certs=False` parameter. This option should be avoided in production; instead, use the other options to verify the cluster's certificate.
-[source,python] ------------------------------------- +```python client = Elasticsearch( "https://...", verify_certs=False ) ------------------------------------- +``` + -[discrete] -==== TLS versions +### TLS versions [_tls_versions] Configuring the minimum TLS version to connect to is done via the `ssl_version` parameter. By default this is set to a minimum value of TLSv1.2. Use the `ssl.TLSVersion` enumeration to specify versions. -[source,python] ------------------------------------- +```python import ssl client = Elasticsearch( ..., ssl_version=ssl.TLSVersion.TLSv1_2 ) ------------------------------------- +``` -[discrete] -==== Client TLS certificate authentication + +### Client TLS certificate authentication [_client_tls_certificate_authentication] Elasticsearch can be configured to authenticate clients via TLS client certificates. Client certificate and keys can be configured via the `client_cert` and `client_key` parameters: -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., client_cert="/path/to/cert.pem", client_key="/path/to/key.pem", ) ------------------------------------- +``` -[discrete] -==== Using an SSLContext +### Using an SSLContext [_using_an_sslcontext] -For advanced users an `ssl.SSLContext` object can be used for configuring TLS via the `ssl_context` parameter. The `ssl_context` parameter can't be combined with any other TLS options except for the `ssl_assert_fingerprint` parameter. +For advanced users an `ssl.SSLContext` object can be used for configuring TLS via the `ssl_context` parameter. The `ssl_context` parameter can’t be combined with any other TLS options except for the `ssl_assert_fingerprint` parameter. -[source,python] ------------------------------------- +```python import ssl # Create and configure an SSLContext @@ -97,39 +91,30 @@ client = Elasticsearch( ..., ssl_context=ctx ) ------------------------------------- +``` -[discrete] -[[compression]] -=== HTTP compression +## HTTP compression [compression] -Compression of HTTP request and response bodies can be enabled with the `http_compress` parameter. -If enabled then HTTP request bodies will be compressed with `gzip` and HTTP responses will include -the `Accept-Encoding: gzip` HTTP header. By default compression is disabled. +Compression of HTTP request and response bodies can be enabled with the `http_compress` parameter. If enabled then HTTP request bodies will be compressed with `gzip` and HTTP responses will include the `Accept-Encoding: gzip` HTTP header. By default compression is disabled. -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., http_compress=True # Enable compression! ) ------------------------------------- +``` -HTTP compression is recommended to be enabled when requests are traversing the network. -Compression is automatically enabled when connecting to Elastic Cloud. +HTTP compression is recommended to be enabled when requests are traversing the network. Compression is automatically enabled when connecting to Elastic Cloud. -[discrete] -[[timeouts]] -=== Request timeouts +## Request timeouts [timeouts] Requests can be configured to timeout if taking too long to be serviced. The `request_timeout` parameter can be passed via the client constructor or the client `.options()` method. When the request times out the node will raise a `ConnectionTimeout` exception which can trigger retries. Setting `request_timeout` to `None` will disable timeouts. 
-[source,python] ------------------------------------- +```python client = Elasticsearch( ..., request_timeout=10 # 10 second timeout @@ -137,17 +122,16 @@ client = Elasticsearch( # Search request will timeout in 5 seconds client.options(request_timeout=5).search(...) ------------------------------------- +``` + -[discrete] -==== API and server timeouts +### API and server timeouts [_api_and_server_timeouts] There are API-level timeouts to take into consideration when making requests which can cause the request to timeout on server-side rather than client-side. You may need to configure both a transport and API level timeout for long running operations. In the example below there are three different configurable timeouts for the `cluster.health` API all with different meanings for the request: -[source,python] ------------------------------------- +```python client.options( # Amount of time to wait for an HTTP response to start. request_timeout=30 @@ -157,19 +141,16 @@ client.options( # Amount of time to wait for info from the master node. master_timeout=10, ) ------------------------------------- +``` -[discrete] -[[retries]] -=== Retries +## Retries [retries] -Requests can be retried if they don't return with a successful response. This provides a way for requests to be resilient against transient failures or overloaded nodes. +Requests can be retried if they don’t return with a successful response. This provides a way for requests to be resilient against transient failures or overloaded nodes. The maximum number of retries per request can be configured via the `max_retries` parameter. Setting this parameter to 0 disables retries. This parameter can be set in the client constructor or per-request via the client `.options()` method: -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., max_retries=5 @@ -182,29 +163,27 @@ client.options(max_retries=0).index( "title": "..." } ) ------------------------------------- +``` -[discrete] -==== Retrying on connection errors and timeouts + +### Retrying on connection errors and timeouts [_retrying_on_connection_errors_and_timeouts] Connection errors are automatically retried if retries are enabled. Retrying requests on connection timeouts can be enabled or disabled via the `retry_on_timeout` parameter. This parameter can be set on the client constructor or via the client `.options()` method: -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., retry_on_timeout=True ) client.options(retry_on_timeout=False).info() ------------------------------------- +``` + -[discrete] -==== Retrying status codes +### Retrying status codes [_retrying_status_codes] By default if retries are enabled `retry_on_status` is set to `(429, 502, 503, 504)`. This parameter can be set on the client constructor or via the client `.options()` method. Setting this value to `()` will disable the default behavior. -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., retry_on_status=() @@ -217,17 +196,16 @@ client.options(retry_on_status=[500]).index( "title": "..." } ) ------------------------------------- +``` -[discrete] -==== Ignoring status codes -By default an `ApiError` exception will be raised for any non-2XX HTTP requests that exhaust retries, if any. If you're expecting an HTTP error from the API but aren't interested in raising an exception you can use the `ignore_status` parameter via the client `.options()` method. 
+### Ignoring status codes [_ignoring_status_codes] + +By default an `ApiError` exception will be raised for any non-2XX HTTP response that exhausts retries, if any. If you’re expecting an HTTP error from the API but aren’t interested in raising an exception you can use the `ignore_status` parameter via the client `.options()` method. + A good example where this is useful is setting up or cleaning up resources in a cluster in a robust way: -[source,python] ------------------------------------- +```python client = Elasticsearch(...) # API request is robust against the index not existing: @@ -242,33 +220,33 @@ resp = client.options(ignore_status=[400]).indices.create( index="delete-this", mapping={ ... } ) resp.meta.status # Can be either '2XX' or '400' ------------------------------------- +``` When using the `ignore_status` parameter the error response will be returned serialized just like a non-error response. In these cases it can be useful to inspect the HTTP status of the response. To do this you can inspect `resp.meta.status`. -[discrete] -[[sniffing]] -=== Sniffing for new nodes + +## Sniffing for new nodes [sniffing] Additional nodes can be discovered by a process called "sniffing" where the client will query the cluster for more nodes that can handle requests. Sniffing can happen at three different times: on client instantiation, before requests, and on a node failure. These three behaviors can be enabled and disabled with the `sniff_on_start`, `sniff_before_requests`, and `sniff_on_node_failure` parameters. -IMPORTANT: When using an HTTP load balancer or proxy you cannot use sniffing functionality as the cluster would supply the client with IP addresses to directly connect to the cluster, circumventing the load balancer. Depending on your configuration this might be something you don't want or break completely. +::::{important} +When using an HTTP load balancer or proxy you cannot use sniffing functionality as the cluster would supply the client with IP addresses to directly connect to the cluster, circumventing the load balancer. Depending on your configuration this might be something you don’t want, or it might break completely. +:::: + + +### Waiting between sniffing attempts [_waiting_between_sniffing_attempts] To avoid needlessly sniffing too often there is a delay between attempts to discover new nodes. This value can be controlled via the `min_delay_between_sniffing` parameter. -[discrete] -==== Filtering nodes which are sniffed -By default nodes which are marked with only a `master` role will not be used. To change the behavior the parameter `sniffed_node_callback` can be used. To mark a sniffed node not to be added to the node pool -return `None` from the `sniffed_node_callback`, otherwise return a `NodeConfig` instance. +### Filtering nodes which are sniffed [_filtering_nodes_which_are_sniffed] + +By default nodes which are marked with only a `master` role will not be used. To change this behavior, use the `sniffed_node_callback` parameter. To mark a sniffed node not to be added to the node pool return `None` from the `sniffed_node_callback`, otherwise return a `NodeConfig` instance.
-[source,python] ------------------------------------- +```python from typing import Optional, Dict, Any from elastic_transport import NodeConfig from elasticsearch import Elasticsearch @@ -287,13 +265,11 @@ client = Elasticsearch( "https://localhost:9200", sniffed_node_callback=filter_master_eligible_nodes ) ------------------------------------- +``` -The `node_info` parameter is part of the response from the `nodes.info()` API, below is an example -of what that object looks like: +The `node_info` parameter is part of the response from the `nodes.info()` API, below is an example of what that object looks like: -[source,json] ------------------------------------- +```json { "name": "SRZpKFZ", "transport_address": "127.0.0.1:9300", @@ -308,30 +284,26 @@ of what that object looks like: "max_content_length_in_bytes": 104857600 } } ------------------------------------- +``` -[discrete] -[[node-pool]] -=== Node Pool +## Node Pool [node-pool] -[discrete] -==== Selecting a node from the pool + +### Selecting a node from the pool [_selecting_a_node_from_the_pool] You can specify a node selector pattern via the `node_selector_class` parameter. The supported values are `round_robin` and `random`. Default is `round_robin`. -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., node_selector_class="round_robin" ) ------------------------------------- +``` Custom selectors are also supported: -[source,python] ------------------------------------- +```python from elastic_transport import NodeSelector class CustomSelector(NodeSelector): @@ -341,12 +313,12 @@ client = Elasticsearch( ..., node_selector_class=CustomSelector ) ------------------------------------- +``` + -[discrete] -==== Marking nodes dead and alive +### Marking nodes dead and alive [_marking_nodes_dead_and_alive] -Individual nodes of Elasticsearch may have transient connectivity or load issues which may make them unable to service requests. To combat this the pool of nodes will detect when a node isn't able to service requests due to transport or API errors. +Individual nodes of Elasticsearch may have transient connectivity or load issues which may make them unable to service requests. To combat this the pool of nodes will detect when a node isn’t able to service requests due to transport or API errors. After a node has been timed out it will be moved back to the set of "alive" nodes but only after the node returns a successful response will the node be marked as "alive" in terms of consecutive errors. @@ -355,16 +327,13 @@ The `dead_node_backoff_factor` and `max_dead_node_backoff` parameters can be use The calculation is equal to `min(dead_node_backoff_factor * (2 ** (consecutive_failures - 1)), max_dead_node_backoff)`. -[discrete] -[[serializer]] -=== Serializers +## Serializers [serializer] Serializers transform bytes on the wire into native Python objects and vice-versa. By default the client ships with serializers for `application/json`, `application/x-ndjson`, `text/*`, `application/vnd.apache.arrow.stream` and `application/mapbox-vector-tile`. You can define custom serializers via the `serializers` parameter: -[source,python] ------------------------------------- +```python from elasticsearch import Elasticsearch, JsonSerializer class JsonSetSerializer(JsonSerializer): @@ -379,52 +348,47 @@ client = Elasticsearch( # Serializers are a mapping of 'mimetype' to Serializer class. 
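# Registering a serializer under 'application/json' replaces the
# client's built-in JSON serializer with the custom one.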
serializers={"application/json": JsonSetSerializer()} ) ------------------------------------- +``` If the `orjson` package is installed, you can use the faster ``OrjsonSerializer`` for the default mimetype (``application/json``): -[source,python] ------------------------------------- +```python from elasticsearch import Elasticsearch, OrjsonSerializer es = Elasticsearch( ..., serializer=OrjsonSerializer() ) ------------------------------------- +``` orjson is particularly fast when serializing vectors as it has native numpy support. This will be the default in a future release. Note that you can install orjson with the `orjson` extra: -[source,sh] --------------------------------------------- +```sh $ python -m pip install elasticsearch[orjson] --------------------------------------------- +``` -[discrete] -[[nodes]] -=== Nodes -[discrete] -==== Node implementations +## Nodes [nodes] + + +### Node implementations [_node_implementations] The default node class for synchronous I/O is `urllib3` and the default node class for asynchronous I/O is `aiohttp`. For all of the built-in HTTP node implementations like `urllib3`, `requests`, and `aiohttp` you can specify with a simple string to the `node_class` parameter: -[source,python] ------------------------------------- +```python from elasticsearch import Elasticsearch client = Elasticsearch( ..., node_class="requests" ) ------------------------------------- +``` You can also specify a custom node implementation via the `node_class` parameter: -[source,python] ------------------------------------- +```python from elasticsearch import Elasticsearch from elastic_transport import Urllib3HttpNode @@ -435,17 +399,17 @@ client = Elasticsearch( ... node_class=CustomHttpNode ) ------------------------------------- +``` -[discrete] -==== HTTP connections per node + +### HTTP connections per node [_http_connections_per_node] Each node contains its own pool of HTTP connections to allow for concurrent requests. This value is configurable via the `connections_per_node` parameter: -[source,python] ------------------------------------- +```python client = Elasticsearch( ..., connections_per_node=5 ) ------------------------------------- +``` + diff --git a/docs/reference/connecting.md b/docs/reference/connecting.md new file mode 100644 index 000000000..aefc50b1f --- /dev/null +++ b/docs/reference/connecting.md @@ -0,0 +1,357 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html +--- + +# Connecting [connecting] + +This page contains the information you need to connect the Client with {{es}}. + + +## Connecting to Elastic Cloud [connect-ec] + +[Elastic Cloud](docs-content://deploy-manage/deploy/elastic-cloud/cloud-hosted.md) is the easiest way to get started with {{es}}. When connecting to Elastic Cloud with the Python {{es}} client you should always use the `cloud_id` parameter to connect. You can find this value within the "Manage Deployment" page after you’ve created a cluster (look in the top-left if you’re in Kibana). + +We recommend using a Cloud ID whenever possible because your client will be automatically configured for optimal use with Elastic Cloud including HTTPS and HTTP compression. + +```python +from elasticsearch import Elasticsearch + +# Password for the 'elastic' user generated by Elasticsearch +ELASTIC_PASSWORD = "" + +# Found in the 'Manage Deployment' page +CLOUD_ID = "deployment-name:dXMtZWFzdDQuZ2Nw..." 
+ +# Create the client instance +client = Elasticsearch( + cloud_id=CLOUD_ID, + basic_auth=("elastic", ELASTIC_PASSWORD) +) + +# Successful response! +client.info() +# {'name': 'instance-0000000000', 'cluster_name': ...} +``` + + +## Connecting to a self-managed cluster [connect-self-managed-new] + +By default {{es}} will start with security features like authentication and TLS enabled. To connect to the {{es}} cluster you’ll need to configure the Python {{es}} client to use HTTPS with the generated CA certificate in order to make requests successfully. + +If you’re just getting started with {{es}} we recommend reading the documentation on [configuring](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md) and [starting {{es}}](docs-content://deploy-manage/maintenance/start-stop-services/start-stop-elasticsearch.md) to ensure your cluster is running as expected. + +When you start {{es}} for the first time you’ll see a distinct block like the one below in the output from {{es}} (you may have to scroll up if it’s been a while): + +```sh +---------------------------------------------------------------- +-> Elasticsearch security features have been automatically configured! +-> Authentication is enabled and cluster connections are encrypted. + +-> Password for the elastic user (reset with `bin/elasticsearch-reset-password -u elastic`): + lhQpLELkjkrawaBoaz0Q + +-> HTTP CA certificate SHA-256 fingerprint: + a52dd93511e8c6045e21f16654b77c9ee0f34aea26d9f40320b531c474676228 +... +---------------------------------------------------------------- +``` + +Note down the `elastic` user password and HTTP CA fingerprint for the next sections. In the examples below they will be stored in the variables `ELASTIC_PASSWORD` and `CERT_FINGERPRINT` respectively. + +Depending on the circumstances there are two options for verifying the HTTPS connection, either verifying with the CA certificate itself or via the HTTP CA certificate fingerprint. + + +### Verifying HTTPS with CA certificates [_verifying_https_with_ca_certificates] + +Using the `ca_certs` option is the default way the Python {{es}} client verifies an HTTPS connection. + +The generated root CA certificate can be found in the `certs` directory in your {{es}} config location (`$ES_CONF_PATH/certs/http_ca.crt`). If you’re running {{es}} in Docker there is [additional documentation for retrieving the CA certificate](docs-content://deploy-manage/deploy/self-managed/install-elasticsearch-with-docker.md). + +Once you have the `http_ca.crt` file somewhere accessible pass the path to the client via `ca_certs`: + +```python +from elasticsearch import Elasticsearch + +# Password for the 'elastic' user generated by Elasticsearch +ELASTIC_PASSWORD = "" + +# Create the client instance +client = Elasticsearch( + "https://localhost:9200", + ca_certs="/path/to/http_ca.crt", + basic_auth=("elastic", ELASTIC_PASSWORD) +) + +# Successful response! +client.info() +# {'name': 'instance-0000000000', 'cluster_name': ...} +``` + +::::{note} +If you don’t specify `ca_certs` or `ssl_assert_fingerprint` then the [certifi package](https://certifiio.readthedocs.io) will be used for `ca_certs` by default if available. +:::: + + + +### Verifying HTTPS with certificate fingerprints (Python 3.10 or later) [_verifying_https_with_certificate_fingerprints_python_3_10_or_later] + +::::{note} +Using this method **requires using Python 3.10 or later** and isn’t available when using the `aiohttp` HTTP client library so can’t be used with `AsyncElasticsearch`. 
+:::: + + +This method of verifying the HTTPS connection takes advantage of the certificate fingerprint value noted down earlier. Take this SHA256 fingerprint value and pass it to the Python {{es}} client via `ssl_assert_fingerprint`: + +```python +from elasticsearch import Elasticsearch + +# Fingerprint either from Elasticsearch startup or the script below. +# Colons and uppercase/lowercase don't matter when using +# the 'ssl_assert_fingerprint' parameter +CERT_FINGERPRINT = "A5:2D:D9:35:11:E8:C6:04:5E:21:F1:66:54:B7:7C:9E:E0:F3:4A:EA:26:D9:F4:03:20:B5:31:C4:74:67:62:28" + +# Password for the 'elastic' user generated by Elasticsearch +ELASTIC_PASSWORD = "" + +client = Elasticsearch( + "https://localhost:9200", + ssl_assert_fingerprint=CERT_FINGERPRINT, + basic_auth=("elastic", ELASTIC_PASSWORD) +) + +# Successful response! +client.info() +# {'name': 'instance-0000000000', 'cluster_name': ...} +``` + +The certificate fingerprint can be calculated using `openssl x509` with the certificate file: + +```sh +openssl x509 -fingerprint -sha256 -noout -in /path/to/http_ca.crt +``` + +If you don’t have access to the generated CA file from {{es}} you can use the following script to output the root CA fingerprint of the {{es}} instance with `openssl s_client`: + +```sh +# Replace the values of 'localhost' and '9200' with the +# corresponding host and port values for the cluster. +openssl s_client -connect localhost:9200 -servername localhost -showcerts </dev/null \ + | openssl x509 -fingerprint -sha256 -noout -in /dev/stdin +``` + +The output of `openssl x509` will look something like this: + +```sh +SHA256 Fingerprint=A5:2D:D9:35:11:E8:C6:04:5E:21:F1:66:54:B7:7C:9E:E0:F3:4A:EA:26:D9:F4:03:20:B5:31:C4:74:67:62:28 +``` + + +## Connecting without security enabled [connect-no-security] + +::::{warning} +Running {{es}} without security enabled is not recommended. +:::: + + +If your cluster is configured with [security explicitly disabled](elasticsearch://reference/elasticsearch/configuration-reference/security-settings.md) then you can connect via HTTP: + +```python +from elasticsearch import Elasticsearch + +# Create the client instance +client = Elasticsearch("http://localhost:9200") + +# Successful response! +client.info() +# {'name': 'instance-0000000000', 'cluster_name': ...} +``` + + +## Connecting to multiple nodes [connect-url] + +The Python {{es}} client supports sending API requests to multiple nodes in the cluster. This means that work will be more evenly spread across the cluster instead of hammering the same node over and over with requests. To configure the client with multiple nodes you can pass a list of URLs; each URL will be used as a separate node in the pool. + +```python +from elasticsearch import Elasticsearch + +# List of nodes to connect to, with different hosts and ports. +NODES = [ + "https://localhost:9200", + "https://localhost:9201", + "https://localhost:9202", +] + +# Password for the 'elastic' user generated by Elasticsearch +ELASTIC_PASSWORD = "" + +client = Elasticsearch( + NODES, + ca_certs="/path/to/http_ca.crt", + basic_auth=("elastic", ELASTIC_PASSWORD) +) +``` + +By default nodes are selected using round-robin, but alternate node selection strategies can be configured with the `node_selector_class` parameter. + +::::{note} +If your {{es}} cluster is behind a load balancer, as when using Elastic Cloud, you won’t need to configure multiple nodes. Instead use the load balancer host and port.
+:::: + + +## Authentication [authentication] + +This section contains code snippets to show you how to connect to various {{es}} providers. All authentication methods are supported on the client constructor or via the per-request `.options()` method: + +```python +from elasticsearch import Elasticsearch + +# Authenticate from the constructor +client = Elasticsearch( + "https://localhost:9200", + ca_certs="/path/to/http_ca.crt", + basic_auth=("username", "password") +) + +# Authenticate via the .options() method: +client.options( + basic_auth=("username", "password") +).indices.get(index="*") + +# You can persist the authenticated client to use +# later or for multiple API calls: +auth_client = client.options(api_key="api_key") +for i in range(10): + auth_client.index( + index="example-index", + document={"field": i} + ) +``` + + +### HTTP Basic authentication (Username and Password) [auth-basic] + +HTTP Basic authentication uses the `basic_auth` parameter by passing in a username and password within a tuple: + +```python +from elasticsearch import Elasticsearch + +# Adds the HTTP header 'Authorization: Basic <base64 username:password>' +client = Elasticsearch( + "https://localhost:9200", + ca_certs="/path/to/http_ca.crt", + basic_auth=("username", "password") +) +``` + + +### HTTP Bearer authentication [auth-bearer] + +HTTP Bearer authentication uses the `bearer_auth` parameter by passing the token as a string. This authentication method is used by [Service Account Tokens](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-create-service-token) and [Bearer Tokens](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-get-token). + +```python +from elasticsearch import Elasticsearch + +# Adds the HTTP header 'Authorization: Bearer token-value' +client = Elasticsearch( + "https://localhost:9200", + bearer_auth="token-value" +) +``` + + +### API Key authentication [auth-apikey] + +You can configure the client to use {{es}} API keys for connecting to your cluster. These can be generated through the [Elasticsearch Create API key API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-create-api-key) or [Kibana Stack Management](docs-content://deploy-manage/api-keys/elasticsearch-api-keys.md#create-api-key). + +```python +from elasticsearch import Elasticsearch + +# Adds the HTTP header 'Authorization: ApiKey <base64 api_key.id:api_key.api_key>' +client = Elasticsearch( + "https://localhost:9200", + ca_certs="/path/to/http_ca.crt", + api_key="api_key", +) +``` + + +## Enabling the Compatibility Mode [compatibility-mode] + +The {{es}} server version 8.0 introduced a new compatibility mode that gives you a smoother upgrade experience from 7 to 8. In a nutshell, you can use the latest 7.x Python {{es}} client with an 8.x {{es}} server, giving more room to coordinate the upgrade of your codebase to the next major version. + +If you want to leverage this functionality, please make sure that you are using the latest 7.x Python {{es}} client and set the environment variable `ELASTIC_CLIENT_APIVERSIONING` to `true`. The client handles the rest internally. For Python {{es}} clients 8.0 and beyond, you’re all set! The compatibility mode is enabled by default. + + +## Using the Client in a Function-as-a-Service Environment [connecting-faas] + +This section illustrates the best practices for leveraging the {{es}} client in a Function-as-a-Service (FaaS) environment.
+ +The most influential optimization is to initialize the client outside of the function, in the global scope. + +This practice not only improves performance but also enables background functionality such as, for example, [sniffing](https://www.elastic.co/blog/elasticsearch-sniffing-best-practices-what-when-why-how). The following examples provide a skeleton for the best practices. + +::::{important} +The async client shouldn’t be used within Function-as-a-Service as a new event loop must be started for each invocation. Instead, the synchronous `Elasticsearch` client is recommended. +:::: + + + +### GCP Cloud Functions [connecting-faas-gcp] + +```python +from elasticsearch import Elasticsearch + +# Client initialization +client = Elasticsearch( + cloud_id="deployment-name:ABCD...", + api_key=... +) + +def main(request): + # Use the client + client.search(index=..., query={"match_all": {}}) +``` + + +### AWS Lambda [connecting-faas-aws] + +```python +from elasticsearch import Elasticsearch + +# Client initialization +client = Elasticsearch( + cloud_id="deployment-name:ABCD...", + api_key=... +) + +def main(event, context): + # Use the client + client.search(index=..., query={"match_all": {}}) +``` + + +### Azure Functions [connecting-faas-azure] + +```python +import azure.functions as func +from elasticsearch import Elasticsearch + +# Client initialization +client = Elasticsearch( + cloud_id="deployment-name:ABCD...", + api_key=... +) + +def main(request: func.HttpRequest) -> func.HttpResponse: + # Use the client + client.search(index=..., query={"match_all": {}}) +``` + +Resources used to assess these recommendations: + +* [GCP Cloud Functions: Tips & Tricks](https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations) +* [Best practices for working with AWS Lambda functions](https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html) +* [Azure Functions Python developer guide](https://docs.microsoft.com/en-us/azure/azure-functions/functions-reference-python?tabs=azurecli-linux%2Capplication-level#global-variables) +* [AWS Lambda: Comparing the effect of global scope](https://docs.aws.amazon.com/lambda/latest/operatorguide/global-scope.html) diff --git a/docs/guide/elasticsearch-dsl.asciidoc b/docs/reference/elasticsearch-dsl.md similarity index 50% rename from docs/guide/elasticsearch-dsl.asciidoc rename to docs/reference/elasticsearch-dsl.md index bd3fb5d19..4030e232d 100644 --- a/docs/guide/elasticsearch-dsl.asciidoc +++ b/docs/reference/elasticsearch-dsl.md @@ -1,15 +1,13 @@ -[[elasticsearch-dsl]] -== Elasticsearch Python DSL - -Elasticsearch DSL is a module of the official Python client that aims to help -with writing and running queries against Elasticsearch in a more convenient and -idiomatic way. It stays close to the Elasticsearch JSON DSL, mirroring its -terminology and structure. It exposes the whole range of the DSL from -Python either directly using defined classes or a queryset-like -expressions. Here is an example: - -[source,python] -.... +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/elasticsearch-dsl.html +--- + +# Elasticsearch Python DSL [elasticsearch-dsl] + +Elasticsearch DSL is a module of the official Python client that aims to help with writing and running queries against Elasticsearch in a more convenient and idiomatic way. It stays close to the Elasticsearch JSON DSL, mirroring its terminology and structure.
It exposes the whole range of the DSL from Python either directly using defined classes or queryset-like expressions. Here is an example: + +```python from elasticsearch.dsl import Search s = Search(index="my-index") \ @@ -18,12 +16,11 @@ s = Search(index="my-index") \ .filter("term", category="search") \ .exclude("match", description="beta") for hit in s: print(hit.title) -.... +``` Or with asynchronous Python: -[source,python] -.... +```python from elasticsearch.dsl import AsyncSearch async def run_query(): @@ -33,16 +30,13 @@ async def run_query(): .exclude("match", description="beta") async for hit in s: print(hit.title) -.... +``` + +It also provides an optional wrapper for working with documents as Python objects: defining mappings, retrieving and saving documents, wrapping the document data in user-defined classes. + +To use the other Elasticsearch APIs (e.g. cluster health) just use the regular client. + + -It also provides an optional wrapper for working with documents as -Python objects: defining mappings, retrieving and saving documents, -wrapping the document data in user-defined classes. -To use the other Elasticsearch APIs (eg. cluster health) just use the -regular client. -include::dsl/configuration.asciidoc[] -include::dsl/tutorials.asciidoc[] -include::dsl/howto.asciidoc[] -include::dsl/examples.asciidoc[] diff --git a/docs/guide/esql-pandas.asciidoc b/docs/reference/esql-pandas.md similarity index 93% rename from docs/guide/esql-pandas.asciidoc rename to docs/reference/esql-pandas.md index 94887745c..506ea6579 100644 --- a/docs/guide/esql-pandas.asciidoc +++ b/docs/reference/esql-pandas.md @@ -1,29 +1,21 @@ -[[esql-pandas]] -=== ES|QL and Pandas - -The {ref}/esql.html[Elasticsearch Query Language (ES|QL)] provides a powerful -way to filter, transform, and analyze data stored in {es}. Designed to be easy -to learn and use, it is a perfect fit for data scientists familiar with Pandas -and other dataframe-based libraries. ES|QL queries produce tables with named -columns, which is the definition of dataframes. - -This page shows you an example of using ES|QL and Pandas together to work with -dataframes. - -[discrete] -[[import-data]] -==== Import data - -Use the -https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/esql/qa/testFixtures/src/main/resources/employees.csv[`employees` sample data] and -https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json[mapping]. -The easiest way to load this dataset is to run https://gist.github.com/pquentin/7cf29a5932cf52b293699dd994b1a276[two Elasticsearch API requests] in the Kibana Console. - -.Index mapping request -[%collapsible] -==== -[source,console] -------------------------------------------------- +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/esql-pandas.html +--- + +# ES|QL and Pandas [esql-pandas] + +The [Elasticsearch Query Language (ES|QL)](docs-content://explore-analyze/query-filter/languages/esql.md) provides a powerful way to filter, transform, and analyze data stored in {{es}}. Designed to be easy to learn and use, it is a perfect fit for data scientists familiar with Pandas and other dataframe-based libraries. ES|QL queries produce tables with named columns, which is the definition of dataframes. + +This page shows you an example of using ES|QL and Pandas together to work with dataframes.
+ + +## Import data [import-data] + +Use the [`employees` sample data](https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/esql/qa/testFixtures/src/main/resources/employees.csv) and [mapping](https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json). The easiest way to load this dataset is to run [two Elasticsearch API requests](https://gist.github.com/pquentin/7cf29a5932cf52b293699dd994b1a276) in the Kibana Console. + +::::{dropdown} Index mapping request +```console PUT employees { "mappings": { @@ -107,15 +99,13 @@ PUT employees } } } --------------------------------------------------- -// TEST[skip:TBD] -==== - -.Bulk request to ingest data -[%collapsible] -==== -[source,console] --------------------------------------------------- +``` + +:::: + + +::::{dropdown} Bulk request to ingest data +```console PUT employees/_bulk { "index": {}} {"birth_date":"1953-09-02T00:00:00Z","emp_no":"10001","first_name":"Georgi","gender":"M","hire_date":"1986-06-26T00:00:00Z","languages":"2","last_name":"Facello","salary":"57305","height":"2.03","still_hired":"true","avg_worked_seconds":"268728049","job_positions":["Senior Python Developer","Accountant"],"is_rehired":["false","true"],"salary_change":"1.19"} @@ -317,19 +307,17 @@ PUT employees/_bulk {"birth_date":"1956-05-25T00:00:00Z","emp_no":"10099","first_name":"Valter","gender":"F","hire_date":"1988-10-18T00:00:00Z","languages":"2","last_name":"Sullins","salary":"73578","height":"1.81","still_hired":"true","avg_worked_seconds":"377713748","is_rehired":["true","true"],"salary_change":["10.71","14.26","-8.78","-3.98"]} { "index": {}} {"birth_date":"1953-04-21T00:00:00Z","emp_no":"10100","first_name":"Hironobu","gender":"F","hire_date":"1987-09-21T00:00:00Z","languages":"4","last_name":"Haraldson","salary":"68431","height":"1.77","still_hired":"true","avg_worked_seconds":"223910853","job_positions":"Purchase Manager","is_rehired":["false","true","true","false"],"salary_change":["13.97","-7.49"]} --------------------------------------------------- -// TEST[skip:TBD] -==== +``` + +:::: -[discrete] -[[convert-dataset-pandas-dataframe]] -==== Convert the dataset -Use the ES|QL CSV import to convert the `employees` dataset to a Pandas -dataframe object. -[source,python] ------------------------------------- +## Convert the dataset [convert-dataset-pandas-dataframe] + +Use the ES|QL CSV import to convert the `employees` dataset to a Pandas dataframe object. + +```python from io import StringIO from elasticsearch import Elasticsearch import pandas as pd @@ -343,14 +331,11 @@ response = client.esql.query( ) df = pd.read_csv(StringIO(response.body)) print(df) ------------------------------------- +``` -Even though the dataset contains only 100 records, a LIMIT of 500 is specified to suppress -ES|QL warnings about potentially missing records. This prints the -following dataframe: +Even though the dataset contains only 100 records, a LIMIT of 500 is specified to suppress ES|QL warnings about potentially missing records. This prints the following dataframe: -[source,python] ------------------------------------- +```python avg_worked_seconds ... salary_change.long still_hired 0 268728049 ... 1 True 1 328922887 ... [-7, 11] True @@ -363,23 +348,16 @@ following dataframe: 97 272392146 ... [-2, 4, 8] False 98 377713748 ... [-8, -3, 10, 14] True 99 223910853 ... 
[-7, 13] True ------------------------------------- +``` -You can now analyze the data with Pandas or you can also continue transforming -the data using ES|QL. +You can now analyze the data with Pandas or you can also continue transforming the data using ES|QL. -[discrete] -[[analyze-data]] -==== Analyze the data with Pandas +## Analyze the data with Pandas [analyze-data] -In the next example, the {ref}/esql-commands.html#esql-stats-by[STATS ... BY] -command is utilized to count how many employees are speaking a given language. -The results are sorted with the `languages` column using -{ref}/esql-commands.html#esql-sort[SORT]: +In the next example, the [STATS …​ BY](elasticsearch://reference/query-languages/esql/esql-commands.md#esql-stats-by) command is utilized to count how many employees are speaking a given language. The results are sorted with the `languages` column using [SORT](elasticsearch://reference/query-languages/esql/esql-commands.md#esql-sort): -[source,python] ------------------------------------- +```python response = client.esql.query( query=""" FROM employees @@ -394,32 +372,25 @@ df = pd.read_csv( dtype={"count": "Int64", "languages": "Int64"}, ) print(df) ------------------------------------- +``` -Note that the `dtype` parameter of `pd.read_csv()` is useful when the type -inferred by Pandas is not enough. The code prints the following response: +Note that the `dtype` parameter of `pd.read_csv()` is useful when the type inferred by Pandas is not enough. The code prints the following response: -[source,python] ------------------------------------- +```python count languages 0 15 1 1 19 2 2 17 3 3 18 4 4 21 5 ------------------------------------- +``` -[discrete] -[[passing-params]] -==== Pass parameters to a query with ES|QL +## Pass parameters to a query with ES|QL [passing-params] -Use the -{ref}/esql-rest.html#esql-rest-params[built-in parameters support of the ES|QL REST API] -to pass parameters to a query: +Use the [built-in parameters support of the ES|QL REST API](docs-content://explore-analyze/query-filter/languages/esql-rest.md#esql-rest-params) to pass parameters to a query: -[source,python] ------------------------------------- +```python response = client.esql.query( query=""" FROM employees @@ -436,18 +407,16 @@ df = pd.read_csv( dtype={"count": "Int64", "languages": "Int64"}, ) print(df) ------------------------------------- +``` The code above outputs the following: -[source,python] ------------------------------------- +```python count languages 0 17 3 1 18 4 2 21 5 ------------------------------------- +``` + +If you want to learn more about ES|QL, refer to the [ES|QL documentation](docs-content://explore-analyze/query-filter/languages/esql.md). You can also check out this other [Python example using Boston Celtics data](https://github.com/elastic/elasticsearch-labs/blob/main/supporting-blog-content/Boston-Celtics-Demo/celtics-esql-demo.ipynb). -If you want to learn more about ES|QL, refer to the -{ref}/esql.html[ES|QL documentation]. You can also check out this other -https://github.com/elastic/elasticsearch-labs/blob/main/supporting-blog-content/Boston-Celtics-Demo/celtics-esql-demo.ipynb[Python example using Boston Celtics data]. 
\ No newline at end of file diff --git a/docs/reference/examples.md b/docs/reference/examples.md new file mode 100644 index 000000000..127312d8b --- /dev/null +++ b/docs/reference/examples.md @@ -0,0 +1,199 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/examples.html +--- + +# Examples [examples] + +Below you can find examples of how to use the most frequently called APIs with the Python client. + +* [Indexing a document](#ex-index) +* [Getting a document](#ex-get) +* [Refreshing an index](#ex-refresh) +* [Searching for a document](#ex-search) +* [Updating a document](#ex-update) +* [Deleting a document](#ex-delete) + + +## Indexing a document [ex-index] + +To index a document, you need to specify three pieces of information: `index`, `id`, and a `document`: + +```py +from datetime import datetime +from elasticsearch import Elasticsearch +client = Elasticsearch('https://localhost:9200') + +doc = { + 'author': 'author_name', + 'text': 'Interesting content...', + 'timestamp': datetime.now(), +} +resp = client.index(index="test-index", id=1, document=doc) +print(resp['result']) +``` + + +## Getting a document [ex-get] + +To get a document, you need to specify its `index` and `id`: + +```py +resp = client.get(index="test-index", id=1) +print(resp['_source']) +``` + + +## Refreshing an index [ex-refresh] + +You can perform the refresh operation on an index: + +```py +client.indices.refresh(index="test-index") +``` + + +## Searching for a document [ex-search] + +The `search()` method returns results that are matching a query: + +```py +resp = client.search(index="test-index", query={"match_all": {}}) +print("Got %d Hits:" % resp['hits']['total']['value']) +for hit in resp['hits']['hits']: + print("%(timestamp)s %(author)s: %(text)s" % hit["_source"]) +``` + + +## Updating a document [ex-update] + +To update a document, you need to specify three pieces of information: `index`, `id`, and a `doc`: + +```py +from datetime import datetime +from elasticsearch import Elasticsearch + +client = Elasticsearch('https://localhost:9200') + +doc = { + 'author': 'author_name', + 'text': 'Interesting modified content...', + 'timestamp': datetime.now(), +} +resp = client.update(index="test-index", id=1, doc=doc) +print(resp['result']) +``` + + +## Deleting a document [ex-delete] + +You can delete a document by specifying its `index`, and `id` in the `delete()` method: + +```py +client.delete(index="test-index", id=1) +``` + + +## Interactive examples [ex-interactive] + +The [elasticsearch-labs](https://github.com/elastic/elasticsearch-labs) repo contains interactive and executable [Python notebooks](https://github.com/elastic/elasticsearch-labs/tree/main/notebooks), sample apps, and resources for testing out Elasticsearch, using the Python client. These examples are mainly focused on vector search, hybrid search and generative AI use cases, but you’ll also find examples of basic operations like creating index mappings and performing lexical search. + + +### Search notebooks [_search_notebooks] + +The [Search](https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search) folder is a good place to start if you’re new to Elasticsearch. This folder contains a number of notebooks that demonstrate the fundamentals of Elasticsearch, like indexing vectors, running lexical, semantic and *hybrid* searches, and more. 
+ +The following notebooks are available: + +* [Quick start](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/00-quick-start.ipynb) +* [Keyword, querying, filtering](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/01-keyword-querying-filtering.ipynb) +* [Hybrid search](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/02-hybrid-search.ipynb) +* [Semantic search with ELSER](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb) +* [Multilingual semantic search](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/04-multilingual.ipynb) +* [Query rules](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/05-query-rules.ipynb) +* [Synonyms API quick start](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/06-synonyms-api.ipynb) + +Here’s a brief overview of what you’ll learn in each notebook. + + +#### Quick start [_quick_start] + +In the [00-quick-start.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/00-quick-start.ipynb) notebook you’ll learn how to: + +* Use the Elasticsearch Python client for various operations. +* Create and define an index for a sample dataset with `dense_vector` fields. +* Transform book titles into embeddings using [Sentence Transformers](https://www.sbert.net) and index them into Elasticsearch. +* Perform k-nearest neighbors (knn) semantic searches. +* Integrate traditional text-based search with semantic search, for a hybrid search system. +* Use reciprocal rank fusion (RRF) to intelligently combine search results from different retrieval systems. + + +#### Keyword, querying, filtering [_keyword_querying_filtering] + +In the [01-keyword-querying-filtering.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/01-keyword-querying-filtering.ipynb) notebook, you’ll learn how to: + +* Use [query and filter contexts](docs-content://explore-analyze/query-filter/languages/querydsl.md) to search and filter documents in Elasticsearch. +* Execute full-text searches with `match` and `multi-match` queries. +* Query and filter documents based on `text`, `number`, `date`, or `boolean` values. +* Run multi-field searches using the `multi-match` query. +* Prioritize specific fields in the `multi-match` query for tailored results. + + +#### Hybrid search [_hybrid_search] + +In the [02-hybrid-search.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/02-hybrid-search.ipynb) notebook, you’ll learn how to: + +* Combine results of traditional text-based search with semantic search, for a hybrid search system. +* Transform fields in the sample dataset into embeddings using the Sentence Transformer model and index them into Elasticsearch. +* Use the [RRF API](elasticsearch://reference/elasticsearch/rest-apis/reciprocal-rank-fusion.md#rrf-api) to combine the results of a `match` query and a `kNN` semantic search. +* Walk through a super simple toy example that demonstrates, step by step, how RRF ranking works. 
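To make the RRF step concrete, here is a minimal sketch (not taken from the notebook itself) of combining a lexical `match` query with a `kNN` search via the `rrf` retriever. It assumes a recent {{es}} cluster where retrievers are available, reuses the `client` from the examples above, and uses a hypothetical index and `dense_vector` field; the query vector shown is a placeholder for a real embedding-model output:

```python
resp = client.search(
    index="book_index",  # hypothetical index with a dense_vector field
    retriever={
        "rrf": {
            "retrievers": [
                # Lexical leg: classic full-text scoring
                {"standard": {"query": {"match": {"summary": "python"}}}},
                # Semantic leg: kNN over the embedding field
                {
                    "knn": {
                        "field": "title_vector",
                        "query_vector": [0.12, -0.45, 0.33],  # placeholder embedding
                        "k": 10,
                        "num_candidates": 50,
                    }
                },
            ]
        }
    },
)

for hit in resp["hits"]["hits"]:
    print(hit["_score"], hit["_source"]["title"])
```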
+ + +#### Semantic search with ELSER [_semantic_search_with_elser] + +In the [03-ELSER.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb) notebook, you’ll learn how to: + +* Use the Elastic Learned Sparse Encoder (ELSER) for text expansion-powered semantic search, out of the box, without training, fine-tuning, or embeddings generation. +* Download and deploy the ELSER model in your Elastic environment. +* Create an Elasticsearch index named `search-movies` with specific mappings and index a dataset of movie descriptions. +* Create an ingest pipeline containing an inference processor for ELSER model execution. +* Reindex the data from `search-movies` into another index, `elser-movies`, using the ELSER pipeline for text expansion. +* Observe the results of running the documents through the model by inspecting the additional terms it adds to documents, which enhance searchability. +* Perform simple keyword searches on the `elser-movies` index to assess the impact of ELSER’s text expansion. +* Execute ELSER-powered semantic searches using the `text_expansion` query. + + +#### Multilingual semantic search [_multilingual_semantic_search] + +In the [04-multilingual.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/04-multilingual.ipynb) notebook, you’ll learn how to: + +* Use a multilingual embedding model for semantic search across languages. +* Transform fields in the sample dataset into embeddings using the Sentence Transformer model and index them into Elasticsearch. +* Use filtering with a `kNN` semantic search. +* Walk through a simple toy example that demonstrates, step by step, how multilingual search works across languages, and within non-English languages. + + +#### Query rules [_query_rules] + +In the [05-query-rules.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/05-query-rules.ipynb) notebook, you’ll learn how to: + +* Use the query rules management APIs to create and edit promotional rules based on contextual queries. +* Apply these query rules by using the `rule_query` in Query DSL. + + +#### Synonyms API quick start [_synonyms_api_quick_start] + +In the [06-synonyms-api.ipynb](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/06-synonyms-api.ipynb) notebook, you’ll learn how to: + +* Use the synonyms management API to create a synonyms set to enhance your search recall. +* Configure an index to use search-time synonyms. +* Update synonyms in real time. +* Run queries that are enhanced by synonyms. + + +### Other notebooks [_other_notebooks] + +* [Generative AI](https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/generative-ai). Notebooks that demonstrate various use cases for Elasticsearch as the retrieval engine and vector store for LLM-powered applications. +* [Integrations](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations). Notebooks that demonstrate how to integrate popular services and projects with Elasticsearch, including OpenAI, Hugging Face, and LlamaIndex. +* [Langchain](https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/langchain). Notebooks that demonstrate how to integrate Elastic with LangChain, a framework for developing applications powered by language models.
+ diff --git a/docs/guide/getting-started.asciidoc b/docs/reference/getting-started.md similarity index 52% rename from docs/guide/getting-started.asciidoc rename to docs/reference/getting-started.md index 58b6f33a5..df413e836 100644 --- a/docs/guide/getting-started.asciidoc +++ b/docs/reference/getting-started.md @@ -1,80 +1,75 @@ -[[getting-started-python]] -== Getting started +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/getting-started-python.html + - https://www.elastic.co/guide/en/serverless/current/elasticsearch-python-client-getting-started.html +--- -This page guides you through the installation process of the Python client, -shows you how to instantiate the client, and how to perform basic Elasticsearch -operations with it. +# Getting started [getting-started-python] -[discrete] -=== Requirements +This page guides you through the installation process of the Python client, shows you how to instantiate the client, and how to perform basic Elasticsearch operations with it. -* https://www.python.org/[Python] 3.8 or newer -* https://pip.pypa.io/en/stable/[`pip`], installed by default alongside Python -[discrete] -=== Installation +### Requirements [_requirements] + +* [Python](https://www.python.org/) 3.8 or newer +* [`pip`](https://pip.pypa.io/en/stable/), installed by default alongside Python + + +### Installation [_installation] To install the latest version of the client, run the following command: -[source,shell] --------------------------- +```shell python -m pip install elasticsearch --------------------------- +``` -Refer to the <> page to learn more. +Refer to the [*Installation*](/reference/installation.md) page to learn more. -[discrete] -=== Connecting +### Connecting [_connecting] -You can connect to the Elastic Cloud using an API key and the Elasticsearch -endpoint. +You can connect to the Elastic Cloud using an API key and the Elasticsearch endpoint. -[source,py] ----- +```py from elasticsearch import Elasticsearch client = Elasticsearch( "https://...", # Elasticsearch endpoint api_key="api_key", ) ----- +``` -Your Elasticsearch endpoint can be found on the **My deployment** page of your -deployment: +Your Elasticsearch endpoint can be found on the **My deployment** page of your deployment: -image::images/es-endpoint.jpg[alt="Finding Elasticsearch endpoint",align="center"] +:::{image} ../images/es-endpoint.jpg +:alt: Finding Elasticsearch endpoint +::: You can generate an API key on the **Management** page under Security. -image::images/create-api-key.png[alt="Create API key",align="center"] +:::{image} ../images/create-api-key.png +:alt: Create API key +::: -For other connection options, refer to the <> section. +For other connection options, refer to the [*Connecting*](/reference/connecting.md) section. -[discrete] -=== Operations +### Operations [_operations] -Time to use Elasticsearch! This section walks you through the basic, and most -important, operations of Elasticsearch. For more operations and more advanced -examples, refer to the <> page. +Time to use Elasticsearch! This section walks you through the basic, and most important, operations of Elasticsearch. For more operations and more advanced examples, refer to the [*Examples*](/reference/examples.md) page. 
-[discrete] -==== Creating an index +#### Creating an index [_creating_an_index] This is how you create the `my_index` index: -[source,py] ----- +```py client.indices.create(index="my_index") ----- +``` -Optionally, you can first define the expected types of your features with a -custom mapping. +Optionally, you can first define the expected types of your features with a custom mapping. -[source,py] ----- +```py mappings = { "properties": { "foo": {"type": "text"}, @@ -91,15 +86,14 @@ mappings = { } client.indices.create(index="my_index", mappings=mappings) ----- +``` + -[discrete] -==== Indexing documents +#### Indexing documents [_indexing_documents] This indexes a document with the index API: -[source,py] ----- +```py client.index( index="my_index", id="my_document_id", @@ -108,12 +102,11 @@ client.index( "bar": "bar", } ) ----- +``` You can also index multiple documents at once with the bulk helper function: -[source,py] ----- +```py from elasticsearch import helpers def generate_docs(): @@ -125,47 +118,38 @@ def generate_docs(): } helpers.bulk(client, generate_docs()) ----- +``` -These helpers are the recommended way to perform bulk ingestion. While it is -also possible to perform bulk ingestion using `client.bulk` directly, the -helpers handle retries, ingesting chunk by chunk and more. See the -<> page for more details. +These helpers are the recommended way to perform bulk ingestion. While it is also possible to perform bulk ingestion using `client.bulk` directly, the helpers handle retries, ingesting chunk by chunk and more. See the [*Client helpers*](/reference/client-helpers.md) page for more details. -[discrete] -==== Getting documents +#### Getting documents [_getting_documents] You can get documents by using the following code: -[source,py] ----- +```py client.get(index="my_index", id="my_document_id") ----- +``` -[discrete] -==== Searching documents +#### Searching documents [_searching_documents] -This is how you can create a single match query with the Python client: +This is how you can create a single match query with the Python client: -[source,py] ----- +```py client.search(index="my_index", query={ "match": { "foo": "foo" } }) ----- +``` -[discrete] -==== Updating documents +#### Updating documents [_updating_documents] This is how you can update a document, for example to add a new field: -[source,py] ----- +```py client.update( index="my_index", id="my_document_id", @@ -174,28 +158,23 @@ client.update( "new_field": "new value", } ) ----- +``` -[discrete] -==== Deleting documents +#### Deleting documents [_deleting_documents] -[source,py] ----- +```py client.delete(index="my_index", id="my_document_id") ----- +``` -[discrete] -==== Deleting an index +#### Deleting an index [_deleting_an_index] -[source,py] ----- +```py client.indices.delete(index="my_index") ----- +``` -[discrete] -== Further reading +## Further reading [_further_reading] -* Use <> for a more comfortable experience with the APIs. +* Use [*Client helpers*](/reference/client-helpers.md) for a more comfortable experience with the APIs. diff --git a/docs/reference/index.md b/docs/reference/index.md new file mode 100644 index 000000000..774e7d112 --- /dev/null +++ b/docs/reference/index.md @@ -0,0 +1,69 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/overview.html +--- + +# Python [overview] + +This is the official low-level Python client for {{es}}. 
Its goal is to provide common ground for all {{es}}-related code in Python. For this reason, the client is designed to be unopinionated and extendable. An API reference is available on [Read the Docs](https://elasticsearch-py.readthedocs.io). + + +## Compatibility [_compatibility] + +Language clients are forward compatible, meaning that the clients support communicating with greater or equal minor versions of {{es}} without breaking. This does not mean that the clients automatically support new features of newer {{es}} versions; that is only possible after a release of a new client version. For example, an 8.12 client version won’t automatically support the new features of the 8.13 version of {{es}}; the 8.13 client version is required for that. {{es}} language clients are only backwards compatible with default distributions, and no guarantees are made. + +| Elasticsearch version | elasticsearch-py branch | Supported | +| --- | --- | --- | +| main | main | | +| 8.x | 8.x | 8.x | +| 7.x | 7.x | 7.17 | + +If you need to have multiple versions installed at the same time, older versions are also released as `elasticsearch7` and `elasticsearch8`. + + +## Example use [_example_use] + +A simple use case: + +```python +>>> from datetime import datetime +>>> from elasticsearch import Elasticsearch + +# Connect to 'http://localhost:9200' +>>> client = Elasticsearch("http://localhost:9200") + +# Datetimes will be serialized: +>>> client.index(index="my-index-000001", id=42, document={"any": "data", "timestamp": datetime.now()}) +{'_id': '42', '_index': 'my-index-000001', '_type': 'test-type', '_version': 1, 'ok': True} + +# ...but not deserialized +>>> client.get(index="my-index-000001", id=42)['_source'] +{'any': 'data', 'timestamp': '2013-05-12T19:45:31.804229'} +``` + +::::{tip} +For an elaborate example of how to ingest data into Elastic Cloud, refer to [this page](docs-content://manage-data/ingest/ingesting-data-from-applications/ingest-data-with-python-on-elasticsearch-service.md). +:::: + + + +## Features [_features] + +The client’s features include: + +* Translating basic Python data types to and from JSON +* Configurable automatic discovery of cluster nodes +* Persistent connections +* Load balancing (with pluggable selection strategy) across all available nodes +* Node timeouts on transient errors +* Thread safety +* Pluggable architecture + +The client also contains a convenient set of [helpers](https://elasticsearch-py.readthedocs.org/en/master/helpers.md) for some of the more engaging tasks like bulk indexing and reindexing. + + +## Elasticsearch Python DSL [_elasticsearch_python_dsl] + +For higher-level access with a more limited scope, have a look at the DSL module, which provides a more convenient and idiomatic way to write and manipulate queries. + diff --git a/docs/reference/installation.md b/docs/reference/installation.md new file mode 100644 index 000000000..20975821a --- /dev/null +++ b/docs/reference/installation.md @@ -0,0 +1,22 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/installation.html +--- + +# Installation [installation] + +[Download the latest version of Elasticsearch](https://www.elastic.co/downloads/elasticsearch) or [sign up](https://cloud.elastic.co/registration?elektra=en-ess-sign-up-page) for a free trial of Elastic Cloud.
+ +The Python client for {{es}} can be installed with pip: + +```sh +$ python -m pip install elasticsearch +``` + +If your application uses async/await in Python, you can install the client with the `async` extra: + +```sh +$ python -m pip install elasticsearch[async] +``` + +Read more about [how to use asyncio with this project](https://elasticsearch-py.readthedocs.io/en/master/async.md). diff --git a/docs/reference/integrations.md b/docs/reference/integrations.md new file mode 100644 index 000000000..282f69257 --- /dev/null +++ b/docs/reference/integrations.md @@ -0,0 +1,62 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/integrations.html +--- + +# Integrations [integrations] + +You can find integration options and information on this page. + + +## OpenTelemetry instrumentation [opentelemetry-intro] + +The Python Elasticsearch client supports native OpenTelemetry instrumentation following the [OpenTelemetry Semantic Conventions for Elasticsearch](https://opentelemetry.io/docs/specs/semconv/database/elasticsearch/). Refer to the [Using OpenTelemetry](/reference/opentelemetry.md) page for details. + + +## ES|QL [esql-intro] + +[ES|QL](docs-content://explore-analyze/query-filter/languages/esql.md) is available through the Python Elasticsearch client. Refer to the [ES|QL and Pandas](/reference/esql-pandas.md) page to learn more about using ES|QL and Pandas together with dataframes. + + +## Transport [transport] + +Connections, retries, and pooling are handled by the [Elastic Transport Python](https://github.com/elastic/elastic-transport-python) library. Documentation on the low-level classes is available on [Read the Docs](https://elastic-transport-python.readthedocs.io). + + +## Tracking requests with Opaque ID [opaque-id] + +You can enrich your requests against Elasticsearch with an identifier string that allows you to discover this identifier in [deprecation logs](docs-content://deploy-manage/monitor/logging-configuration/update-elasticsearch-logging-levels.md#deprecation-logging), to support you with [identifying search slow log origin](elasticsearch://reference/elasticsearch/index-settings/slow-log.md), or to help with [identifying running tasks](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-tasks). + +The opaque ID can be set with the `opaque_id` parameter of the client’s `.options()` method: + +```python +client = Elasticsearch(...) +client.options(opaque_id="request-id-...").search(...) +``` + + +## Type Hints [type-hints] + +Starting in `elasticsearch-py` v7.10.0, the library ships with [type hints](https://www.python.org/dev/peps/pep-0484) and supports basic static type analysis with tools like [Mypy](http://mypy-lang.org) and [Pyright](https://github.com/microsoft/pyright). + +If we write a script that has a type error, such as using `request_timeout` with a `str` argument instead of a `float`, and then run Mypy on the script: + +```python +# script.py +from elasticsearch import Elasticsearch + +client = Elasticsearch(...) +client.options( + request_timeout="5" # type error! +).search(...) + +# $ mypy script.py +# script.py:5: error: Argument "request_timeout" to "search" of "Elasticsearch" has +# incompatible type "str"; expected "Union[int, float, None]" +# Found 1 error in 1 file (checked 1 source file) +``` + +Type hints also allow tools like your IDE to check types and provide better auto-complete functionality.
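For contrast, here is a sketch of the corrected call. This is an illustration rather than official documentation: the endpoint and index name are placeholders, and it assumes an `elasticsearch` 8.x install with Mypy or Pyright available.

```python
# A hedged sketch: with a numeric request_timeout, the same call passes
# static type checking. The endpoint and index name are illustrative.
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")
client.options(
    request_timeout=5.0  # float instead of "5": no Mypy/Pyright error
).search(index="my-index", query={"match_all": {}})
```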
+ + + diff --git a/docs/reference/opentelemetry.md b/docs/reference/opentelemetry.md new file mode 100644 index 000000000..fb117d505 --- /dev/null +++ b/docs/reference/opentelemetry.md @@ -0,0 +1,80 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/opentelemetry.html +--- + +# Using OpenTelemetry [opentelemetry] + +You can use [OpenTelemetry](https://opentelemetry.io/) to monitor the performance and behavior of your {{es}} requests through the Elasticsearch Python client. The Python client comes with built-in OpenTelemetry instrumentation that emits [distributed tracing spans](docs-content://solutions/observability/apps/traces-2.md) by default. With that, applications using [manual OpenTelemetry instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry) or [automatic OpenTelemetry instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry) are enriched with additional spans that contain insightful information about the execution of the {{es}} requests. + +The native instrumentation in the Python client follows the [OpenTelemetry Semantic Conventions for {{es}}](https://opentelemetry.io/docs/specs/semconv/database/elasticsearch/). In particular, the instrumentation in the client covers the logical layer of {{es}} requests: a single span is created for each request that the service processes through the Python client. The following image shows a trace that records the handling of two different {{es}} requests: an `info` request and a `search` request. + +:::{image} ../images/otel-waterfall-without-http.png +:alt: Distributed trace with Elasticsearch spans +:class: screenshot +::: + +Usually, OpenTelemetry auto-instrumentation modules come with instrumentation support for HTTP-level communication. In this case, in addition to the logical {{es}} client requests, spans will be captured for the physical HTTP requests emitted by the client. The following image shows a trace with both {{es}} spans (in blue) and the corresponding HTTP-level spans (in red) after having installed the `opentelemetry-instrumentation-urllib3` package: + +:::{image} ../images/otel-waterfall-with-http.png +:alt: Distributed trace with Elasticsearch spans +:class: screenshot +::: + +Advanced Python client behaviors such as node round-robin and request retries are revealed through the combination of logical {{es}} spans and the physical HTTP spans. The following example shows a `search` request in a scenario with two nodes: + +:::{image} ../images/otel-waterfall-retry.png +:alt: Distributed trace with Elasticsearch spans +:class: screenshot +::: + +The first node is unavailable and results in an HTTP error, while the retry to the second node succeeds. Both HTTP requests are subsumed by the logical {{es}} request span (in blue). + + +### Set up the OpenTelemetry instrumentation [_setup_the_opentelemetry_instrumentation] + +When using the [manual Python OpenTelemetry instrumentation](https://opentelemetry.io/docs/languages/python/instrumentation/) or the [OpenTelemetry Python agent](https://opentelemetry.io/docs/languages/python/automatic/), the Python client’s OpenTelemetry instrumentation is enabled by default and uses the global OpenTelemetry SDK with the global tracer provider.
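As a minimal illustration of that default behavior, the following sketch (an assumption about a typical manual setup, not taken from the client docs) registers a global tracer provider with a console exporter; the client’s built-in instrumentation then exports a span per request with no client-specific configuration:

```py
# A hedged sketch, assuming the opentelemetry-sdk and elasticsearch packages
# are installed; the endpoint is illustrative.
from elasticsearch import Elasticsearch
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Register the global tracer provider that the client picks up automatically.
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

client = Elasticsearch("http://localhost:9200")
client.info()  # this request emits an "info" span through the global provider
```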
If you’re getting started with OpenTelemetry instrumentation, the following blog posts have step-by-step instructions to ingest and explore tracing data with the Elastic stack: + +* [Manual instrumentation with OpenTelemetry for Python applications](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry) +* [Automatic instrumentation with OpenTelemetry for Python applications](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry) + + +## Comparison with community instrumentation [_comparison_with_community_instrumentation] + +The [community OpenTelemetry Elasticsearch instrumentation](https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/elasticsearch/elasticsearch.md) also instruments the client and sends OpenTelemetry traces, but it was developed before the OpenTelemetry Semantic Conventions for {{es}}, so its trace attributes are inconsistent with other OpenTelemetry Elasticsearch client instrumentations. To avoid tracing the same requests twice, make sure to use only one instrumentation, either by uninstalling the `opentelemetry-instrumentation-elasticsearch` Python package or by [disabling the native instrumentation](#opentelemetry-config-enable). + + +### Configuring the OpenTelemetry instrumentation [_configuring_the_opentelemetry_instrumentation] + +You can configure this OpenTelemetry instrumentation through environment variables. The following configuration options are available. + + +#### Enable / Disable the OpenTelemetry instrumentation [opentelemetry-config-enable] + +With this configuration option, you can enable (default) or disable the built-in OpenTelemetry instrumentation. + +**Default:** `true` + +| | | +| --- | --- | +| Environment Variable | `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED` | + + +#### Capture search request bodies [_capture_search_request_bodies] + +By default, the built-in OpenTelemetry instrumentation does not capture request bodies due to data privacy considerations. You can use this option to capture search queries from the request bodies of {{es}} search requests if you wish to gather this information regardless. The options are to capture the raw search query or to not capture it at all. + +**Default:** `omit` + +**Valid Options:** `omit`, `raw` + +| | | +| --- | --- | +| Environment Variable | `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_CAPTURE_SEARCH_QUERY` | + + +### Overhead [_overhead] + +The OpenTelemetry instrumentation (like any other monitoring approach) may come with a slight overhead on CPU, memory, and/or latency. Overhead only occurs when the instrumentation is enabled (the default) and an OpenTelemetry SDK is active in the target application. When the instrumentation is disabled or no OpenTelemetry SDK is active within the target application, no monitoring overhead is expected when using the client. + +Even in cases where the instrumentation is enabled and is actively used (by an OpenTelemetry SDK), the overhead is minimal and negligible in the vast majority of cases. In edge cases where there is a noticeable overhead, the [instrumentation can be explicitly disabled](#opentelemetry-config-enable) to eliminate any potential impact on performance.
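If you do need to turn the instrumentation off programmatically, one approach is sketched below. It rests on an assumption that the library reads the documented toggle from the process environment when the client is created; exporting `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED=false` in the shell before startup achieves the same and is the safer route.

```py
import os

# A hedged sketch: set the documented toggle before instantiating the client,
# assuming the library reads it at client creation time.
os.environ["OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED"] = "false"

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # illustrative endpoint
client.info()  # no OpenTelemetry span is emitted for this request
```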
diff --git a/docs/reference/toc.yml b/docs/reference/toc.yml new file mode 100644 index 000000000..8de284050 --- /dev/null +++ b/docs/reference/toc.yml @@ -0,0 +1,19 @@ +toc: + - file: index.md + - file: getting-started.md + - file: installation.md + - file: connecting.md + - file: configuration.md + - file: async.md + - file: integrations.md + children: + - file: opentelemetry.md + - file: esql-pandas.md + - file: examples.md + - file: elasticsearch-dsl.md + children: + - file: _configuration.md + - file: _tutorials.md + - file: _how_to_guides.md + - file: _examples.md + - file: client-helpers.md \ No newline at end of file diff --git a/docs/release-notes/breaking-changes.md b/docs/release-notes/breaking-changes.md new file mode 100644 index 000000000..b79d3cb96 --- /dev/null +++ b/docs/release-notes/breaking-changes.md @@ -0,0 +1,28 @@ +--- +navigation_title: "Elasticsearch Python Client" +--- + +# Elasticsearch Python Client breaking changes [elasticsearch-python-client-breaking-changes] +Before you upgrade, carefully review the Elasticsearch Python Client breaking changes and take the necessary steps to mitigate any issues. + +To learn how to upgrade, check out . + +% ## Next version [elasticsearch-python-client-nextversion-breaking-changes] +% **Release date:** Month day, year + +% ::::{dropdown} Title of breaking change +% Description of the breaking change. +% For more information, check [PR #](PR link). +% **Impact**
Impact of the breaking change. +% **Action**
Steps for mitigating the breaking change impact. +% :::: + +% ## 9.0.0 [elasticsearch-python-client-900-breaking-changes] +% **Release date:** March 25, 2025 + +% ::::{dropdown} Title of breaking change +% Description of the breaking change. +% For more information, check [PR #](PR link). +% **Impact**
Impact of the breaking change. +% **Action**
Steps for mitigating the breaking change impact. +% :::: \ No newline at end of file diff --git a/docs/release-notes/deprecations.md b/docs/release-notes/deprecations.md new file mode 100644 index 000000000..1b9bfbb74 --- /dev/null +++ b/docs/release-notes/deprecations.md @@ -0,0 +1,28 @@ +--- +navigation_title: "Elasticsearch Python Client" +--- + +# Elasticsearch Python Client deprecations [elasticsearch-python-client-deprecations] +Review the deprecated functionality for your Elasticsearch Python Client version. While deprecations have no immediate impact, we strongly encourage you to update your implementation after you upgrade. + +To learn how to upgrade, check out . + +% ## Next version [elasticsearch-python-client-versionnext-deprecations] +% **Release date:** Month day, year + +% ::::{dropdown} Deprecation title +% Description of the deprecation. +% For more information, check [PR #](PR link). +% **Impact**
Impact of deprecation. +% **Action**
Steps for mitigating deprecation impact. +% :::: + +% ## 9.0.0 [elasticsearch-python-client-900-deprecations] +% **Release date:** March 25, 2025 + +% ::::{dropdown} Deprecation title +% Description of the deprecation. +% For more information, check [PR #](PR link). +% **Impact**
Impact of deprecation. +% **Action**
Steps for mitigating deprecation impact. +% :::: \ No newline at end of file diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md new file mode 100644 index 000000000..156625560 --- /dev/null +++ b/docs/release-notes/index.md @@ -0,0 +1,27 @@ +--- +navigation_title: "Elasticsearch Python Client" +--- + +# Elasticsearch Python Client release notes [elasticsearch-python-client-release-notes] + +Review the changes, fixes, and more in each version of Elasticsearch Python Client. + +To check for security updates, go to [Security announcements for the Elastic stack](https://discuss.elastic.co/c/announcements/security-announcements/31). + +% Release notes include only features, enhancements, and fixes. Add breaking changes, deprecations, and known issues to the applicable release notes sections. + +% ## version.next [elasticsearch-python-client-next-release-notes] +% **Release date:** Month day, year + +% ### Features and enhancements [elasticsearch-python-client-next-features-enhancements] +% * + +% ### Fixes [elasticsearch-python-client-next-fixes] +% * + +## 9.0.0 [elasticsearch-python-client-900-release-notes] +**Release date:** March 25, 2025 + +### Features and enhancements [elasticsearch-python-client-900-features-enhancements] + +### Fixes [elasticsearch-python-client-900-fixes] \ No newline at end of file diff --git a/docs/release-notes/known-issues.md b/docs/release-notes/known-issues.md new file mode 100644 index 000000000..da93abb27 --- /dev/null +++ b/docs/release-notes/known-issues.md @@ -0,0 +1,20 @@ +--- +navigation_title: "Elasticsearch Python Client" + +--- + +# Elasticsearch Python Client known issues [elasticsearch-python-client-known-issues] + +% Use the following template to add entries to this page. + +% :::{dropdown} Title of known issue +% **Details** +% On [Month/Day/Year], a known issue was discovered that [description of known issue]. + +% **Workaround** +% Workaround description. + +% **Resolved** +% On [Month/Day/Year], this issue was resolved. + +% ::: \ No newline at end of file diff --git a/docs/release-notes/toc.yml b/docs/release-notes/toc.yml new file mode 100644 index 000000000..a41006794 --- /dev/null +++ b/docs/release-notes/toc.yml @@ -0,0 +1,5 @@ +toc: + - file: index.md + - file: known-issues.md + - file: breaking-changes.md + - file: deprecations.md \ No newline at end of file diff --git a/docs/sphinx/_static/images/create-api-key.png b/docs/sphinx/_static/images/create-api-key.png new file mode 100644 index 000000000..d75c23030 Binary files /dev/null and b/docs/sphinx/_static/images/create-api-key.png differ diff --git a/docs/sphinx/quickstart.rst b/docs/sphinx/quickstart.rst index 563ea6f23..f7e527858 100644 --- a/docs/sphinx/quickstart.rst +++ b/docs/sphinx/quickstart.rst @@ -39,7 +39,7 @@ under **Cloud ID**. You can generate an API key on the **Management** page under Security. -.. image:: ../guide/images/create-api-key.png +.. image:: _static/images/create-api-key.png Confirm that the connection was successful.
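A quick way to perform that check is sketched below, under the assumption that `client` was instantiated as shown earlier on the page; the exact response fields vary by deployment.

```py
# A hedged sketch: ping the cluster and print its reported version.
resp = client.info()
print(resp["version"]["number"])
```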