From 6922ae83d04545ccd0b6b285f41e908031dc6743 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Thu, 8 Jun 2023 19:03:04 -0700 Subject: [PATCH 01/18] Client abstraction over API resources (prototype) --- openai/client.py | 137 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 openai/client.py diff --git a/openai/client.py b/openai/client.py new file mode 100644 index 0000000000..c07c214194 --- /dev/null +++ b/openai/client.py @@ -0,0 +1,137 @@ +import logging +import typing + +import openai + +LATEST_AZURE_API_VERSION = "2023-05-15" + + +class AzureTokenAuth: + def __init__(self, credential=None): + if not credential: + try: + import azure.identity + except ImportError: + raise Exception( + "You have to install the azure-identity package in order to use AzureTokenAuth" + ) + credential = azure.identity.DefaultAzureCredential() + self._credential = credential + self._cached_token = None + + def get_token(self) -> str: + if self._cached_token is None: + self._cached_token = self._credential.get_token( + "https://cognitiveservices.azure.com/.default" + ) + return self._cached_token.token + + +class ApiKeyAuth: + def __init__(self, key: str = ""): + self.key = key or openai.api_key + + def get_token(self) -> str: + return self.key + + +Backends = typing.Literal["azure", "openai", ""] + + +class OpenAIClient: + def __init__( + self, + *, + api_base: str = "", + auth: typing.Union[str, ApiKeyAuth, AzureTokenAuth] = "", + api_version: str = "", + backend: Backends = "", + ): + self.api_base = api_base or openai.api_base + if auth == "azuredefault": + self.auth = AzureTokenAuth() + elif isinstance(auth, str): + self.auth = ApiKeyAuth(auth or openai.api_key) + else: + self.auth = auth + + # Pick up api type from parameter or environment + backend = backend or ( + "azure" if openai.api_type in ("azure", "azure_ad") else "openai" + ) + + self.backend = backend + + if backend == "azure": + self.api_version = ( + api_version or openai.api_version or LATEST_AZURE_API_VERSION + ) + if isinstance(self.auth, AzureTokenAuth): + self.api_type = "azure_ad" + else: + self.api_type = "azure" + elif backend in ("openai", ""): + self.api_type = "open_ai" + self.api_version = api_version or openai.api_version + else: + raise ValueError( + f'Unknown `backend` {backend} - expected one of "azure" or "openai"' + ) + + def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> None: + backend = self.backend + + kwargs.setdefault("api_base", self.api_base or openai.api_base) + kwargs.setdefault("api_key", self.auth.get_token()) + kwargs.setdefault("api_type", self.api_type) + if self.api_version: + kwargs.setdefault("api_version", self.api_version) + + for key, val in overrides.items(): + kwargs.setdefault(key, val) + if kwargs[key] != val: + raise ValueError(f"No parameter named `{key}`") + + def completion(self, prompt: str, **kwargs) -> openai.Completion: + self._populate_args(kwargs, prompt=prompt, stream=False) + return typing.cast(openai.Completion, openai.Completion.create(**kwargs)) + + def iter_completion( + self, prompt: str, **kwargs + ) -> typing.Iterable[openai.Completion]: + self._populate_args(kwargs, prompt=prompt, stream=True) + return typing.cast( + typing.Iterable[openai.Completion], openai.Completion.create(**kwargs) + ) + + def chatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: + self._populate_args(kwargs, messages=messages, stream=False) + return typing.cast( + openai.ChatCompletion, 
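+            # Note: typing.cast() only narrows the static type for type checkers;
+            # at runtime the response from ChatCompletion.create() is returned
+            # unchanged.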
openai.ChatCompletion.create(**kwargs) + ) + + def iter_chatcompletion( + self, messages, **kwargs + ) -> typing.Iterable[openai.ChatCompletion]: + self._populate_args(kwargs, messages=messages, stream=True) + return typing.cast( + typing.Iterable[openai.ChatCompletion], + openai.ChatCompletion.create(**kwargs), + ) + + def embeddings(self, input, **kwargs): + self._populate_args(kwargs, input=input) + return typing.cast(openai.Embedding, openai.Embedding.create(**kwargs)) + + +if __name__ == "__main__": + client = OpenAIClient( + api_base="https://achand-openai-0.openai.azure.com/", + auth="azuredefault", + backend="azure", + ) + print(client.completion("what is up, my friend?", deployment_id="chatgpt")) + print(client.embeddings("What, or what is this?", deployment_id="arch")) # Doesn't work 'cause it is the wrong model... + oaiclient = OpenAIClient() + print(oaiclient.completion("what is up, my friend?", model="text-davinci-003")) + print(oaiclient.embeddings("What are embeddings?", model="text-embedding-ada-002")) From e1c33e660cbf2ef722cef51f686efd4d9c72336d Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Mon, 12 Jun 2023 15:05:46 -0700 Subject: [PATCH 02/18] Added some async endpoints --- openai/client.py | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/openai/client.py b/openai/client.py index c07c214194..6f05983afe 100644 --- a/openai/client.py +++ b/openai/client.py @@ -96,6 +96,10 @@ def completion(self, prompt: str, **kwargs) -> openai.Completion: self._populate_args(kwargs, prompt=prompt, stream=False) return typing.cast(openai.Completion, openai.Completion.create(**kwargs)) + async def acompletion(self, prompt: str, **kwargs) -> openai.Completion: + self._populate_args(kwargs, prompt=prompt, stream=False) + return typing.cast(openai.Completion, await openai.Completion.acreate(**kwargs)) + def iter_completion( self, prompt: str, **kwargs ) -> typing.Iterable[openai.Completion]: @@ -104,12 +108,26 @@ def iter_completion( typing.Iterable[openai.Completion], openai.Completion.create(**kwargs) ) + async def aiter_completion( + self, prompt: str, **kwargs + ) -> typing.AsyncIterable[openai.Completion]: + self._populate_args(kwargs, prompt=prompt, stream=True) + return typing.cast( + typing.AsyncIterable[openai.Completion], await openai.Completion.acreate(**kwargs) + ) + def chatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: self._populate_args(kwargs, messages=messages, stream=False) return typing.cast( openai.ChatCompletion, openai.ChatCompletion.create(**kwargs) ) + async def achatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: + self._populate_args(kwargs, messages=messages, stream=False) + return typing.cast( + openai.ChatCompletion, await openai.ChatCompletion.acreate(**kwargs) + ) + def iter_chatcompletion( self, messages, **kwargs ) -> typing.Iterable[openai.ChatCompletion]: @@ -119,10 +137,22 @@ def iter_chatcompletion( openai.ChatCompletion.create(**kwargs), ) - def embeddings(self, input, **kwargs): + async def aiter_chatcompletion( + self, messages, **kwargs + ) -> typing.AsyncIterable[openai.ChatCompletion]: + self._populate_args(kwargs, messages=messages, stream=True) + return typing.cast( + typing.AsyncIterable[openai.ChatCompletion], + await openai.ChatCompletion.acreate(**kwargs), + ) + + def embeddings(self, input, **kwargs) -> openai.Embedding: self._populate_args(kwargs, input=input) return typing.cast(openai.Embedding, openai.Embedding.create(**kwargs)) + async def 
aembeddings(self, input, **kwargs) -> openai.Embedding: + self._populate_args(kwargs, input=input) + return typing.cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) if __name__ == "__main__": client = OpenAIClient( @@ -131,7 +161,15 @@ def embeddings(self, input, **kwargs): backend="azure", ) print(client.completion("what is up, my friend?", deployment_id="chatgpt")) - print(client.embeddings("What, or what is this?", deployment_id="arch")) # Doesn't work 'cause it is the wrong model... + # print(client.embeddings("What, or what is this?", deployment_id="arch")) # Doesn't work 'cause it is the wrong model... oaiclient = OpenAIClient() print(oaiclient.completion("what is up, my friend?", model="text-davinci-003")) print(oaiclient.embeddings("What are embeddings?", model="text-embedding-ada-002")) + + import asyncio + async def stream_chat(): + respco = await client.aiter_completion("what is up, my friend?", deployment_id="chatgpt") + async for rsp in respco: + print(rsp) + + asyncio.run(stream_chat()) From ac61e451f351b8d3a341a0bc731919b54616ea08 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Mon, 12 Jun 2023 17:04:35 -0700 Subject: [PATCH 03/18] Added image APIs --- openai/client.py | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/openai/client.py b/openai/client.py index 6f05983afe..c652bfe443 100644 --- a/openai/client.py +++ b/openai/client.py @@ -1,4 +1,4 @@ -import logging +import time import typing import openai @@ -20,10 +20,11 @@ def __init__(self, credential=None): self._cached_token = None def get_token(self) -> str: - if self._cached_token is None: + if self._cached_token is None or (self._cached_token.expires_on - time.time()) < 300: self._cached_token = self._credential.get_token( "https://cognitiveservices.azure.com/.default" ) + return self._cached_token.token @@ -39,6 +40,7 @@ def get_token(self) -> str: class OpenAIClient: + def __init__( self, *, @@ -88,6 +90,8 @@ def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> N kwargs.setdefault("api_version", self.api_version) for key, val in overrides.items(): + if val == ...: + continue kwargs.setdefault(key, val) if kwargs[key] != val: raise ValueError(f"No parameter named `{key}`") @@ -154,6 +158,50 @@ async def aembeddings(self, input, **kwargs) -> openai.Embedding: self._populate_args(kwargs, input=input) return typing.cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) + def image(self, prompt: str, *, n: int = ..., size: str = ..., + response_format: str = ..., user: str = ..., + **kwargs): + self._populate_args(kwargs, prompt = prompt, n = n, size = size, + response_format = response_format, user = user) + return typing.cast(openai.Image, openai.Image.create(**kwargs)) + + async def aimage(self, prompt: str, *, n: int = ..., size: str = ..., + response_format: str = ..., user: str = ..., + **kwargs): + self._populate_args(kwargs, prompt = prompt, n = n, size = size, + response_format = response_format, user = user) + return typing.cast(openai.Image, await openai.Image.acreate(**kwargs)) + + def image_variation(self, image: bytes | typing.BinaryIO, *, n: int = ..., + size: str = ..., response_format: str = ..., + user: str = ..., **kwargs): + self._populate_args(kwargs, image = image, n = n, size = size, + response_format = response_format, user = user) + return typing.cast(openai.Image, openai.Image.create_variation(**kwargs)) + + async def aimage_variation(self, image: bytes | typing.BinaryIO, *, 
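+                            # `...` (Ellipsis) marks "not supplied by the caller";
+                            # _populate_args drops these entries so they are never
+                            # sent to the API.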
n: int = ..., + size: str = ..., response_format: str = ..., + user: str = ..., **kwargs): + self._populate_args(kwargs, image = image, n = n, size = size, + response_format = response_format, user = user) + return typing.cast(openai.Image, await openai.Image.acreate_variation(**kwargs)) + + def image_edit(self, image: bytes | typing.BinaryIO, prompt: str, *, mask: str = ..., n: int = ..., + size: str = ..., response_format: str = ..., + user: str = ..., **kwargs): + self._populate_args(kwargs, image = image, n = n, size = size, + prompt = prompt, mask = mask, + response_format = response_format, user = user) + return typing.cast(openai.Image, openai.Image.create_edit(**kwargs)) + + async def aimage_edit(self, image: bytes | typing.BinaryIO, prompt: str, *, mask: str = ..., n: int = ..., + size: str = ..., response_format: str = ..., + user: str = ..., **kwargs): + self._populate_args(kwargs, image = image, n = n, size = size, + prompt = prompt, mask = mask, + response_format = response_format, user = user) + return typing.cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) + if __name__ == "__main__": client = OpenAIClient( api_base="https://achand-openai-0.openai.azure.com/", @@ -162,9 +210,6 @@ async def aembeddings(self, input, **kwargs) -> openai.Embedding: ) print(client.completion("what is up, my friend?", deployment_id="chatgpt")) # print(client.embeddings("What, or what is this?", deployment_id="arch")) # Doesn't work 'cause it is the wrong model... - oaiclient = OpenAIClient() - print(oaiclient.completion("what is up, my friend?", model="text-davinci-003")) - print(oaiclient.embeddings("What are embeddings?", model="text-embedding-ada-002")) import asyncio async def stream_chat(): @@ -173,3 +218,10 @@ async def stream_chat(): print(rsp) asyncio.run(stream_chat()) + + + oaiclient = OpenAIClient() + print(oaiclient.completion("what is up, my friend?", model="text-davinci-003")) + print(oaiclient.embeddings("What are embeddings?", model="text-embedding-ada-002")) + rsp = oaiclient.image("Happy cattle", response_format="b64_json") + print(rsp) \ No newline at end of file From b8947689e8407bc75c90d9fb0327faf0cf4a89e8 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Tue, 13 Jun 2023 16:38:15 -0700 Subject: [PATCH 04/18] Allow the use of `model` as a synonym for `deployment_id` when using azure/client abstraction --- openai/client.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/openai/client.py b/openai/client.py index c652bfe443..ae9048b452 100644 --- a/openai/client.py +++ b/openai/client.py @@ -81,8 +81,8 @@ def __init__( ) def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> None: - backend = self.backend - + """Populate default arguments based on the current client configuration/defaults + """ kwargs.setdefault("api_base", self.api_base or openai.api_base) kwargs.setdefault("api_key", self.auth.get_token()) kwargs.setdefault("api_type", self.api_type) @@ -94,20 +94,41 @@ def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> N continue kwargs.setdefault(key, val) if kwargs[key] != val: - raise ValueError(f"No parameter named `{key}`") + raise TypeError(f"No parameter named `{key}`") + + def _normalize_model(self, kwargs: typing.Dict[str, typing.Any]): + """Normalize model/engine/deployment_id based on which backend the client is + configured to target. 
+ + Specifically, it will pass the provided `model` parameter as `deployment_id` + unless `deployment_id` is explicitly passed in. + """ + if len([param for param in kwargs if param in ('deployment_id', 'model', 'engine')]) != 1: + raise TypeError('You can only specify one of `deployment_id`, `model` and `engine`') + + if self.backend == 'azure': + try: + # We'll try to "rename" the `model` keyword to fit azure's `deployment_id` + # paradigm + kwargs['deployment_id'] = kwargs.pop('model') + except KeyError: + pass def completion(self, prompt: str, **kwargs) -> openai.Completion: self._populate_args(kwargs, prompt=prompt, stream=False) + self._normalize_model(kwargs) return typing.cast(openai.Completion, openai.Completion.create(**kwargs)) async def acompletion(self, prompt: str, **kwargs) -> openai.Completion: self._populate_args(kwargs, prompt=prompt, stream=False) + self._normalize_model(kwargs) return typing.cast(openai.Completion, await openai.Completion.acreate(**kwargs)) def iter_completion( self, prompt: str, **kwargs ) -> typing.Iterable[openai.Completion]: self._populate_args(kwargs, prompt=prompt, stream=True) + self._normalize_model(kwargs) return typing.cast( typing.Iterable[openai.Completion], openai.Completion.create(**kwargs) ) @@ -116,18 +137,21 @@ async def aiter_completion( self, prompt: str, **kwargs ) -> typing.AsyncIterable[openai.Completion]: self._populate_args(kwargs, prompt=prompt, stream=True) + self._normalize_model(kwargs) return typing.cast( typing.AsyncIterable[openai.Completion], await openai.Completion.acreate(**kwargs) ) def chatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: self._populate_args(kwargs, messages=messages, stream=False) + self._normalize_model(kwargs) return typing.cast( openai.ChatCompletion, openai.ChatCompletion.create(**kwargs) ) async def achatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: self._populate_args(kwargs, messages=messages, stream=False) + self._normalize_model(kwargs) return typing.cast( openai.ChatCompletion, await openai.ChatCompletion.acreate(**kwargs) ) @@ -136,6 +160,7 @@ def iter_chatcompletion( self, messages, **kwargs ) -> typing.Iterable[openai.ChatCompletion]: self._populate_args(kwargs, messages=messages, stream=True) + self._normalize_model(kwargs) return typing.cast( typing.Iterable[openai.ChatCompletion], openai.ChatCompletion.create(**kwargs), @@ -145,6 +170,7 @@ async def aiter_chatcompletion( self, messages, **kwargs ) -> typing.AsyncIterable[openai.ChatCompletion]: self._populate_args(kwargs, messages=messages, stream=True) + self._normalize_model(kwargs) return typing.cast( typing.AsyncIterable[openai.ChatCompletion], await openai.ChatCompletion.acreate(**kwargs), @@ -152,10 +178,12 @@ async def aiter_chatcompletion( def embeddings(self, input, **kwargs) -> openai.Embedding: self._populate_args(kwargs, input=input) + self._normalize_model(kwargs) return typing.cast(openai.Embedding, openai.Embedding.create(**kwargs)) async def aembeddings(self, input, **kwargs) -> openai.Embedding: self._populate_args(kwargs, input=input) + self._normalize_model(kwargs) return typing.cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) def image(self, prompt: str, *, n: int = ..., size: str = ..., @@ -208,12 +236,12 @@ async def aimage_edit(self, image: bytes | typing.BinaryIO, prompt: str, *, mask auth="azuredefault", backend="azure", ) - print(client.completion("what is up, my friend?", deployment_id="chatgpt")) - # print(client.embeddings("What, or what is this?", 
deployment_id="arch")) # Doesn't work 'cause it is the wrong model... + print(client.completion("what is up, my friend?", model="chatgpt")) + # print(client.embeddings("What, or what is this?", model="arch")) # Doesn't work 'cause it is the wrong model... import asyncio async def stream_chat(): - respco = await client.aiter_completion("what is up, my friend?", deployment_id="chatgpt") + respco = await client.aiter_completion("what is up, my friend?", model="chatgpt") async for rsp in respco: print(rsp) From b4fdcd6eaf3dabd94e202e0478f699d39af01d80 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Tue, 13 Jun 2023 16:41:25 -0700 Subject: [PATCH 05/18] Formatteding client.py (black) --- openai/client.py | 200 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 150 insertions(+), 50 deletions(-) diff --git a/openai/client.py b/openai/client.py index ae9048b452..11477aa979 100644 --- a/openai/client.py +++ b/openai/client.py @@ -20,7 +20,10 @@ def __init__(self, credential=None): self._cached_token = None def get_token(self) -> str: - if self._cached_token is None or (self._cached_token.expires_on - time.time()) < 300: + if ( + self._cached_token is None + or (self._cached_token.expires_on - time.time()) < 300 + ): self._cached_token = self._credential.get_token( "https://cognitiveservices.azure.com/.default" ) @@ -40,7 +43,6 @@ def get_token(self) -> str: class OpenAIClient: - def __init__( self, *, @@ -81,8 +83,7 @@ def __init__( ) def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> None: - """Populate default arguments based on the current client configuration/defaults - """ + """Populate default arguments based on the current client configuration/defaults""" kwargs.setdefault("api_base", self.api_base or openai.api_base) kwargs.setdefault("api_key", self.auth.get_token()) kwargs.setdefault("api_type", self.api_type) @@ -98,19 +99,30 @@ def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> N def _normalize_model(self, kwargs: typing.Dict[str, typing.Any]): """Normalize model/engine/deployment_id based on which backend the client is - configured to target. + configured to target. - Specifically, it will pass the provided `model` parameter as `deployment_id` - unless `deployment_id` is explicitly passed in. + Specifically, it will pass the provided `model` parameter as `deployment_id` + unless `deployment_id` is explicitly passed in. 
""" - if len([param for param in kwargs if param in ('deployment_id', 'model', 'engine')]) != 1: - raise TypeError('You can only specify one of `deployment_id`, `model` and `engine`') - - if self.backend == 'azure': + if ( + len( + [ + param + for param in kwargs + if param in ("deployment_id", "model", "engine") + ] + ) + != 1 + ): + raise TypeError( + "You can only specify one of `deployment_id`, `model` and `engine`" + ) + + if self.backend == "azure": try: # We'll try to "rename" the `model` keyword to fit azure's `deployment_id` # paradigm - kwargs['deployment_id'] = kwargs.pop('model') + kwargs["deployment_id"] = kwargs.pop("model") except KeyError: pass @@ -139,7 +151,8 @@ async def aiter_completion( self._populate_args(kwargs, prompt=prompt, stream=True) self._normalize_model(kwargs) return typing.cast( - typing.AsyncIterable[openai.Completion], await openai.Completion.acreate(**kwargs) + typing.AsyncIterable[openai.Completion], + await openai.Completion.acreate(**kwargs), ) def chatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: @@ -186,50 +199,135 @@ async def aembeddings(self, input, **kwargs) -> openai.Embedding: self._normalize_model(kwargs) return typing.cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) - def image(self, prompt: str, *, n: int = ..., size: str = ..., - response_format: str = ..., user: str = ..., - **kwargs): - self._populate_args(kwargs, prompt = prompt, n = n, size = size, - response_format = response_format, user = user) + def image( + self, + prompt: str, + *, + n: int = ..., + size: str = ..., + response_format: str = ..., + user: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + prompt=prompt, + n=n, + size=size, + response_format=response_format, + user=user, + ) return typing.cast(openai.Image, openai.Image.create(**kwargs)) - - async def aimage(self, prompt: str, *, n: int = ..., size: str = ..., - response_format: str = ..., user: str = ..., - **kwargs): - self._populate_args(kwargs, prompt = prompt, n = n, size = size, - response_format = response_format, user = user) + + async def aimage( + self, + prompt: str, + *, + n: int = ..., + size: str = ..., + response_format: str = ..., + user: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + prompt=prompt, + n=n, + size=size, + response_format=response_format, + user=user, + ) return typing.cast(openai.Image, await openai.Image.acreate(**kwargs)) - def image_variation(self, image: bytes | typing.BinaryIO, *, n: int = ..., - size: str = ..., response_format: str = ..., - user: str = ..., **kwargs): - self._populate_args(kwargs, image = image, n = n, size = size, - response_format = response_format, user = user) + def image_variation( + self, + image: bytes | typing.BinaryIO, + *, + n: int = ..., + size: str = ..., + response_format: str = ..., + user: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + image=image, + n=n, + size=size, + response_format=response_format, + user=user, + ) return typing.cast(openai.Image, openai.Image.create_variation(**kwargs)) - async def aimage_variation(self, image: bytes | typing.BinaryIO, *, n: int = ..., - size: str = ..., response_format: str = ..., - user: str = ..., **kwargs): - self._populate_args(kwargs, image = image, n = n, size = size, - response_format = response_format, user = user) + async def aimage_variation( + self, + image: bytes | typing.BinaryIO, + *, + n: int = ..., + size: str = ..., + response_format: str = ..., + user: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + 
image=image, + n=n, + size=size, + response_format=response_format, + user=user, + ) return typing.cast(openai.Image, await openai.Image.acreate_variation(**kwargs)) - def image_edit(self, image: bytes | typing.BinaryIO, prompt: str, *, mask: str = ..., n: int = ..., - size: str = ..., response_format: str = ..., - user: str = ..., **kwargs): - self._populate_args(kwargs, image = image, n = n, size = size, - prompt = prompt, mask = mask, - response_format = response_format, user = user) + def image_edit( + self, + image: bytes | typing.BinaryIO, + prompt: str, + *, + mask: str = ..., + n: int = ..., + size: str = ..., + response_format: str = ..., + user: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + image=image, + n=n, + size=size, + prompt=prompt, + mask=mask, + response_format=response_format, + user=user, + ) return typing.cast(openai.Image, openai.Image.create_edit(**kwargs)) - - async def aimage_edit(self, image: bytes | typing.BinaryIO, prompt: str, *, mask: str = ..., n: int = ..., - size: str = ..., response_format: str = ..., - user: str = ..., **kwargs): - self._populate_args(kwargs, image = image, n = n, size = size, - prompt = prompt, mask = mask, - response_format = response_format, user = user) + + async def aimage_edit( + self, + image: bytes | typing.BinaryIO, + prompt: str, + *, + mask: str = ..., + n: int = ..., + size: str = ..., + response_format: str = ..., + user: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + image=image, + n=n, + size=size, + prompt=prompt, + mask=mask, + response_format=response_format, + user=user, + ) return typing.cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) + if __name__ == "__main__": client = OpenAIClient( api_base="https://achand-openai-0.openai.azure.com/", @@ -240,16 +338,18 @@ async def aimage_edit(self, image: bytes | typing.BinaryIO, prompt: str, *, mask # print(client.embeddings("What, or what is this?", model="arch")) # Doesn't work 'cause it is the wrong model... import asyncio + async def stream_chat(): - respco = await client.aiter_completion("what is up, my friend?", model="chatgpt") + respco = await client.aiter_completion( + "what is up, my friend?", model="chatgpt" + ) async for rsp in respco: print(rsp) asyncio.run(stream_chat()) - oaiclient = OpenAIClient() print(oaiclient.completion("what is up, my friend?", model="text-davinci-003")) print(oaiclient.embeddings("What are embeddings?", model="text-embedding-ada-002")) rsp = oaiclient.image("Happy cattle", response_format="b64_json") - print(rsp) \ No newline at end of file + print(rsp) From a4f45ef947d1ea80a84df4d3c47253e8d2a1a9c5 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Wed, 14 Jun 2023 15:29:39 -0700 Subject: [PATCH 06/18] Added basic tests, more validation for client --- openai/client.py | 47 +++++++++++-- openai/tests/test_client.py | 130 ++++++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 5 deletions(-) create mode 100644 openai/tests/test_client.py diff --git a/openai/client.py b/openai/client.py index 11477aa979..9f7002b357 100644 --- a/openai/client.py +++ b/openai/client.py @@ -7,7 +7,16 @@ class AzureTokenAuth: - def __init__(self, credential=None): + """Authentication using an Azure AD token. + """ + + def __repr__(self): + return f"AzureTokenAuth({type(self._credential)})" + + def __init__(self, *, credential=None): + """Create a new AzureTokenAuth instance. 
If no credential is passed, + it will use ~azure.identity.DefaultAzureCredential + """ if not credential: try: import azure.identity @@ -32,10 +41,17 @@ def get_token(self) -> str: class ApiKeyAuth: + """Authentication using an API key""" + + def __repr__(self): + return f"ApiKeyAuth(api_key=)" + def __init__(self, key: str = ""): + """Create a new ApiKeyAuth instance. If no key is passed, it will use ~openai.api_key""" self.key = key or openai.api_key def get_token(self) -> str: + """Get the API key""" return self.key @@ -51,7 +67,17 @@ def __init__( api_version: str = "", backend: Backends = "", ): - self.api_base = api_base or openai.api_base + """Create a new OpenAI client. + + :param: api_base: The base URL for the API. If not specified, based on ~opeanai.api_base + :param: auth: The authentication method or key to use. If the string value "azuredefault" is passed, it will use ~azure.identity.DefaultAzureCredential + :param: api_version: The API version to use. If not specified, based on ~openai.api_version or ~openai.client.LATEST_AZURE_API_VERSION. + :param backend: One of 'azure' or 'openai'. If not specified, inferred from the auth method or ~openai.api_type + """ + + # + # This code is a bit messy, but it's because we want to hide the messiness from the caller. + # if auth == "azuredefault": self.auth = AzureTokenAuth() elif isinstance(auth, str): @@ -61,7 +87,7 @@ def __init__( # Pick up api type from parameter or environment backend = backend or ( - "azure" if openai.api_type in ("azure", "azure_ad") else "openai" + "azure" if openai.api_type in ("azure", "azure_ad") or isinstance(auth, AzureTokenAuth) else "openai" ) self.backend = backend @@ -74,14 +100,25 @@ def __init__( self.api_type = "azure_ad" else: self.api_type = "azure" - elif backend in ("openai", ""): + elif backend == "openai": self.api_type = "open_ai" self.api_version = api_version or openai.api_version else: raise ValueError( f'Unknown `backend` {backend} - expected one of "azure" or "openai"' ) + if not api_base and backend == 'azure': + raise ValueError("You have to specify `api_base` for the azure backend.") + self.api_base = api_base or openai.api_base + def __repr__(self): + constructor_args = [ + f"{name}={repr(value)}" + for name, value in self.__dict__.items() + if value is not None + ] + return f"OpenAIClient({','.join(constructor_args)})" + def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> None: """Populate default arguments based on the current client configuration/defaults""" kwargs.setdefault("api_base", self.api_base or openai.api_base) @@ -115,7 +152,7 @@ def _normalize_model(self, kwargs: typing.Dict[str, typing.Any]): != 1 ): raise TypeError( - "You can only specify one of `deployment_id`, `model` and `engine`" + "You must specify exactly one of `deployment_id`, `model` and `engine`" ) if self.backend == "azure": diff --git a/openai/tests/test_client.py b/openai/tests/test_client.py new file mode 100644 index 0000000000..b59646c2ea --- /dev/null +++ b/openai/tests/test_client.py @@ -0,0 +1,130 @@ +import pytest + +import openai.client + +@pytest.fixture +def clear_oai_module(monkeypatch: pytest.MonkeyPatch): + for key in [ 'api_base', 'api_key', 'api_type', 'api_version']: + ... 
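+    # Reset the module-level configuration to known defaults so each test
+    # starts from a clean openai module state.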
+ monkeypatch.setattr(openai, 'api_base', "https://api.openai.com/v1") + monkeypatch.setattr(openai, 'api_key', None) + monkeypatch.setattr(openai, 'api_type', "open_ai") + monkeypatch.setattr(openai, 'api_version', None) + +def setup_oai_module(monkeypatch: pytest.MonkeyPatch, **kwargs): + for n, v in kwargs.items(): + monkeypatch.setattr(openai, n, v) + +def test_construct_client(monkeypatch: pytest.MonkeyPatch, clear_oai_module): + setup_oai_module(monkeypatch, api_key=None) + client = openai.client.OpenAIClient() + assert client.api_base == openai.api_base + assert client.api_type == openai.api_type + assert client.auth.get_token() is None + +def test_construct_azure_client(monkeypatch: pytest.MonkeyPatch, clear_oai_module): + setup_oai_module(monkeypatch, api_key=None, api_base='something different') + + provided_api_base = 'https://contoso.microsoft.com' + client = openai.client.OpenAIClient(api_base=provided_api_base, backend='azure') + assert client.api_base == provided_api_base + assert client.api_type == 'azure' + assert client.auth.get_token() is None + +def test_construct_azure_client_aad(monkeypatch: pytest.MonkeyPatch, clear_oai_module): + provided_api_base = 'https://contoso.microsoft.com' + def mock_get_token(*args, **kwargs): + return 'expected token' + monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) + + client = openai.client.OpenAIClient(api_base=provided_api_base, backend='azure', auth=openai.client.AzureTokenAuth(credential='dummy')) + assert client.api_base == provided_api_base + assert client.api_type == 'azure_ad' + assert client.auth.get_token() == 'expected token' + +def test_construct_azure_client_api_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module): + provided_api_base = 'https://contoso.microsoft.com' + client = openai.client.OpenAIClient(api_base=provided_api_base, backend='azure', auth='secret key') + assert client.api_base == provided_api_base + assert client.api_type == 'azure' + assert client.auth.get_token() == 'secret key' + +def test_construct_openai_client_api_key(): + client = openai.client.OpenAIClient(auth='secret key') + assert client.api_base == openai.api_base + assert client.api_type == 'open_ai' + assert client.auth.get_token() == 'secret key' + +@pytest.fixture +def embedding_response(): + return { + "object": "list", + "data": [ + { + "object": "embedding", + "embedding": [ + 0.0023064255, + -0.009327292, + -0.0028842222, + ], + "index": 0 + } + ], + "model": "text-embedding-ada-002", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + +def test_make_call_client_aad(monkeypatch: pytest.MonkeyPatch, clear_oai_module, embedding_response): + provided_api_base = 'https://contoso.microsoft.com' + def mock_get_token(*args, **kwargs): + return 'expected token' + + def mock_embeddings_response(*args, **kwargs): + assert kwargs.get('deployment_id') == 'das deployment' + assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION + assert kwargs.get('api_type') == 'azure_ad' + return embedding_response + + monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) + monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) + + client = openai.client.OpenAIClient(backend='azure', api_base = provided_api_base, auth=openai.client.AzureTokenAuth(credential='dummy')) + client.embeddings("some data", model='das deployment') + + +def test_make_call_client_azure_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module, embedding_response): + provided_api_base = 
'https://contoso.microsoft.com' + def mock_get_token(*args, **kwargs): + return 'expected token' + def mock_embeddings_response(*args, **kwargs): + assert kwargs.get('deployment_id') == 'das deployment' + assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION + assert kwargs.get('api_type') == 'azure' + assert kwargs.get('api_key', 'secret key') + return embedding_response + + monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) + monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) + + client = openai.client.OpenAIClient(backend='azure', api_base = provided_api_base, auth="secret key") + client.embeddings("some data", model='das deployment') + + +def test_make_call_client_oai_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module, embedding_response): + provided_api_base = 'https://contoso.microsoft.com' + def mock_get_token(*args, **kwargs): + return 'expected token' + def mock_embeddings_response(*args, **kwargs): + assert kwargs.get('model') == 'das model' + assert kwargs.get('api_type') == 'open_ai' + assert kwargs.get('api_key', 'secret key') + return embedding_response + + monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) + monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) + + client = openai.client.OpenAIClient(auth="secret key") + client.embeddings("some data", model='das model') \ No newline at end of file From 49c7c8bdf0b4cca5866fe30253875c2b5a87f9cf Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Thu, 15 Jun 2023 14:45:53 -0700 Subject: [PATCH 07/18] Update openai/client.py Add missing `azuread` api_type support (for environment variables) Co-authored-by: Krista Pratico --- openai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openai/client.py b/openai/client.py index 9f7002b357..ed9cc0d2f6 100644 --- a/openai/client.py +++ b/openai/client.py @@ -87,7 +87,7 @@ def __init__( # Pick up api type from parameter or environment backend = backend or ( - "azure" if openai.api_type in ("azure", "azure_ad") or isinstance(auth, AzureTokenAuth) else "openai" + "azure" if openai.api_type in ("azure", "azure_ad", "azuread") or isinstance(auth, AzureTokenAuth) else "openai" ) self.backend = backend From bd4960c653e72ac985018c684cc68630d293b1db Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Thu, 15 Jun 2023 15:39:28 -0700 Subject: [PATCH 08/18] Update openai/client.py Co-authored-by: Krista Pratico --- openai/client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/openai/client.py b/openai/client.py index ed9cc0d2f6..b5ad21ba96 100644 --- a/openai/client.py +++ b/openai/client.py @@ -69,10 +69,11 @@ def __init__( ): """Create a new OpenAI client. - :param: api_base: The base URL for the API. If not specified, based on ~opeanai.api_base - :param: auth: The authentication method or key to use. If the string value "azuredefault" is passed, it will use ~azure.identity.DefaultAzureCredential - :param: api_version: The API version to use. If not specified, based on ~openai.api_version or ~openai.client.LATEST_AZURE_API_VERSION. - :param backend: One of 'azure' or 'openai'. If not specified, inferred from the auth method or ~openai.api_type + :keyword str api_base: The base URL for the API. If not specified, based on ~opeanai.api_base + :keyword auth: The authentication method or key to use. 
If the string value "azuredefault" is passed, it will use ~azure.identity.DefaultAzureCredential + :paramtype auth: str or ~openai.client.ApiKeyAuth or ~openai.client.AzureTokenAuth + :keyword str api_version: The API version to use. If not specified, based on ~openai.api_version or ~openai.client.LATEST_AZURE_API_VERSION. + :keyword str backend: One of 'azure' or 'openai'. If not specified, inferred from the auth method or ~openai.api_type """ # From 91e9b581607c2dcee00b88069990a29a9d358078 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Fri, 16 Jun 2023 17:28:36 -0700 Subject: [PATCH 09/18] Review feedback --- openai/client.py | 86 ++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/openai/client.py b/openai/client.py index 9f7002b357..fc1bbd532d 100644 --- a/openai/client.py +++ b/openai/client.py @@ -1,5 +1,11 @@ +import sys import time -import typing + +from typing import Union, Dict, Any, cast, Iterable, BinaryIO +if sys.version_info >= (3, 8): + from typing import Literal, AsyncIterable +else: + from typing_extensions import Literal, AsyncIterable import openai @@ -55,7 +61,7 @@ def get_token(self) -> str: return self.key -Backends = typing.Literal["azure", "openai", ""] +Backends = Literal["azure", "openai", ""] class OpenAIClient: @@ -63,9 +69,10 @@ def __init__( self, *, api_base: str = "", - auth: typing.Union[str, ApiKeyAuth, AzureTokenAuth] = "", + auth: Union[str, ApiKeyAuth, AzureTokenAuth] = "", api_version: str = "", backend: Backends = "", + organization: str = "", ): """Create a new OpenAI client. @@ -86,13 +93,11 @@ def __init__( self.auth = auth # Pick up api type from parameter or environment - backend = backend or ( + self.backend = backend or ( "azure" if openai.api_type in ("azure", "azure_ad") or isinstance(auth, AzureTokenAuth) else "openai" ) - self.backend = backend - - if backend == "azure": + if self.backend == "azure": self.api_version = ( api_version or openai.api_version or LATEST_AZURE_API_VERSION ) @@ -100,16 +105,18 @@ def __init__( self.api_type = "azure_ad" else: self.api_type = "azure" - elif backend == "openai": + elif self.backend == "openai": self.api_type = "open_ai" self.api_version = api_version or openai.api_version else: raise ValueError( f'Unknown `backend` {backend} - expected one of "azure" or "openai"' ) - if not api_base and backend == 'azure': - raise ValueError("You have to specify `api_base` for the azure backend.") + self.api_base = api_base or openai.api_base + self.organization = organization or openai.organization + if self.backend == 'azure' and self.api_base == "https://api.openai.com/v1" + raise ValueError("You are using the 'openai.com' endpoint with an Azure credential or API type. 
Please provide the endpoint to your Azure resource instead.") def __repr__(self): constructor_args = [ @@ -119,11 +126,12 @@ def __repr__(self): ] return f"OpenAIClient({','.join(constructor_args)})" - def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> None: + def _populate_args(self, kwargs: Dict[str, Any], **overrides) -> None: """Populate default arguments based on the current client configuration/defaults""" kwargs.setdefault("api_base", self.api_base or openai.api_base) kwargs.setdefault("api_key", self.auth.get_token()) kwargs.setdefault("api_type", self.api_type) + kwargs.setdefault("organization", self.organization) if self.api_version: kwargs.setdefault("api_version", self.api_version) @@ -134,7 +142,7 @@ def _populate_args(self, kwargs: typing.Dict[str, typing.Any], **overrides) -> N if kwargs[key] != val: raise TypeError(f"No parameter named `{key}`") - def _normalize_model(self, kwargs: typing.Dict[str, typing.Any]): + def _normalize_model(self, kwargs: Dict[str, Any]): """Normalize model/engine/deployment_id based on which backend the client is configured to target. @@ -166,75 +174,75 @@ def _normalize_model(self, kwargs: typing.Dict[str, typing.Any]): def completion(self, prompt: str, **kwargs) -> openai.Completion: self._populate_args(kwargs, prompt=prompt, stream=False) self._normalize_model(kwargs) - return typing.cast(openai.Completion, openai.Completion.create(**kwargs)) + return cast(openai.Completion, openai.Completion.create(**kwargs)) async def acompletion(self, prompt: str, **kwargs) -> openai.Completion: self._populate_args(kwargs, prompt=prompt, stream=False) self._normalize_model(kwargs) - return typing.cast(openai.Completion, await openai.Completion.acreate(**kwargs)) + return cast(openai.Completion, await openai.Completion.acreate(**kwargs)) def iter_completion( self, prompt: str, **kwargs - ) -> typing.Iterable[openai.Completion]: + ) -> Iterable[openai.Completion]: self._populate_args(kwargs, prompt=prompt, stream=True) self._normalize_model(kwargs) - return typing.cast( - typing.Iterable[openai.Completion], openai.Completion.create(**kwargs) + return cast( + Iterable[openai.Completion], openai.Completion.create(**kwargs) ) async def aiter_completion( self, prompt: str, **kwargs - ) -> typing.AsyncIterable[openai.Completion]: + ) -> AsyncIterable[openai.Completion]: self._populate_args(kwargs, prompt=prompt, stream=True) self._normalize_model(kwargs) - return typing.cast( - typing.AsyncIterable[openai.Completion], + return cast( + AsyncIterable[openai.Completion], await openai.Completion.acreate(**kwargs), ) def chatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: self._populate_args(kwargs, messages=messages, stream=False) self._normalize_model(kwargs) - return typing.cast( + return cast( openai.ChatCompletion, openai.ChatCompletion.create(**kwargs) ) async def achatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: self._populate_args(kwargs, messages=messages, stream=False) self._normalize_model(kwargs) - return typing.cast( + return cast( openai.ChatCompletion, await openai.ChatCompletion.acreate(**kwargs) ) def iter_chatcompletion( self, messages, **kwargs - ) -> typing.Iterable[openai.ChatCompletion]: + ) -> Iterable[openai.ChatCompletion]: self._populate_args(kwargs, messages=messages, stream=True) self._normalize_model(kwargs) - return typing.cast( - typing.Iterable[openai.ChatCompletion], + return cast( + Iterable[openai.ChatCompletion], openai.ChatCompletion.create(**kwargs), ) async def 
aiter_chatcompletion( self, messages, **kwargs - ) -> typing.AsyncIterable[openai.ChatCompletion]: + ) -> AsyncIterable[openai.ChatCompletion]: self._populate_args(kwargs, messages=messages, stream=True) self._normalize_model(kwargs) - return typing.cast( - typing.AsyncIterable[openai.ChatCompletion], + return cast( + AsyncIterable[openai.ChatCompletion], await openai.ChatCompletion.acreate(**kwargs), ) def embeddings(self, input, **kwargs) -> openai.Embedding: self._populate_args(kwargs, input=input) self._normalize_model(kwargs) - return typing.cast(openai.Embedding, openai.Embedding.create(**kwargs)) + return cast(openai.Embedding, openai.Embedding.create(**kwargs)) async def aembeddings(self, input, **kwargs) -> openai.Embedding: self._populate_args(kwargs, input=input) self._normalize_model(kwargs) - return typing.cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) + return cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) def image( self, @@ -254,7 +262,7 @@ def image( response_format=response_format, user=user, ) - return typing.cast(openai.Image, openai.Image.create(**kwargs)) + return cast(openai.Image, openai.Image.create(**kwargs)) async def aimage( self, @@ -274,11 +282,11 @@ async def aimage( response_format=response_format, user=user, ) - return typing.cast(openai.Image, await openai.Image.acreate(**kwargs)) + return cast(openai.Image, await openai.Image.acreate(**kwargs)) def image_variation( self, - image: bytes | typing.BinaryIO, + image: Union[bytes, BinaryIO], *, n: int = ..., size: str = ..., @@ -294,11 +302,11 @@ def image_variation( response_format=response_format, user=user, ) - return typing.cast(openai.Image, openai.Image.create_variation(**kwargs)) + return cast(openai.Image, openai.Image.create_variation(**kwargs)) async def aimage_variation( self, - image: bytes | typing.BinaryIO, + image: Union[bytes, BinaryIO], *, n: int = ..., size: str = ..., @@ -314,11 +322,11 @@ async def aimage_variation( response_format=response_format, user=user, ) - return typing.cast(openai.Image, await openai.Image.acreate_variation(**kwargs)) + return cast(openai.Image, await openai.Image.acreate_variation(**kwargs)) def image_edit( self, - image: bytes | typing.BinaryIO, + image: Union[bytes, BinaryIO], prompt: str, *, mask: str = ..., @@ -338,11 +346,11 @@ def image_edit( response_format=response_format, user=user, ) - return typing.cast(openai.Image, openai.Image.create_edit(**kwargs)) + return cast(openai.Image, openai.Image.create_edit(**kwargs)) async def aimage_edit( self, - image: bytes | typing.BinaryIO, + image: Union[bytes, BinaryIO], prompt: str, *, mask: str = ..., From 7a3160281b8057b893d205c4de295a402a171989 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Fri, 16 Jun 2023 17:33:08 -0700 Subject: [PATCH 10/18] Fix tests --- openai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openai/client.py b/openai/client.py index 34b9afa046..0eef61f0ce 100644 --- a/openai/client.py +++ b/openai/client.py @@ -116,7 +116,7 @@ def __init__( self.api_base = api_base or openai.api_base self.organization = organization or openai.organization - if self.backend == 'azure' and self.api_base == "https://api.openai.com/v1" + if self.backend == 'azure' and self.api_base == "https://api.openai.com/v1": raise ValueError("You are using the 'openai.com' endpoint with an Azure credential or API type. 
Please provide the endpoint to your Azure resource instead.") def __repr__(self): From e3b59de1fe2dbabfa504a3fa49babd76161e78a6 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Tue, 20 Jun 2023 16:27:54 -0700 Subject: [PATCH 11/18] adding docstrings initial --- openai/client.py | 78 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/openai/client.py b/openai/client.py index 0eef61f0ce..5bb5fab06d 100644 --- a/openai/client.py +++ b/openai/client.py @@ -1,7 +1,16 @@ import sys import time -from typing import Union, Dict, Any, cast, Iterable, BinaryIO +from typing import ( + Union, + Dict, + Any, + cast, + Iterable, + BinaryIO, + Optional, + TYPE_CHECKING +) if sys.version_info >= (3, 8): from typing import Literal, AsyncIterable else: @@ -9,19 +18,28 @@ import openai +if TYPE_CHECKING: + from azure.core.credentials import TokenCredential + + LATEST_AZURE_API_VERSION = "2023-05-15" class AzureTokenAuth: - """Authentication using an Azure AD token. + """Authentication using an Azure Active Directory token. """ - def __repr__(self): + def __repr__(self) -> str: return f"AzureTokenAuth({type(self._credential)})" - def __init__(self, *, credential=None): - """Create a new AzureTokenAuth instance. If no credential is passed, - it will use ~azure.identity.DefaultAzureCredential + def __init__(self, *, credential: Optional["TokenCredential"] = None) -> None: + """Create a new AzureTokenAuth instance. Requires the + azure-identity package. + + :keyword credential: A credential type from the azure.identity library. + If no credential is passed, it will use ~azure.identity.DefaultAzureCredential. + :paramtype credential: ~azure.core.credentials.TokenCredential or + ~azure.identity.DefaultAzureCredential """ if not credential: try: @@ -35,6 +53,13 @@ def __init__(self, *, credential=None): self._cached_token = None def get_token(self) -> str: + """Gets a valid AAD token to authenticate the request. + + .. note:: + + Do not directly interact with this API, it will be called + automatically when a token is needed for the request. + """ if ( self._cached_token is None or (self._cached_token.expires_on - time.time()) < 300 @@ -49,15 +74,25 @@ def get_token(self) -> str: class ApiKeyAuth: """Authentication using an API key""" - def __repr__(self): - return f"ApiKeyAuth(api_key=)" - - def __init__(self, key: str = ""): - """Create a new ApiKeyAuth instance. If no key is passed, it will use ~openai.api_key""" + def __repr__(self) -> str: + return "ApiKeyAuth(api_key=)" + + def __init__(self, key: str = "") -> None: + """Create a new ApiKeyAuth instance. + + :param str key: The API key associated with your account. + If no key is passed, it will use ~openai.api_key + """ self.key = key or openai.api_key def get_token(self) -> str: - """Get the API key""" + """Get the API key + + .. note:: + + Do not directly interact with this API, it will be called + automatically when a token is needed for the request. + """ return self.key @@ -75,12 +110,16 @@ def __init__( organization: str = "", ): """Create a new OpenAI client. - + :keyword str api_base: The base URL for the API. If not specified, based on ~opeanai.api_base - :keyword auth: The authentication method or key to use. If the string value "azuredefault" is passed, it will use ~azure.identity.DefaultAzureCredential + :keyword auth: The authentication method or key to use. 
If the string value "azuredefault" is passed, + it will use ~azure.identity.DefaultAzureCredential :paramtype auth: str or ~openai.client.ApiKeyAuth or ~openai.client.AzureTokenAuth - :keyword str api_version: The API version to use. If not specified, based on ~openai.api_version or ~openai.client.LATEST_AZURE_API_VERSION. + :keyword str api_version: The API version to use. If not specified, based on ~openai.api_version + or ~openai.client.LATEST_AZURE_API_VERSION. :keyword str backend: One of 'azure' or 'openai'. If not specified, inferred from the auth method or ~openai.api_type + :keyword str organization: The identifier of the organization to use for API requests. + If not specified, based on ~openai.organization. """ # @@ -119,7 +158,7 @@ def __init__( if self.backend == 'azure' and self.api_base == "https://api.openai.com/v1": raise ValueError("You are using the 'openai.com' endpoint with an Azure credential or API type. Please provide the endpoint to your Azure resource instead.") - def __repr__(self): + def __repr__(self) -> str: constructor_args = [ f"{name}={repr(value)}" for name, value in self.__dict__.items() @@ -173,6 +212,13 @@ def _normalize_model(self, kwargs: Dict[str, Any]): pass def completion(self, prompt: str, **kwargs) -> openai.Completion: + """Creates a completion for the provided prompt and parameters. + + :param prompt: The prompt(s) to generate completions for, + encoded as a string, array of strings, array of tokens, + or array of token arrays. + :type prompt: str or Iterable[str] or Iterable[float] or Iterable[Iterable[float]] + """ self._populate_args(kwargs, prompt=prompt, stream=False) self._normalize_model(kwargs) return cast(openai.Completion, openai.Completion.create(**kwargs)) From 24dad1d332f0bd6c7f95bc63ca6200ec0634e067 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Thu, 22 Jun 2023 12:03:17 -0700 Subject: [PATCH 12/18] Add live tests (#4) Live (running against the actual service endpoints) tests added --- openai/tests/test_client.py | 364 ++++++++++++++++++++++++++++++++---- 1 file changed, 331 insertions(+), 33 deletions(-) diff --git a/openai/tests/test_client.py b/openai/tests/test_client.py index b59646c2ea..7cccadf3f4 100644 --- a/openai/tests/test_client.py +++ b/openai/tests/test_client.py @@ -1,7 +1,47 @@ +import os import pytest - import openai.client +# Live tests +API_TYPE = ["azure", "openai", "azuredefault"] +API_BASE = os.environ["AZURE_API_BASE"] +AZURE_API_KEY = os.environ["AZURE_KEY"] +OPENAI_API_KEY = os.environ["OPENAI_KEY"] +API_VERSION = "2023-06-01-preview" +COMPLETION_MODEL = "text-davinci-003" +CHAT_COMPLETION_MODEL = "gpt-35-turbo" +CHAT_COMPLETION_MODEL_OPENAI = "gpt-3.5-turbo" +EMBEDDINGS_MODEL = "text-embedding-ada-002" +IMAGE_PATH = "" +MASK_IMAGE_PATH = "" + + +@pytest.fixture +def client(api_type): + if api_type == "azure": + client = openai.client.OpenAIClient( + api_base=API_BASE, + auth=AZURE_API_KEY, + api_version=API_VERSION, + backend="azure" + ) + elif api_type == "azuredefault": + api_type = "azure" + client = openai.client.OpenAIClient( + api_base=API_BASE, + auth="azuredefault", + api_version=API_VERSION, + backend="azure" + ) + elif api_type == "openai": + client = openai.client.OpenAIClient( + auth=OPENAI_API_KEY, + backend="openai" + ) + + return client + + @pytest.fixture def clear_oai_module(monkeypatch: pytest.MonkeyPatch): for key in [ 'api_base', 'api_key', 'api_type', 'api_version']: @@ -10,11 +50,14 @@ def clear_oai_module(monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(openai, 'api_key', 
None) monkeypatch.setattr(openai, 'api_type', "open_ai") monkeypatch.setattr(openai, 'api_version', None) + monkeypatch.setattr(openai, 'organization', None) def setup_oai_module(monkeypatch: pytest.MonkeyPatch, **kwargs): for n, v in kwargs.items(): monkeypatch.setattr(openai, n, v) - + + +# MOCK TESTS ------------------------------------------------ def test_construct_client(monkeypatch: pytest.MonkeyPatch, clear_oai_module): setup_oai_module(monkeypatch, api_key=None) client = openai.client.OpenAIClient() @@ -50,34 +93,13 @@ def test_construct_azure_client_api_key(monkeypatch: pytest.MonkeyPatch, clear_o assert client.auth.get_token() == 'secret key' def test_construct_openai_client_api_key(): - client = openai.client.OpenAIClient(auth='secret key') + client = openai.client.OpenAIClient(auth='secret key', organization="my org") assert client.api_base == openai.api_base assert client.api_type == 'open_ai' + assert client.organization == "my org" assert client.auth.get_token() == 'secret key' -@pytest.fixture -def embedding_response(): - return { - "object": "list", - "data": [ - { - "object": "embedding", - "embedding": [ - 0.0023064255, - -0.009327292, - -0.0028842222, - ], - "index": 0 - } - ], - "model": "text-embedding-ada-002", - "usage": { - "prompt_tokens": 8, - "total_tokens": 8 - } - } - -def test_make_call_client_aad(monkeypatch: pytest.MonkeyPatch, clear_oai_module, embedding_response): +def test_make_call_client_aad(monkeypatch: pytest.MonkeyPatch, clear_oai_module): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' @@ -86,7 +108,6 @@ def mock_embeddings_response(*args, **kwargs): assert kwargs.get('deployment_id') == 'das deployment' assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION assert kwargs.get('api_type') == 'azure_ad' - return embedding_response monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) @@ -95,7 +116,7 @@ def mock_embeddings_response(*args, **kwargs): client.embeddings("some data", model='das deployment') -def test_make_call_client_azure_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module, embedding_response): +def test_make_call_client_azure_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' @@ -104,7 +125,6 @@ def mock_embeddings_response(*args, **kwargs): assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION assert kwargs.get('api_type') == 'azure' assert kwargs.get('api_key', 'secret key') - return embedding_response monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) @@ -113,7 +133,7 @@ def mock_embeddings_response(*args, **kwargs): client.embeddings("some data", model='das deployment') -def test_make_call_client_oai_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module, embedding_response): +def test_make_call_client_oai_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' @@ -121,10 +141,288 @@ def mock_embeddings_response(*args, **kwargs): assert kwargs.get('model') == 'das model' assert kwargs.get('api_type') == 'open_ai' assert kwargs.get('api_key', 'secret key') - return embedding_response - + 
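+
+    # Route token lookup and Embedding.create through the mocks defined above.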
monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) client = openai.client.OpenAIClient(auth="secret key") - client.embeddings("some data", model='das model') \ No newline at end of file + client.embeddings("some data", model='das model') + + +def test_populate_args(): + client = openai.client.OpenAIClient() + + # valid override + kwargs = { + "api_base": "expected", + "api_key": "expected", + "api_version": "expected", + "prompt": "expected", + } + + overrides = { + "temperature": 0.1 + } + + client._populate_args(kwargs, **overrides) + + assert kwargs == { + "api_base": "expected", + "api_key": "expected", + "api_type": "open_ai", + "api_version": "expected", + "prompt": "expected", + "organization": None, + "temperature": 0.1 + } + + + # unexpected override by user + kwargs = { + "prompt": "expected", + "api_base": "expected", + "api_key": "expected", + "api_type": "expected", + "api_version": "expected", + "organization": "expected", + "stream": True + } + + overrides = { + "stream": False + } + + with pytest.raises(TypeError): + client._populate_args(kwargs, **overrides) + + # attempt to change api_base on per-method call + kwargs = { + "prompt": "expected", + "api_base": "expected", + "api_key": "expected", + "api_type": "expected", + "api_version": "expected", + "organization": "expected", + "stream": True + } + + overrides = { + "api_base": "update", + } + + with pytest.raises(TypeError): + client._populate_args(kwargs, **overrides) + + +def test_normalize_model(): + client = openai.client.OpenAIClient(backend="azure", api_base="azurebase") + + # azure: deployment_id --> deployment_id + kwargs = {"deployment_id": "ada"} + client._normalize_model(kwargs) + assert kwargs == {"deployment_id": "ada"} + + # azure: engine --> engine + kwargs = {"engine": "ada"} + client._normalize_model(kwargs) + assert kwargs == {"engine": "ada"} + + # azure: model --> deployment_id (normalized) + kwargs = {"model": "ada"} + client._normalize_model(kwargs) + assert kwargs == {"deployment_id": "ada"} + + client = openai.client.OpenAIClient(backend="openai") + # openai: deployment_id --> exception + kwargs = {"deployment_id": "ada"} + client._normalize_model(kwargs) + # incorrect arg raised by library + assert kwargs == {"deployment_id": "ada"} + + # openai: engine --> engine + kwargs = {"engine": "ada"} + client._normalize_model(kwargs) + assert kwargs == {"engine": "ada"} + + # openai: model --> model + kwargs = {"model": "ada"} + client._normalize_model(kwargs) + assert kwargs == {"model": "ada"} + + # too many args + kwargs = {"model": "ada", "deployment_id": "ada"} + with pytest.raises(TypeError): + client._normalize_model(kwargs) + + +# LIVE TESTS ------------------------------------------------ +# COMPLETION TESTS +@pytest.mark.parametrize("api_type", API_TYPE) +def test_client_completion(client): + completion = client.completion( + prompt="hello world", + model=COMPLETION_MODEL + ) + assert completion + + +@pytest.mark.parametrize("api_type", API_TYPE) +def test_client_completion_stream(client): + completion = client.iter_completion( + prompt="hello world", + model=COMPLETION_MODEL + ) + for c in completion: + assert c + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", API_TYPE) +async def test_client_acompletion(client): + completion = await client.acompletion( + prompt="hello world", + model=COMPLETION_MODEL + ) + assert completion + +@pytest.mark.asyncio 
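+# aiter_completion returns an awaitable that resolves to an async iterator,
+# so the streamed chunks are consumed with `async for` rather than `for`.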
+@pytest.mark.parametrize("api_type", API_TYPE) +async def test_client_acompletion_stream(client): + completion = await client.aiter_completion( + prompt="hello world", + model=COMPLETION_MODEL + ) + async for c in completion: + assert c + + +# CHAT COMPLETION TESTS +@pytest.mark.parametrize("api_type", API_TYPE) +def test_client_chatcompletion(client): + chat_completion = client.chatcompletion( + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"} + ], + model=CHAT_COMPLETION_MODEL if client.backend == "azure" else CHAT_COMPLETION_MODEL_OPENAI + ) + assert chat_completion + +@pytest.mark.parametrize("api_type", API_TYPE) +def test_client_chat_completion_stream(client): + chat_completion = client.iter_chatcompletion( + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"} + ], + model=CHAT_COMPLETION_MODEL if client.backend == "azure" else CHAT_COMPLETION_MODEL_OPENAI + ) + for c in chat_completion: + assert c + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", API_TYPE) +async def test_client_achatcompletion(client): + chat_completion = await client.achatcompletion( + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"} + ], + model=CHAT_COMPLETION_MODEL if client.backend == "azure" else CHAT_COMPLETION_MODEL_OPENAI + ) + assert chat_completion + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", API_TYPE) +async def test_client_achat_completion_stream(client): + chat_completion = await client.aiter_chatcompletion( + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"} + ], + model=CHAT_COMPLETION_MODEL if client.backend == "azure" else CHAT_COMPLETION_MODEL_OPENAI + ) + async for c in chat_completion: + assert c + + +# EMBEDDING TESTS +@pytest.mark.parametrize("api_type", API_TYPE) +def test_client_embeddings(client): + embeddings = client.embeddings( + input="hello world", + model=EMBEDDINGS_MODEL + ) + assert embeddings + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", API_TYPE) +async def test_client_aembeddings(client): + embeddings = await client.aembeddings( + input="hello world", + model=EMBEDDINGS_MODEL + ) + assert embeddings + + +# IMAGE CREATE TESTS +@pytest.mark.parametrize("api_type", API_TYPE) +def test_client_image_create(client): + image = client.image( + prompt="A cute baby sea otter", + n=1 + ) + assert image + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", API_TYPE) +async def test_client_aimage_create(client): + image = await client.aimage( + prompt="A cute baby sea otter", + n=1 + ) + assert image + + +# IMAGE VARIATION TESTS +@pytest.mark.parametrize("api_type", ["openai"]) +def test_client_image_variation(client): + variation = client.image_variation( + image=open(IMAGE_PATH, "rb"), + n=2, + size="1024x1024" + ) + assert variation + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", ["openai"]) +async def test_client_aimage_variation(client): + variation = await client.aimage_variation( + image=open(IMAGE_PATH, "rb"), + n=2, + size="1024x1024" + ) + assert variation + +# IMAGE EDIT TESTS +@pytest.mark.parametrize("api_type", ["openai"]) +def test_client_image_edit(client): + edit = client.image_edit( + image=open(IMAGE_PATH, "rb"), + 
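+        # The mask is a PNG whose fully transparent areas mark the region to be
+        # edited; it must have the same dimensions as the input image.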
mask=open(MASK_IMAGE_PATH, "rb"), + prompt="A cute baby sea otter wearing a beret", + n=2, + size="1024x1024" + ) + assert edit + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", ["openai"]) +async def test_client_aimage_edit(client): + edit = await client.aimage_edit( + image=open(IMAGE_PATH, "rb"), + mask=open(MASK_IMAGE_PATH, "rb"), + prompt="A cute baby sea otter wearing a beret", + n=2, + size="1024x1024" + ) + assert edit From 8bb7d612372265feb1f5c1b293b59e8391fcf466 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Thu, 22 Jun 2023 12:33:41 -0700 Subject: [PATCH 13/18] Add edit, audio, and moderation APIs to client abstraction (#5) * add edit, moderation, audio apis --- openai/client.py | 147 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 1 deletion(-) diff --git a/openai/client.py b/openai/client.py index 0eef61f0ce..f7e754d079 100644 --- a/openai/client.py +++ b/openai/client.py @@ -371,8 +371,153 @@ async def aimage_edit( response_format=response_format, user=user, ) - return typing.cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) + return cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) + def edit( + self, + instruction: str, + *, + input: str = ..., + n: int = ..., + temperature: float = ..., + top_p: float = ..., + **kwargs, + ): + self._populate_args( + kwargs, + instruction=instruction, + input=input, + n=n, + temperature=temperature, + top_p=top_p, + ) + self._normalize_model(kwargs) + return cast(openai.Edit, openai.Edit.create(**kwargs)) + + async def aedit( + self, + instruction: str, + *, + input: str = ..., + n: int = ..., + temperature: float = ..., + top_p: float = ..., + **kwargs, + ): + self._populate_args( + kwargs, + instruction=instruction, + input=input, + n=n, + temperature=temperature, + top_p=top_p, + ) + self._normalize_model(kwargs) + return cast(openai.Edit, await openai.Edit.acreate(**kwargs)) + + def moderation( + self, + input: Union[str, Iterable[str]], + **kwargs, + ): + self._populate_args( + kwargs, + input=input, + ) + self._normalize_model(kwargs) + return cast(openai.Moderation, openai.Moderation.create(**kwargs)) + + async def amoderation( + self, + input: Union[str, Iterable[str]], + **kwargs, + ): + self._populate_args( + kwargs, + input=input, + ) + self._normalize_model(kwargs) + return cast(openai.Moderation, await openai.Moderation.acreate(**kwargs)) + + def transcribe_audio( + self, + file: Union[bytes, BinaryIO], + *, + prompt: str = ..., + response_format: str = ..., + temperature: float = ..., + language: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + file=file, + prompt=prompt, + response_format=response_format, + temperature=temperature, + language=language + ) + self._normalize_model(kwargs) + return cast(openai.Audio, openai.Audio.transcribe(**kwargs)) + + async def atranscribe_audio( + self, + file: Union[bytes, BinaryIO], + *, + prompt: str = ..., + response_format: str = ..., + temperature: float = ..., + language: str = ..., + **kwargs, + ): + self._populate_args( + kwargs, + file=file, + prompt=prompt, + response_format=response_format, + temperature=temperature, + language=language + ) + self._normalize_model(kwargs) + return cast(openai.Audio, await openai.Audio.atranscribe(**kwargs)) + + def translate_audio( + self, + file: Union[bytes, BinaryIO], + *, + prompt: str = ..., + response_format: str = ..., + temperature: float = ..., + **kwargs, + ): + self._populate_args( + kwargs, + file=file, + prompt=prompt, + 
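+            # Unlike transcribe_audio there is no `language` keyword here: the
+            # translation endpoint always produces English output.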
response_format=response_format, + temperature=temperature, + ) + self._normalize_model(kwargs) + return cast(openai.Audio, openai.Audio.translate(**kwargs)) + + async def atranslate_audio( + self, + file: Union[bytes, BinaryIO], + *, + prompt: str = ..., + response_format: str = ..., + temperature: float = ..., + **kwargs, + ): + self._populate_args( + kwargs, + file=file, + prompt=prompt, + response_format=response_format, + temperature=temperature, + ) + self._normalize_model(kwargs) + return cast(openai.Audio, await openai.Audio.atranslate(**kwargs)) if __name__ == "__main__": client = OpenAIClient( From 917b97e5fba2d9495896024ed057ffd78e7f4305 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Fri, 23 Jun 2023 15:30:12 -0700 Subject: [PATCH 14/18] Better test fixtures --- openai/tests/test_client.py | 59 ++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/openai/tests/test_client.py b/openai/tests/test_client.py index 7cccadf3f4..c1e132d2f8 100644 --- a/openai/tests/test_client.py +++ b/openai/tests/test_client.py @@ -1,4 +1,12 @@ import os +import sys + +from typing import Union +if sys.version_info >= (3, 8): + from typing import TypedDict +else: + from typing_extensions import TypedDict + import pytest import openai.client @@ -44,8 +52,6 @@ def client(api_type): @pytest.fixture def clear_oai_module(monkeypatch: pytest.MonkeyPatch): - for key in [ 'api_base', 'api_key', 'api_type', 'api_version']: - ... monkeypatch.setattr(openai, 'api_base', "https://api.openai.com/v1") monkeypatch.setattr(openai, 'api_key', None) monkeypatch.setattr(openai, 'api_type', "open_ai") @@ -56,16 +62,18 @@ def setup_oai_module(monkeypatch: pytest.MonkeyPatch, **kwargs): for n, v in kwargs.items(): monkeypatch.setattr(openai, n, v) - # MOCK TESTS ------------------------------------------------ -def test_construct_client(monkeypatch: pytest.MonkeyPatch, clear_oai_module): + +@pytest.mark.usefixtures("clear_oai_module") +def test_construct_client(monkeypatch: pytest.MonkeyPatch): setup_oai_module(monkeypatch, api_key=None) client = openai.client.OpenAIClient() assert client.api_base == openai.api_base assert client.api_type == openai.api_type assert client.auth.get_token() is None -def test_construct_azure_client(monkeypatch: pytest.MonkeyPatch, clear_oai_module): +@pytest.mark.usefixtures("clear_oai_module") +def test_construct_azure_client(monkeypatch: pytest.MonkeyPatch): setup_oai_module(monkeypatch, api_key=None, api_base='something different') provided_api_base = 'https://contoso.microsoft.com' @@ -74,7 +82,8 @@ def test_construct_azure_client(monkeypatch: pytest.MonkeyPatch, clear_oai_modul assert client.api_type == 'azure' assert client.auth.get_token() is None -def test_construct_azure_client_aad(monkeypatch: pytest.MonkeyPatch, clear_oai_module): +@pytest.mark.usefixtures("clear_oai_module") +def test_construct_azure_client_aad(monkeypatch: pytest.MonkeyPatch): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' @@ -85,7 +94,8 @@ def mock_get_token(*args, **kwargs): assert client.api_type == 'azure_ad' assert client.auth.get_token() == 'expected token' -def test_construct_azure_client_api_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module): +@pytest.mark.usefixtures("clear_oai_module") +def test_construct_azure_client_api_key(monkeypatch: pytest.MonkeyPatch): provided_api_base = 'https://contoso.microsoft.com' client = openai.client.OpenAIClient(api_base=provided_api_base, 
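        # A plain string passed as `auth` selects API-key authentication; the
        # special value 'azuredefault' would select AAD token auth instead.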
backend='azure', auth='secret key') assert client.api_base == provided_api_base @@ -99,49 +109,52 @@ def test_construct_openai_client_api_key(): assert client.organization == "my org" assert client.auth.get_token() == 'secret key' -def test_make_call_client_aad(monkeypatch: pytest.MonkeyPatch, clear_oai_module): +@pytest.mark.usefixtures("clear_oai_module") +def test_make_call_client_aad(monkeypatch: pytest.MonkeyPatch): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' def mock_embeddings_response(*args, **kwargs): - assert kwargs.get('deployment_id') == 'das deployment' - assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION - assert kwargs.get('api_type') == 'azure_ad' + return args, kwargs monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) client = openai.client.OpenAIClient(backend='azure', api_base = provided_api_base, auth=openai.client.AzureTokenAuth(credential='dummy')) - client.embeddings("some data", model='das deployment') + args, kwargs = client.embeddings("some data", model='das deployment') + assert kwargs.get('deployment_id') == 'das deployment' + assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION + assert kwargs.get('api_type') == 'azure_ad' -def test_make_call_client_azure_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module): +@pytest.mark.usefixtures("clear_oai_module") +def test_make_call_client_azure_key(monkeypatch: pytest.MonkeyPatch): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' def mock_embeddings_response(*args, **kwargs): - assert kwargs.get('deployment_id') == 'das deployment' - assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION - assert kwargs.get('api_type') == 'azure' - assert kwargs.get('api_key') == 'secret key' + return args, kwargs monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) client = openai.client.OpenAIClient(backend='azure', api_base = provided_api_base, auth="secret key") - client.embeddings("some data", model='das deployment') + args, kwargs = client.embeddings("some data", model='das deployment') + assert kwargs.get('deployment_id') == 'das deployment' + assert kwargs.get('api_version') == openai.client.LATEST_AZURE_API_VERSION + assert kwargs.get('api_type') == 'azure' + assert kwargs.get('api_key') == 'secret key' -def test_make_call_client_oai_key(monkeypatch: pytest.MonkeyPatch, clear_oai_module): +@pytest.mark.usefixtures("clear_oai_module") +def test_make_call_client_oai_key(monkeypatch: pytest.MonkeyPatch): provided_api_base = 'https://contoso.microsoft.com' def mock_get_token(*args, **kwargs): return 'expected token' def mock_embeddings_response(*args, **kwargs): - assert kwargs.get('model') == 'das model' - assert kwargs.get('api_type') == 'open_ai' - assert kwargs.get('api_key') == 'secret key' - + return args, kwargs + monkeypatch.setattr(openai.client.AzureTokenAuth, 'get_token', mock_get_token) monkeypatch.setattr(openai.Embedding, 'create', mock_embeddings_response) From e1edec40e936d71933ca803ad7f68fc2042a1ae8 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Wed, 28 Jun 2023 16:59:46 -0700 Subject: [PATCH 15/18] More docstrings --- openai/client.py | 441 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 427 
insertions(+), 14 deletions(-) diff --git a/openai/client.py b/openai/client.py index 9bb1c8053a..c7ad3c5a53 100644 --- a/openai/client.py +++ b/openai/client.py @@ -3,12 +3,13 @@ from typing import ( Union, - Dict, + mapping, Any, cast, Iterable, BinaryIO, Optional, + Mapping, TYPE_CHECKING ) if sys.version_info >= (3, 8): @@ -24,6 +25,7 @@ LATEST_AZURE_API_VERSION = "2023-05-15" +CompletionPrompt = Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]] class AzureTokenAuth: """Authentication using an Azure Active Directory token. @@ -161,13 +163,17 @@ def __init__( def __repr__(self) -> str: constructor_args = [ f"{name}={repr(value)}" - for name, value in self.__dict__.items() + for name, value in self.__mapping__.items() if value is not None ] return f"OpenAIClient({','.join(constructor_args)})" - def _populate_args(self, kwargs: Dict[str, Any], **overrides) -> None: - """Populate default arguments based on the current client configuration/defaults""" + def _populate_args(self, kwargs: mapping[str, Any], **overrides) -> None: + """Populate default arguments based on the current client configuration/defaults + + :param kwargs: The keyword arguments to send in the API request. + :param overrides: The user arguments provided to the client method. + """ kwargs.setdefault("api_base", self.api_base or openai.api_base) kwargs.setdefault("api_key", self.auth.get_token()) kwargs.setdefault("api_type", self.api_type) @@ -182,12 +188,14 @@ def _populate_args(self, kwargs: Dict[str, Any], **overrides) -> None: if kwargs[key] != val: raise TypeError(f"No parameter named `{key}`") - def _normalize_model(self, kwargs: Dict[str, Any]): + def _normalize_model(self, kwargs: mapping[str, Any]): """Normalize model/engine/deployment_id based on which backend the client is configured to target. Specifically, it will pass the provided `model` parameter as `deployment_id` unless `deployment_id` is explicitly passed in. + + :param kwargs: The keyword arguments to send in the API request. """ if ( len( @@ -211,26 +219,138 @@ def _normalize_model(self, kwargs: Dict[str, Any]): except KeyError: pass - def completion(self, prompt: str, **kwargs) -> openai.Completion: + def completion(self, prompt: CompletionPrompt, **kwargs: Any) -> openai.Completion: """Creates a completion for the provided prompt and parameters. :param prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. - :type prompt: str or Iterable[str] or Iterable[float] or Iterable[Iterable[float]] + :keyword str model: ID of the model or deployment to use. Required. + :keyword str suffix: The suffix that comes after a completion of inserted text. + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword int logprobs: Include the log probabilities on the logprobs most + likely tokens, as well the chosen tokens. 
For example, if logprobs is 5, + the API will return a list of the 5 most likely tokens. The API will always + return the logprob of the sampled token, so there may be up to logprobs+1 + elements in the response. The maximum value for logprobs is 5. + :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword int best_of: Generates best_of completions server-side and returns + the "best" (the one with the highest log probability per token). + When used with n, best_of controls the number of candidate completions and + n specifies how many to return - best_of must be greater than n. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. """ self._populate_args(kwargs, prompt=prompt, stream=False) self._normalize_model(kwargs) return cast(openai.Completion, openai.Completion.create(**kwargs)) - async def acompletion(self, prompt: str, **kwargs) -> openai.Completion: + async def acompletion(self, prompt: CompletionPrompt, **kwargs: Any) -> openai.Completion: + """Creates a completion for the provided prompt and parameters. + + :param prompt: The prompt(s) to generate completions for, + encoded as a string, array of strings, array of tokens, + or array of token arrays. + :keyword str model: ID of the model or deployment to use. Required. + :keyword str suffix: The suffix that comes after a completion of inserted text. + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword int logprobs: Include the log probabilities on the logprobs most + likely tokens, as well the chosen tokens. For example, if logprobs is 5, + the API will return a list of the 5 most likely tokens. The API will always + return the logprob of the sampled token, so there may be up to logprobs+1 + elements in the response. The maximum value for logprobs is 5. + :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword float presence_penalty: Number between -2.0 and 2.0. 
Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword int best_of: Generates best_of completions server-side and returns + the "best" (the one with the highest log probability per token). + When used with n, best_of controls the number of candidate completions and + n specifies how many to return - best_of must be greater than n. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, prompt=prompt, stream=False) self._normalize_model(kwargs) return cast(openai.Completion, await openai.Completion.acreate(**kwargs)) def iter_completion( - self, prompt: str, **kwargs + self, prompt: CompletionPrompt, **kwargs: Any ) -> Iterable[openai.Completion]: + """Creates a streaming completion for the provided prompt and parameters. + + :param prompt: The prompt(s) to generate completions for, + encoded as a string, array of strings, array of tokens, + or array of token arrays. + :keyword str model: ID of the model or deployment to use. Required. + :keyword str suffix: The suffix that comes after a completion of inserted text. + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword int logprobs: Include the log probabilities on the logprobs most + likely tokens, as well the chosen tokens. For example, if logprobs is 5, + the API will return a list of the 5 most likely tokens. The API will always + return the logprob of the sampled token, so there may be up to logprobs+1 + elements in the response. The maximum value for logprobs is 5. + :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. 
+ :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, prompt=prompt, stream=True) self._normalize_model(kwargs) return cast( @@ -238,8 +358,45 @@ def iter_completion( ) async def aiter_completion( - self, prompt: str, **kwargs + self, prompt: CompletionPrompt, **kwargs: Any ) -> AsyncIterable[openai.Completion]: + """Creates a streaming completion for the provided prompt and parameters. + + :param prompt: The prompt(s) to generate completions for, + encoded as a string, array of strings, array of tokens, + or array of token arrays. + :keyword str model: ID of the model or deployment to use. Required. + :keyword str suffix: The suffix that comes after a completion of inserted text. + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword int logprobs: Include the log probabilities on the logprobs most + likely tokens, as well the chosen tokens. For example, if logprobs is 5, + the API will return a list of the 5 most likely tokens. The API will always + return the logprob of the sampled token, so there may be up to logprobs+1 + elements in the response. The maximum value for logprobs is 5. + :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, prompt=prompt, stream=True) self._normalize_model(kwargs) return cast( @@ -247,14 +404,96 @@ async def aiter_completion( await openai.Completion.acreate(**kwargs), ) - def chatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: + def chatcompletion( + self, + messages: Iterable[Mapping[str, Any]], + **kwargs: Any + ) -> openai.ChatCompletion: + """Creates a model response for the given chat conversation. + + :param messages: A list of messages comprising the conversation so far. + :keyword str model: ID of the model or deployment to use. Required. + :param functions: A list of functions the model may generate JSON inputs for. 
+ :type functions: iterable[mapping[str, any]] + :param function_call: Controls how the model responds to function calls. + "none" means the model does not call a function, and responds to the + end-user. "auto" means the model can pick between an end-user or calling + a function. Specifying a particular function via {"name":\ "my_function"} + forces the model to call that function. "none" is the default when no + functions are present. "auto" is the default if functions are present. + :type function_call: str or mapping[str, any] + :param float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, messages=messages, stream=False) self._normalize_model(kwargs) return cast( openai.ChatCompletion, openai.ChatCompletion.create(**kwargs) ) - async def achatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: + async def achatcompletion( + self, + messages: Iterable[Mapping[str, Any]], + **kwargs: Any + ) -> openai.ChatCompletion: + """Creates a model response for the given chat conversation. + + :param messages: A list of messages comprising the conversation so far. + :keyword str model: ID of the model or deployment to use. Required. + :param functions: A list of functions the model may generate JSON inputs for. + :type functions: iterable[mapping[str, any]] + :param function_call: Controls how the model responds to function calls. + "none" means the model does not call a function, and responds to the + end-user. "auto" means the model can pick between an end-user or calling + a function. Specifying a particular function via {"name":\ "my_function"} + forces the model to call that function. "none" is the default when no + functions are present. "auto" is the default if functions are present. + :type function_call: str or mapping[str, any] + :param float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. 
+ :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, messages=messages, stream=False) self._normalize_model(kwargs) return cast( @@ -262,8 +501,47 @@ async def achatcompletion(self, messages, **kwargs) -> openai.ChatCompletion: ) def iter_chatcompletion( - self, messages, **kwargs + self, + messages: Iterable[Mapping[str, Any]], + **kwargs: Any ) -> Iterable[openai.ChatCompletion]: + """Creates a streaming model response for the given chat conversation. + + :param messages: A list of messages comprising the conversation so far. + :keyword str model: ID of the model or deployment to use. Required. + :param functions: A list of functions the model may generate JSON inputs for. + :type functions: iterable[mapping[str, any]] + :param function_call: Controls how the model responds to function calls. + "none" means the model does not call a function, and responds to the + end-user. "auto" means the model can pick between an end-user or calling + a function. Specifying a particular function via {"name":\ "my_function"} + forces the model to call that function. "none" is the default when no + functions are present. "auto" is the default if functions are present. + :type function_call: str or mapping[str, any] + :param float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. 
+ :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, messages=messages, stream=True) self._normalize_model(kwargs) return cast( @@ -272,8 +550,47 @@ def iter_chatcompletion( ) async def aiter_chatcompletion( - self, messages, **kwargs + self, + messages: Iterable[Mapping[str, Any]], + **kwargs: Any ) -> AsyncIterable[openai.ChatCompletion]: + """Creates a streaming model response for the given chat conversation. + + :param messages: A list of messages comprising the conversation so far. + :keyword str model: ID of the model or deployment to use. Required. + :param functions: A list of functions the model may generate JSON inputs for. + :type functions: iterable[mapping[str, any]] + :param function_call: Controls how the model responds to function calls. + "none" means the model does not call a function, and responds to the + end-user. "auto" means the model can pick between an end-user or calling + a function. Specifying a particular function via {"name":\ "my_function"} + forces the model to call that function. "none" is the default when no + functions are present. "auto" is the default if functions are present. + :type function_call: str or mapping[str, any] + :param float temperature: What sampling temperature to use, between 0 and 2. + Higher values like 0.8 will make the output more random, while lower values + like 0.2 will make it more focused and deterministic. + :keyword float top_p: An alternative to sampling with temperature, called + nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. + :keyword int n: How many completions to generate for each prompt. + :keyword stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + :paramtype stop: str or iterable[str] + :keyword int max_tokens: The maximum number of tokens to generate in the completion. + :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on whether they appear in the text so far, increasing + the model's likelihood to talk about new topics. + :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + :keyword logit_bias: Modify the likelihood of specified tokens appearing + in the completion. + :paramtype logit_bias: mapping[int, int] + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, messages=messages, stream=True) self._normalize_model(kwargs) return cast( @@ -282,11 +599,35 @@ async def aiter_chatcompletion( ) def embeddings(self, input, **kwargs) -> openai.Embedding: + """Creates an embedding vector representing the input text. + + :param input: Input text to embed, encoded as a string or array + of tokens. 
To embed multiple inputs in a single request, pass + an array of strings or array of token arrays. Each input must + not exceed the max input tokens for the model (8191 tokens for + text-embedding-ada-002) + :type input: str or iterable[str] or iterable[int] or iterable[iterable[int]] + :keyword str model: ID of the model or deployment to use. Required. + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, input=input) self._normalize_model(kwargs) return cast(openai.Embedding, openai.Embedding.create(**kwargs)) async def aembeddings(self, input, **kwargs) -> openai.Embedding: + """Creates an embedding vector representing the input text. + + :param input: Input text to embed, encoded as a string or array + of tokens. To embed multiple inputs in a single request, pass + an array of strings or array of token arrays. Each input must + not exceed the max input tokens for the model (8191 tokens for + text-embedding-ada-002) + :type input: str or iterable[str] or iterable[int] or iterable[iterable[int]] + :keyword str model: ID of the model or deployment to use. Required. + :keyword str user: A unique identifier representing your end-user, which can + help OpenAI to monitor and detect abuse. + """ self._populate_args(kwargs, input=input) self._normalize_model(kwargs) return cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) @@ -301,6 +642,16 @@ def image( user: str = ..., **kwargs, ): + """Creates an image given a prompt. + + :param prompt: A text description of the desired image(s). The maximum length is 1000 characters. + :keyword n: The number of images to generate. Must be between 1 and 10. + :keyword size: The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + :keyword response_format: The format in which the generated images are returned. + Must be one of url or b64_json. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to + monitor and detect abuse. + """ self._populate_args( kwargs, prompt=prompt, @@ -321,6 +672,16 @@ async def aimage( user: str = ..., **kwargs, ): + """Creates an image given a prompt. + + :param prompt: A text description of the desired image(s). The maximum length is 1000 characters. + :keyword n: The number of images to generate. Must be between 1 and 10. + :keyword size: The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + :keyword response_format: The format in which the generated images are returned. + Must be one of url or b64_json. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to + monitor and detect abuse. + """ self._populate_args( kwargs, prompt=prompt, @@ -341,6 +702,17 @@ def image_variation( user: str = ..., **kwargs, ): + """Creates a variation of a given image. + + :param image: The image to use as the basis for the variation(s). + Must be a valid PNG file, less than 4MB, and square. + :keyword n: The number of images to generate. Must be between 1 and 10. + :keyword size: The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + :keyword response_format: The format in which the generated images are returned. + Must be one of url or b64_json. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to + monitor and detect abuse. 
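+
+        Example (illustrative only; the file path is a placeholder)::
+
+            variation = client.image_variation(
+                image=open("otter.png", "rb"), n=2, size="1024x1024"
+            )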
+ """ self._populate_args( kwargs, image=image, @@ -361,6 +733,17 @@ async def aimage_variation( user: str = ..., **kwargs, ): + """Creates a variation of a given image. + + :param image: The image to use as the basis for the variation(s). + Must be a valid PNG file, less than 4MB, and square. + :keyword n: The number of images to generate. Must be between 1 and 10. + :keyword size: The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + :keyword response_format: The format in which the generated images are returned. + Must be one of url or b64_json. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to + monitor and detect abuse. + """ self._populate_args( kwargs, image=image, @@ -383,6 +766,21 @@ def image_edit( user: str = ..., **kwargs, ): + """Creates an edited or extended image given an original image and a prompt. + + :param image: The image to edit. Must be a valid PNG file, less than 4MB, and square. + If mask is not provided, image must have transparency, which will be used as the mask. + :param prompt: A text description of the desired image(s). The maximum length is 1000 characters. + :keyword mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where image should be edited. Must be a valid PNG file, less than 4MB, and have the + same dimensions as image. + :keyword n: The number of images to generate. Must be between 1 and 10. + :keyword size: The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + :keyword response_format: The format in which the generated images are returned. + Must be one of url or b64_json. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to + monitor and detect abuse. + """ self._populate_args( kwargs, image=image, @@ -417,6 +815,21 @@ async def aimage_edit( response_format=response_format, user=user, ) + """Creates an edited or extended image given an original image and a prompt. + + :param image: The image to edit. Must be a valid PNG file, less than 4MB, and square. + If mask is not provided, image must have transparency, which will be used as the mask. + :param prompt: A text description of the desired image(s). The maximum length is 1000 characters. + :keyword mask: An additional image whose fully transparent areas (e.g. where alpha is zero) + indicate where image should be edited. Must be a valid PNG file, less than 4MB, and have the + same dimensions as image. + :keyword n: The number of images to generate. Must be between 1 and 10. + :keyword size: The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + :keyword response_format: The format in which the generated images are returned. + Must be one of url or b64_json. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to + monitor and detect abuse. 
+ """ return cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) def edit( From 48adec5cb3ed1f5af53c750b591253629fa46538 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Wed, 19 Jul 2023 16:51:38 -0700 Subject: [PATCH 16/18] docstrings + type hints + remove prompt edits (deprecated) --- openai/client.py | 777 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 546 insertions(+), 231 deletions(-) diff --git a/openai/client.py b/openai/client.py index c7ad3c5a53..7423bb704f 100644 --- a/openai/client.py +++ b/openai/client.py @@ -3,7 +3,6 @@ from typing import ( Union, - mapping, Any, cast, Iterable, @@ -25,7 +24,6 @@ LATEST_AZURE_API_VERSION = "2023-05-15" -CompletionPrompt = Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]] class AzureTokenAuth: """Authentication using an Azure Active Directory token. @@ -163,12 +161,12 @@ def __init__( def __repr__(self) -> str: constructor_args = [ f"{name}={repr(value)}" - for name, value in self.__mapping__.items() + for name, value in self.__dict__.items() if value is not None ] return f"OpenAIClient({','.join(constructor_args)})" - def _populate_args(self, kwargs: mapping[str, Any], **overrides) -> None: + def _populate_args(self, kwargs: Mapping[str, Any], **overrides) -> None: """Populate default arguments based on the current client configuration/defaults :param kwargs: The keyword arguments to send in the API request. @@ -188,7 +186,7 @@ def _populate_args(self, kwargs: mapping[str, Any], **overrides) -> None: if kwargs[key] != val: raise TypeError(f"No parameter named `{key}`") - def _normalize_model(self, kwargs: mapping[str, Any]): + def _normalize_model(self, kwargs: Mapping[str, Any]): """Normalize model/engine/deployment_id based on which backend the client is configured to target. @@ -219,185 +217,341 @@ def _normalize_model(self, kwargs: mapping[str, Any]): except KeyError: pass - def completion(self, prompt: CompletionPrompt, **kwargs: Any) -> openai.Completion: + def completion( + self, + prompt: Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]], + *, + model: str = ..., + deployment_id: str = ..., + suffix: str = ..., + max_tokens: int = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + logprobs: int = ..., + echo: bool = ..., + stop: Union[str, Iterable[str]] = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + best_of: int = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., + **kwargs: Any + ) -> openai.Completion: """Creates a completion for the provided prompt and parameters. :param prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. - :keyword str model: ID of the model or deployment to use. Required. - :keyword str suffix: The suffix that comes after a completion of inserted text. - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float temperature: What sampling temperature to use, between 0 and 2. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword suffix: The suffix that comes after a completion of inserted text. + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. 
- :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. - :keyword int logprobs: Include the log probabilities on the logprobs most + :keyword n: How many completions to generate for each prompt. + :keyword logprobs: Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response. The maximum value for logprobs is 5. - :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword echo: Echo back the prompt in addition to the completion. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - :keyword int best_of: Generates best_of completions server-side and returns + :keyword best_of: Generates best_of completions server-side and returns the "best" (the one with the highest log probability per token). When used with n, best_of controls the number of candidate completions and n specifies how many to return - best_of must be greater than n. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. 
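+
+        Example (illustrative; model and deployment names vary by account)::
+
+            completion = client.completion("hello world", model="text-davinci-003")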
""" - self._populate_args(kwargs, prompt=prompt, stream=False) + self._populate_args( + kwargs, + model=model, + deployment_id=deployment_id, + suffix=suffix, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + logprobs=logprobs, + echo=echo, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + best_of=best_of, + logit_bias=logit_bias, + user=user, + prompt=prompt, + stream=False + ) self._normalize_model(kwargs) return cast(openai.Completion, openai.Completion.create(**kwargs)) - async def acompletion(self, prompt: CompletionPrompt, **kwargs: Any) -> openai.Completion: + async def acompletion( + self, + prompt: Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]], + *, + model: str = ..., + deployment_id: str = ..., + suffix: str = ..., + max_tokens: int = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + logprobs: int = ..., + echo: bool = ..., + stop: Union[str, Iterable[str]] = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + best_of: int = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., + **kwargs: Any + ) -> openai.Completion: """Creates a completion for the provided prompt and parameters. :param prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. - :keyword str model: ID of the model or deployment to use. Required. - :keyword str suffix: The suffix that comes after a completion of inserted text. - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float temperature: What sampling temperature to use, between 0 and 2. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword suffix: The suffix that comes after a completion of inserted text. + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. - :keyword int logprobs: Include the log probabilities on the logprobs most + :keyword n: How many completions to generate for each prompt. + :keyword logprobs: Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response. The maximum value for logprobs is 5. - :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword echo: Echo back the prompt in addition to the completion. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword presence_penalty: Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - :keyword int best_of: Generates best_of completions server-side and returns + :keyword best_of: Generates best_of completions server-side and returns the "best" (the one with the highest log probability per token). When used with n, best_of controls the number of candidate completions and n specifies how many to return - best_of must be greater than n. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - self._populate_args(kwargs, prompt=prompt, stream=False) + self._populate_args( + kwargs, + model=model, + deployment_id=deployment_id, + suffix=suffix, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + logprobs=logprobs, + echo=echo, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + best_of=best_of, + logit_bias=logit_bias, + user=user, + prompt=prompt, + stream=False + ) self._normalize_model(kwargs) return cast(openai.Completion, await openai.Completion.acreate(**kwargs)) def iter_completion( - self, prompt: CompletionPrompt, **kwargs: Any + self, + prompt: Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]], + *, + model: str = ..., + deployment_id: str = ..., + suffix: str = ..., + max_tokens: int = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + logprobs: int = ..., + echo: bool = ..., + stop: Union[str, Iterable[str]] = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + best_of: int = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., + **kwargs: Any ) -> Iterable[openai.Completion]: """Creates a streaming completion for the provided prompt and parameters. :param prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. - :keyword str model: ID of the model or deployment to use. Required. - :keyword str suffix: The suffix that comes after a completion of inserted text. - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float temperature: What sampling temperature to use, between 0 and 2. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword suffix: The suffix that comes after a completion of inserted text. + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. - :keyword int logprobs: Include the log probabilities on the logprobs most + :keyword n: How many completions to generate for each prompt. + :keyword logprobs: Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response. The maximum value for logprobs is 5. - :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword echo: Echo back the prompt in addition to the completion. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + :keyword best_of: Generates best_of completions server-side and returns + the "best" (the one with the highest log probability per token). + When used with n, best_of controls the number of candidate completions and + n specifies how many to return - best_of must be greater than n. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - self._populate_args(kwargs, prompt=prompt, stream=True) + self._populate_args( + kwargs, + model=model, + deployment_id=deployment_id, + suffix=suffix, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + logprobs=logprobs, + echo=echo, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + best_of=best_of, + logit_bias=logit_bias, + user=user, + prompt=prompt, + stream=True + ) self._normalize_model(kwargs) return cast( Iterable[openai.Completion], openai.Completion.create(**kwargs) ) async def aiter_completion( - self, prompt: CompletionPrompt, **kwargs: Any + self, + prompt: Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]], + *, + model: str = ..., + deployment_id: str = ..., + suffix: str = ..., + max_tokens: int = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + logprobs: int = ..., + echo: bool = ..., + stop: Union[str, Iterable[str]] = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + best_of: int = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., + **kwargs: Any ) -> AsyncIterable[openai.Completion]: """Creates a streaming completion for the provided prompt and parameters. :param prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. 
- :keyword str model: ID of the model or deployment to use. Required. - :keyword str suffix: The suffix that comes after a completion of inserted text. - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float temperature: What sampling temperature to use, between 0 and 2. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword suffix: The suffix that comes after a completion of inserted text. + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. - :keyword int logprobs: Include the log probabilities on the logprobs most + :keyword n: How many completions to generate for each prompt. + :keyword logprobs: Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response. The maximum value for logprobs is 5. - :keyword bool echo: Echo back the prompt in addition to the completion. + :keyword echo: Echo back the prompt in addition to the completion. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + :keyword best_of: Generates best_of completions server-side and returns + the "best" (the one with the highest log probability per token). + When used with n, best_of controls the number of candidate completions and + n specifies how many to return - best_of must be greater than n. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. 
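The async streaming variant is awaited once to obtain the stream and then iterated; a minimal sketch, assuming a plain OpenAI backend and a placeholder model ID:

    import asyncio

    from openai.client import OpenAIClient

    async def main() -> None:
        client = OpenAIClient()
        # aiter_completion() awaits the request, then yields partial results.
        stream = await client.aiter_completion(
            "Write a haiku about the sea",
            model="text-davinci-003",
            max_tokens=64,
        )
        async for chunk in stream:  # each chunk is a partial Completion
            print(chunk)

    asyncio.run(main())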
""" - self._populate_args(kwargs, prompt=prompt, stream=True) + self._populate_args( + kwargs, + model=model, + deployment_id=deployment_id, + suffix=suffix, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + logprobs=logprobs, + echo=echo, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + best_of=best_of, + logit_bias=logit_bias, + user=user, + prompt=prompt, + stream=True + ) self._normalize_model(kwargs) return cast( AsyncIterable[openai.Completion], @@ -407,46 +561,74 @@ async def aiter_completion( def chatcompletion( self, messages: Iterable[Mapping[str, Any]], + *, + model: str = ..., + deployment_id: str = ..., + functions: Iterable[Mapping[str, Any]] = ..., + function_call: Union[str, Mapping[str, Any]] = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + stop: Union[str, Iterable[str]] = ..., + max_tokens: int = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., **kwargs: Any ) -> openai.ChatCompletion: """Creates a model response for the given chat conversation. :param messages: A list of messages comprising the conversation so far. - :keyword str model: ID of the model or deployment to use. Required. - :param functions: A list of functions the model may generate JSON inputs for. - :type functions: iterable[mapping[str, any]] - :param function_call: Controls how the model responds to function calls. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword functions: A list of functions the model may generate JSON inputs for. + :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via {"name":\ "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. - :type function_call: str or mapping[str, any] - :param float temperature: What sampling temperature to use, between 0 and 2. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. + :keyword n: How many completions to generate for each prompt. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. 
- :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - self._populate_args(kwargs, messages=messages, stream=False) + self._populate_args( + kwargs, + messages=messages, + model=model, + deployment_id=deployment_id, + functions=functions, + function_call=function_call, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + user=user, + stream=False + ) self._normalize_model(kwargs) return cast( openai.ChatCompletion, openai.ChatCompletion.create(**kwargs) @@ -455,45 +637,74 @@ def chatcompletion( async def achatcompletion( self, messages: Iterable[Mapping[str, Any]], + *, + model: str = ..., + deployment_id: str = ..., + functions: Iterable[Mapping[str, Any]] = ..., + function_call: Union[str, Mapping[str, Any]] = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + stop: Union[str, Iterable[str]] = ..., + max_tokens: int = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., **kwargs: Any ) -> openai.ChatCompletion: """Creates a model response for the given chat conversation. :param messages: A list of messages comprising the conversation so far. - :keyword str model: ID of the model or deployment to use. Required. - :param functions: A list of functions the model may generate JSON inputs for. - :type functions: iterable[mapping[str, any]] - :param function_call: Controls how the model responds to function calls. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword functions: A list of functions the model may generate JSON inputs for. + :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via {"name":\ "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. - :type function_call: str or mapping[str, any] - :param float temperature: What sampling temperature to use, between 0 and 2. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. 
+        :keyword n: How many completions to generate for each prompt.
         :keyword stop: Up to 4 sequences where the API will stop generating
          further tokens. The returned text will not contain the stop sequence.
-        :paramtype stop: str or iterable[str]
-        :keyword int max_tokens: The maximum number of tokens to generate in the completion.
-        :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values
+        :keyword max_tokens: The maximum number of tokens to generate in the completion.
+        :keyword presence_penalty: Number between -2.0 and 2.0. Positive values
          penalize new tokens based on whether they appear in the text so far,
          increasing the model's likelihood to talk about new topics.
-        :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values
+        :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values
          penalize new tokens based on their existing frequency in the text so far,
          decreasing the model's likelihood to repeat the same line verbatim.
         :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-        :paramtype logit_bias: mapping[int, int]
-        :keyword str user: A unique identifier representing your end-user, which can
+        :keyword user: A unique identifier representing your end-user, which can
          help OpenAI to monitor and detect abuse.
         """
-        self._populate_args(kwargs, messages=messages, stream=False)
+        self._populate_args(
+            kwargs,
+            messages=messages,
+            model=model,
+            deployment_id=deployment_id,
+            functions=functions,
+            function_call=function_call,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            n=n,
+            stop=stop,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
+            user=user,
+            stream=False
+        )
         self._normalize_model(kwargs)
         return cast(
@@ -503,46 +714,74 @@ async def achatcompletion(
     def iter_chatcompletion(
         self,
         messages: Iterable[Mapping[str, Any]],
+        *,
+        model: str = ...,
+        deployment_id: str = ...,
+        functions: Iterable[Mapping[str, Any]] = ...,
+        function_call: Union[str, Mapping[str, Any]] = ...,
+        temperature: float = ...,
+        top_p: float = ...,
+        n: int = ...,
+        stop: Union[str, Iterable[str]] = ...,
+        max_tokens: int = ...,
+        presence_penalty: float = ...,
+        frequency_penalty: float = ...,
+        logit_bias: Mapping[int, int] = ...,
+        user: str = ...,
         **kwargs: Any
     ) -> Iterable[openai.ChatCompletion]:
         """Creates a streaming model response for the given chat conversation.

         :param messages: A list of messages comprising the conversation so far.
-        :keyword str model: ID of the model or deployment to use. Required.
-        :param functions: A list of functions the model may generate JSON inputs for.
-        :type functions: iterable[mapping[str, any]]
-        :param function_call: Controls how the model responds to function calls.
+        :keyword model: ID of the model or deployment to use.
+        :keyword deployment_id: ID of the deployment to use.
+        :keyword functions: A list of functions the model may generate JSON inputs for.
+        :keyword function_call: Controls how the model responds to function calls.
          "none" means the model does not call a function, and responds to the end-user.
          "auto" means the model can pick between an end-user or calling a function.
          Specifying a particular function via {"name":\ "my_function"} forces the model
          to call that function. "none" is the default when no functions are present.
          "auto" is the default if functions are present.
- :type function_call: str or mapping[str, any] - :param float temperature: What sampling temperature to use, between 0 and 2. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. + :keyword n: How many completions to generate for each prompt. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - self._populate_args(kwargs, messages=messages, stream=True) + self._populate_args( + kwargs, + messages=messages, + model=model, + deployment_id=deployment_id, + functions=functions, + function_call=function_call, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + user=user, + stream=True + ) self._normalize_model(kwargs) return cast( Iterable[openai.ChatCompletion], @@ -552,53 +791,89 @@ def iter_chatcompletion( async def aiter_chatcompletion( self, messages: Iterable[Mapping[str, Any]], + *, + model: str = ..., + deployment_id: str = ..., + functions: Iterable[Mapping[str, Any]] = ..., + function_call: Union[str, Mapping[str, Any]] = ..., + temperature: float = ..., + top_p: float = ..., + n: int = ..., + stop: Union[str, Iterable[str]] = ..., + max_tokens: int = ..., + presence_penalty: float = ..., + frequency_penalty: float = ..., + logit_bias: Mapping[int, int] = ..., + user: str = ..., **kwargs: Any ) -> AsyncIterable[openai.ChatCompletion]: """Creates a streaming model response for the given chat conversation. :param messages: A list of messages comprising the conversation so far. - :keyword str model: ID of the model or deployment to use. Required. - :param functions: A list of functions the model may generate JSON inputs for. 
- :type functions: iterable[mapping[str, any]] - :param function_call: Controls how the model responds to function calls. + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword functions: A list of functions the model may generate JSON inputs for. + :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via {"name":\ "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. - :type function_call: str or mapping[str, any] - :param float temperature: What sampling temperature to use, between 0 and 2. + :keyword temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - :keyword float top_p: An alternative to sampling with temperature, called + :keyword top_p: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - :keyword int n: How many completions to generate for each prompt. + :keyword n: How many completions to generate for each prompt. :keyword stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - :paramtype stop: str or iterable[str] - :keyword int max_tokens: The maximum number of tokens to generate in the completion. - :keyword float presence_penalty: Number between -2.0 and 2.0. Positive values + :keyword max_tokens: The maximum number of tokens to generate in the completion. + :keyword presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - :keyword float frequency_penalty: Number between -2.0 and 2.0. Positive values + :keyword frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. :keyword logit_bias: Modify the likelihood of specified tokens appearing in the completion. - :paramtype logit_bias: mapping[int, int] - :keyword str user: A unique identifier representing your end-user, which can + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. 
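A sketch of the function-calling keywords described above; the function schema and model ID are illustrative assumptions, not part of the patch:

    from openai.client import OpenAIClient

    client = OpenAIClient()
    response = client.chatcompletion(
        messages=[{"role": "user", "content": "What's the weather in Oslo?"}],
        model="gpt-3.5-turbo-0613",
        functions=[
            {
                "name": "get_weather",
                "description": "Look up the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            }
        ],
        function_call="auto",  # let the model decide whether to call get_weather
    )
    print(response)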
""" - self._populate_args(kwargs, messages=messages, stream=True) + self._populate_args( + kwargs, + messages=messages, + model=model, + deployment_id=deployment_id, + functions=functions, + function_call=function_call, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + user=user, + stream=True + ) self._normalize_model(kwargs) return cast( AsyncIterable[openai.ChatCompletion], await openai.ChatCompletion.acreate(**kwargs), ) - def embeddings(self, input, **kwargs) -> openai.Embedding: + def embeddings( + self, + input: Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]], + *, + model: str = ..., + deployment_id: str = ..., + user: str = ..., + **kwargs: Any + ) -> openai.Embedding: """Creates an embedding vector representing the input text. :param input: Input text to embed, encoded as a string or array @@ -606,16 +881,30 @@ def embeddings(self, input, **kwargs) -> openai.Embedding: an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002) - :type input: str or iterable[str] or iterable[int] or iterable[iterable[int]] - :keyword str model: ID of the model or deployment to use. Required. - :keyword str user: A unique identifier representing your end-user, which can + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - self._populate_args(kwargs, input=input) + self._populate_args( + kwargs, + input=input, + model=model, + deployment_id=deployment_id, + user=user, + ) self._normalize_model(kwargs) return cast(openai.Embedding, openai.Embedding.create(**kwargs)) - async def aembeddings(self, input, **kwargs) -> openai.Embedding: + async def aembeddings( + self, + input: Union[str, Iterable[str], Iterable[int], Iterable[Iterable[int]]], + *, + model: str = ..., + deployment_id: str = ..., + user: str = ..., + **kwargs: Any + ) -> openai.Embedding: """Creates an embedding vector representing the input text. :param input: Input text to embed, encoded as a string or array @@ -623,12 +912,18 @@ async def aembeddings(self, input, **kwargs) -> openai.Embedding: an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002) - :type input: str or iterable[str] or iterable[int] or iterable[iterable[int]] - :keyword str model: ID of the model or deployment to use. Required. - :keyword str user: A unique identifier representing your end-user, which can + :keyword model: ID of the model or deployment to use. + :keyword deployment_id: ID of the deployment to use. + :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - self._populate_args(kwargs, input=input) + self._populate_args( + kwargs, + input=input, + model=model, + deployment_id=deployment_id, + user=user, + ) self._normalize_model(kwargs) return cast(openai.Embedding, await openai.Embedding.acreate(**kwargs)) @@ -640,8 +935,8 @@ def image( size: str = ..., response_format: str = ..., user: str = ..., - **kwargs, - ): + **kwargs: Any, + ) -> openai.Image: """Creates an image given a prompt. :param prompt: A text description of the desired image(s). 
The maximum length is 1000 characters. @@ -670,8 +965,8 @@ async def aimage( size: str = ..., response_format: str = ..., user: str = ..., - **kwargs, - ): + **kwargs: Any, + ) -> openai.Image: """Creates an image given a prompt. :param prompt: A text description of the desired image(s). The maximum length is 1000 characters. @@ -700,8 +995,8 @@ def image_variation( size: str = ..., response_format: str = ..., user: str = ..., - **kwargs, - ): + **kwargs: Any, + ) -> openai.Image: """Creates a variation of a given image. :param image: The image to use as the basis for the variation(s). @@ -731,8 +1026,8 @@ async def aimage_variation( size: str = ..., response_format: str = ..., user: str = ..., - **kwargs, - ): + **kwargs: Any, + ) -> openai.Image: """Creates a variation of a given image. :param image: The image to use as the basis for the variation(s). @@ -764,8 +1059,8 @@ def image_edit( size: str = ..., response_format: str = ..., user: str = ..., - **kwargs, - ): + **kwargs: Any, + ) -> openai.Image: """Creates an edited or extended image given an original image and a prompt. :param image: The image to edit. Must be a valid PNG file, less than 4MB, and square. @@ -803,18 +1098,8 @@ async def aimage_edit( size: str = ..., response_format: str = ..., user: str = ..., - **kwargs, - ): - self._populate_args( - kwargs, - image=image, - n=n, - size=size, - prompt=prompt, - mask=mask, - response_format=response_format, - user=user, - ) + **kwargs: Any, + ) -> openai.Image: """Creates an edited or extended image given an original image and a prompt. :param image: The image to edit. Must be a valid PNG file, less than 4MB, and square. @@ -830,58 +1115,34 @@ async def aimage_edit( :keyword user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. """ - return cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) - - def edit( - self, - instruction: str, - *, - input: str = ..., - n: int = ..., - temperature: float = ..., - top_p: float = ..., - **kwargs, - ): self._populate_args( kwargs, - instruction=instruction, - input=input, - n=n, - temperature=temperature, - top_p=top_p, - ) - self._normalize_model(kwargs) - return cast(openai.Edit, openai.Edit.create(**kwargs)) - - async def aedit( - self, - instruction: str, - *, - input: str = ..., - n: int = ..., - temperature: float = ..., - top_p: float = ..., - **kwargs, - ): - self._populate_args( - kwargs, - instruction=instruction, - input=input, + image=image, n=n, - temperature=temperature, - top_p=top_p, + size=size, + prompt=prompt, + mask=mask, + response_format=response_format, + user=user, ) - self._normalize_model(kwargs) - return cast(openai.Edit, await openai.Edit.acreate(**kwargs)) + return cast(openai.Image, await openai.Image.acreate_edit(**kwargs)) def moderation( self, input: Union[str, Iterable[str]], - **kwargs, - ): + *, + model: str = ..., + **kwargs: Any, + ) -> openai.Moderation: + """Classifies if text violates OpenAI's Content Policy. + + :param input: The input text to classify. + :keyword model: ID of the model to use. + """ self._populate_args( kwargs, input=input, + model=model, ) self._normalize_model(kwargs) return cast(openai.Moderation, openai.Moderation.create(**kwargs)) @@ -889,11 +1150,19 @@ def moderation( async def amoderation( self, input: Union[str, Iterable[str]], - **kwargs, - ): + *, + model: str = ..., + **kwargs: Any, + ) -> openai.Moderation: + """Classifies if text violates OpenAI's Content Policy. + + :param input: The input text to classify. 
+        :keyword model: ID of the model to use.
+        """
         self._populate_args(
             kwargs,
             input=input,
+            model=model,
         )
         self._normalize_model(kwargs)
         return cast(openai.Moderation, await openai.Moderation.acreate(**kwargs))
@@ -902,15 +1171,34 @@ def transcribe_audio(
         self,
         file: Union[bytes, BinaryIO],
         *,
+        model: str = ...,
         prompt: str = ...,
         response_format: str = ...,
         temperature: float = ...,
         language: str = ...,
         **kwargs,
-    ):
+    ) -> openai.Audio:
+        """Transcribes audio into the input language.
+
+        :param file: The audio file object (not file name) to transcribe,
+         in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+        :keyword model: ID of the model to use. Only whisper-1 is currently available.
+        :keyword prompt: An optional text to guide the model's style or
+         continue a previous audio segment. The prompt should match the audio language.
+        :keyword response_format: The format of the transcript output, in one of
+         these options: json, text, srt, verbose_json, or vtt.
+        :keyword temperature: The sampling temperature, between 0 and 1. Higher values
+         like 0.8 will make the output more random, while lower values like 0.2 will
+         make it more focused and deterministic. If set to 0, the model will use log
+         probability to automatically increase the temperature until certain thresholds
+         are hit.
+        :keyword language: The language of the input audio. Supplying the input
+         language in ISO-639-1 format will improve accuracy and latency.
+        """
         self._populate_args(
             kwargs,
             file=file,
+            model=model,
             prompt=prompt,
             response_format=response_format,
             temperature=temperature,
@@ -923,15 +1211,34 @@ async def atranscribe_audio(
         self,
         file: Union[bytes, BinaryIO],
         *,
+        model: str = ...,
         prompt: str = ...,
         response_format: str = ...,
         temperature: float = ...,
         language: str = ...,
         **kwargs,
-    ):
+    ) -> openai.Audio:
+        """Transcribes audio into the input language.
+
+        :param file: The audio file object (not file name) to transcribe,
+         in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+        :keyword model: ID of the model to use. Only whisper-1 is currently available.
+        :keyword prompt: An optional text to guide the model's style or
+         continue a previous audio segment. The prompt should match the audio language.
+        :keyword response_format: The format of the transcript output, in one of
+         these options: json, text, srt, verbose_json, or vtt.
+        :keyword temperature: The sampling temperature, between 0 and 1. Higher values
+         like 0.8 will make the output more random, while lower values like 0.2 will
+         make it more focused and deterministic. If set to 0, the model will use log
+         probability to automatically increase the temperature until certain thresholds
+         are hit.
+        :keyword language: The language of the input audio. Supplying the input
+         language in ISO-639-1 format will improve accuracy and latency.
+        """
         self._populate_args(
             kwargs,
             file=file,
+            model=model,
             prompt=prompt,
             response_format=response_format,
             temperature=temperature,
@@ -944,14 +1251,31 @@ def translate_audio(
         self,
         file: Union[bytes, BinaryIO],
         *,
+        model: str = ...,
         prompt: str = ...,
         response_format: str = ...,
         temperature: float = ...,
         **kwargs,
-    ):
+    ) -> openai.Audio:
+        """Translates audio into English.
+
+        :param file: The audio file object (not file name) to translate,
+         in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+        :keyword model: ID of the model to use. Only whisper-1 is currently available.
+        :keyword prompt: An optional text to guide the model's style or
+         continue a previous audio segment. The prompt should be in English.
+        :keyword response_format: The format of the transcript output, in one of
+         these options: json, text, srt, verbose_json, or vtt.
+        :keyword temperature: The sampling temperature, between 0 and 1. Higher values
+         like 0.8 will make the output more random, while lower values like 0.2 will
+         make it more focused and deterministic. If set to 0, the model will use log
+         probability to automatically increase the temperature until certain thresholds
+         are hit.
+        """
         self._populate_args(
             kwargs,
             file=file,
+            model=model,
             prompt=prompt,
             response_format=response_format,
             temperature=temperature,
         )
         self._normalize_model(kwargs)
         return cast(openai.Audio, openai.Audio.translate(**kwargs))
@@ -963,43 +1287,34 @@ async def atranslate_audio(
         self,
         file: Union[bytes, BinaryIO],
         *,
+        model: str = ...,
         prompt: str = ...,
         response_format: str = ...,
         temperature: float = ...,
         **kwargs,
-    ):
+    ) -> openai.Audio:
+        """Translates audio into English.
+
+        :param file: The audio file object (not file name) to translate,
+         in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+        :keyword model: ID of the model to use. Only whisper-1 is currently available.
+        :keyword prompt: An optional text to guide the model's style or
+         continue a previous audio segment. The prompt should be in English.
+        :keyword response_format: The format of the transcript output, in one of
+         these options: json, text, srt, verbose_json, or vtt.
+        :keyword temperature: The sampling temperature, between 0 and 1. Higher values
+         like 0.8 will make the output more random, while lower values like 0.2 will
+         make it more focused and deterministic. If set to 0, the model will use log
+         probability to automatically increase the temperature until certain thresholds
+         are hit.
+        """
         self._populate_args(
             kwargs,
             file=file,
+            model=model,
             prompt=prompt,
             response_format=response_format,
             temperature=temperature,
         )
         self._normalize_model(kwargs)
         return cast(openai.Audio, await openai.Audio.atranslate(**kwargs))
-
-if __name__ == "__main__":
-    client = OpenAIClient(
-        api_base="https://achand-openai-0.openai.azure.com/",
-        auth="azuredefault",
-        backend="azure",
-    )
-    print(client.completion("what is up, my friend?", model="chatgpt"))
-    # print(client.embeddings("What, or what is this?", model="arch")) # Doesn't work 'cause it is the wrong model...
- - import asyncio - - async def stream_chat(): - respco = await client.aiter_completion( - "what is up, my friend?", model="chatgpt" - ) - async for rsp in respco: - print(rsp) - - asyncio.run(stream_chat()) - - oaiclient = OpenAIClient() - print(oaiclient.completion("what is up, my friend?", model="text-davinci-003")) - print(oaiclient.embeddings("What are embeddings?", model="text-embedding-ada-002")) - rsp = oaiclient.image("Happy cattle", response_format="b64_json") - print(rsp) From e82ed27580195665e20a570c80a5bb1616bf8be8 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Wed, 19 Jul 2023 16:55:58 -0700 Subject: [PATCH 17/18] fix find/replace mistake --- openai/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openai/client.py b/openai/client.py index 7423bb704f..8de5bf9b81 100644 --- a/openai/client.py +++ b/openai/client.py @@ -4,6 +4,7 @@ from typing import ( Union, Any, + Dict, cast, Iterable, BinaryIO, @@ -166,7 +167,7 @@ def __repr__(self) -> str: ] return f"OpenAIClient({','.join(constructor_args)})" - def _populate_args(self, kwargs: Mapping[str, Any], **overrides) -> None: + def _populate_args(self, kwargs: Dict[str, Any], **overrides) -> None: """Populate default arguments based on the current client configuration/defaults :param kwargs: The keyword arguments to send in the API request. @@ -186,7 +187,7 @@ def _populate_args(self, kwargs: Mapping[str, Any], **overrides) -> None: if kwargs[key] != val: raise TypeError(f"No parameter named `{key}`") - def _normalize_model(self, kwargs: Mapping[str, Any]): + def _normalize_model(self, kwargs: Dict[str, Any]): """Normalize model/engine/deployment_id based on which backend the client is configured to target. From 0cfaaff69e81268a0f24aebf138ecbdfc39c681a Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Mon, 24 Jul 2023 10:47:57 -0700 Subject: [PATCH 18/18] add missing tests + adjust moderation kwargs passed --- openai/client.py | 14 ++++++--- openai/tests/test_client.py | 61 ++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/openai/client.py b/openai/client.py index 8de5bf9b81..b8e9bb6b3d 100644 --- a/openai/client.py +++ b/openai/client.py @@ -587,7 +587,7 @@ def chatcompletion( :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling - a function. Specifying a particular function via {"name":\ "my_function"} + a function. Specifying a particular function via {"name": "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. :keyword temperature: What sampling temperature to use, between 0 and 2. @@ -663,7 +663,7 @@ async def achatcompletion( :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling - a function. Specifying a particular function via {"name":\ "my_function"} + a function. Specifying a particular function via {"name": "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. :keyword temperature: What sampling temperature to use, between 0 and 2. 
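The Mapping-to-Dict change in the patch above matters for type checking: _populate_args mutates the dictionary it receives, and typing.Mapping is a read-only protocol. A small sketch of the distinction (function names are illustrative, and the rationale is inferred from the commit):

    from typing import Any, Dict, Mapping

    def populate(kwargs: Dict[str, Any]) -> None:
        kwargs.setdefault("api_type", "open_ai")  # fine: Dict allows mutation

    def populate_readonly(kwargs: Mapping[str, Any]) -> None:
        # Type checkers reject this line: Mapping declares no setdefault(),
        # which is presumably why the annotation was corrected.
        kwargs.setdefault("api_type", "open_ai")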
@@ -740,7 +740,7 @@ def iter_chatcompletion( :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling - a function. Specifying a particular function via {"name":\ "my_function"} + a function. Specifying a particular function via {"name": "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. :keyword temperature: What sampling temperature to use, between 0 and 2. @@ -817,7 +817,7 @@ async def aiter_chatcompletion( :keyword function_call: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling - a function. Specifying a particular function via {"name":\ "my_function"} + a function. Specifying a particular function via {"name": "my_function"} forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. :keyword temperature: What sampling temperature to use, between 0 and 2. @@ -1146,6 +1146,9 @@ def moderation( model=model, ) self._normalize_model(kwargs) + kwargs.pop("api_base") + kwargs.pop("api_type") + kwargs.pop("organization") return cast(openai.Moderation, openai.Moderation.create(**kwargs)) async def amoderation( @@ -1166,6 +1169,9 @@ async def amoderation( model=model, ) self._normalize_model(kwargs) + kwargs.pop("api_base") + kwargs.pop("api_type") + kwargs.pop("organization") return cast(openai.Moderation, await openai.Moderation.acreate(**kwargs)) def transcribe_audio( diff --git a/openai/tests/test_client.py b/openai/tests/test_client.py index 7cccadf3f4..889c7f9430 100644 --- a/openai/tests/test_client.py +++ b/openai/tests/test_client.py @@ -7,13 +7,14 @@ API_BASE = os.environ["AZURE_API_BASE"] AZURE_API_KEY = os.environ["AZURE_KEY"] OPENAI_API_KEY = os.environ["OPENAI_KEY"] -API_VERSION = "2023-06-01-preview" +API_VERSION = "2023-07-01-preview" COMPLETION_MODEL = "text-davinci-003" CHAT_COMPLETION_MODEL = "gpt-35-turbo" CHAT_COMPLETION_MODEL_OPENAI = "gpt-3.5-turbo" EMBEDDINGS_MODEL = "text-embedding-ada-002" IMAGE_PATH = "" MASK_IMAGE_PATH = "" +AUDIO_FILE_PATH = "" @pytest.fixture @@ -426,3 +427,61 @@ async def test_client_aimage_edit(client): size="1024x1024" ) assert edit + +# MODERATION TESTS +@pytest.mark.parametrize("api_type", ["openai"]) +def test_client_moderation(client): + mod = client.moderation( + input="hello world", + model="text-moderation-latest" + ) + assert mod + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", ["openai"]) +async def test_client_amoderation(client): + mod = await client.amoderation( + input="hello world", + model="text-moderation-latest" + ) + assert mod + +# AUDIO TRANSCRIBE TESTS +@pytest.mark.parametrize("api_type", ["openai"]) +def test_client_transcribe_audio(client): + file = open(AUDIO_FILE_PATH, "rb") + audio = client.transcribe_audio( + file=file, + model="whisper-1" + ) + assert audio + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", ["openai"]) +async def test_client_atranscribe_audio(client): + file = open(AUDIO_FILE_PATH, "rb") + audio = await client.atranscribe_audio( + file=file, + model="whisper-1" + ) + assert audio + +# AUDIO TRANSLATE TESTS +@pytest.mark.parametrize("api_type", ["openai"]) +def 
test_client_translate_audio(client): + file = open(AUDIO_FILE_PATH, "rb") + audio = client.translate_audio( + file=file, + model="whisper-1" + ) + assert audio + +@pytest.mark.asyncio +@pytest.mark.parametrize("api_type", ["openai"]) +async def test_client_atranslate_audio(client): + file = open(AUDIO_FILE_PATH, "rb") + audio = await client.atranslate_audio( + file=file, + model="whisper-1" + ) + assert audio
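Taken together, a sketch of the surface the new tests exercise; the API key handling, audio file path, and model availability are assumptions rather than values from the patch:

    from openai.client import OpenAIClient

    client = OpenAIClient()  # assumes openai.api_key / OPENAI_API_KEY is configured

    flagged = client.moderation(
        input="hello world",
        model="text-moderation-latest",
    )
    print(flagged)

    with open("sample.mp3", "rb") as audio_file:  # placeholder audio file
        english = client.translate_audio(file=audio_file, model="whisper-1")
    print(english)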