Chat Completions

openwebui_client.completions

OpenWebUI completions class for handling file parameters in chat completions.

OpenWebUICompletions

Bases: Completions

Extended Completions class that supports the 'files' parameter for OpenWebUI.
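
A minimal usage sketch, wrapping a standard OpenAI client. The base URL, model name, and upload flow below are illustrative assumptions, not part of this module:

from openai import OpenAI

from openwebui_client.completions import OpenWebUICompletions

client = OpenAI(
    base_url="http://localhost:3000/api",  # assumed OpenWebUI endpoint
    api_key="sk-...",  # your OpenWebUI API key
)
completions = OpenWebUICompletions(client)

# Upload a file, then attach it to the completion request
uploaded = client.files.create(file=open("report.pdf", "rb"), purpose="assistants")
response = completions.create(
    model="gpt-4o",  # any model served by your OpenWebUI instance
    messages=[{"role": "user", "content": "Summarize the attached report."}],
    files=[uploaded],
)
print(response.choices[0].message.content)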

Source code in openwebui_client/completions.py
class OpenWebUICompletions(Completions):
    """Extended Completions class that supports the 'files' parameter for OpenWebUI."""

    def __init__(self, client: OpenAI) -> None:
        """Initialize the OpenWebUI completions handler.

        Args:
            client: The OpenAI client to use for requests
        """
        # Pass the full OpenAI client, not just its internal client
        super().__init__(client=client)

    @required_args(["messages", "model"], ["messages", "model", "stream"])
    def create(  # pyright: ignore[reportIncompatibleMethodOverride]
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Union[Optional[ChatCompletionAudioParam], NotGiven] = NOT_GIVEN,
        files: Union[Optional[Collection[FileObject]], NotGiven] = NOT_GIVEN,
        frequency_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN,
        function_call: Union[
            completion_create_params.FunctionCall, NotGiven
        ] = NOT_GIVEN,
        functions: Union[
            Iterable[completion_create_params.Function], NotGiven
        ] = NOT_GIVEN,
        logit_bias: Union[Optional[Dict[str, int]], NotGiven] = NOT_GIVEN,
        logprobs: Union[Optional[bool], NotGiven] = NOT_GIVEN,
        max_completion_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN,
        max_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN,
        metadata: Union[Optional[Metadata], NotGiven] = NOT_GIVEN,
        modalities: Union[
            Optional[List[Literal["text", "audio"]]], NotGiven
        ] = NOT_GIVEN,
        n: Union[Optional[int], NotGiven] = NOT_GIVEN,
        parallel_tool_calls: Union[bool, NotGiven] = NOT_GIVEN,
        prediction: Union[
            Optional[ChatCompletionPredictionContentParam], NotGiven
        ] = NOT_GIVEN,
        presence_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN,
        reasoning_effort: Union[Optional[ReasoningEffort], NotGiven] = NOT_GIVEN,
        response_format: Union[
            completion_create_params.ResponseFormat, NotGiven
        ] = NOT_GIVEN,
        seed: Union[Optional[int], NotGiven] = NOT_GIVEN,
        service_tier: Union[
            Optional[Literal["auto", "default", "flex"]], NotGiven
        ] = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None, NotGiven] = NOT_GIVEN,
        store: Union[Optional[bool], NotGiven] = NOT_GIVEN,
        stream: Union[Optional[Literal[False]], Literal[True], NotGiven] = NOT_GIVEN,
        stream_options: Union[
            Optional[ChatCompletionStreamOptionsParam], NotGiven
        ] = NOT_GIVEN,
        temperature: Union[Optional[float], NotGiven] = NOT_GIVEN,
        tool_choice: Union[ChatCompletionToolChoiceOptionParam, NotGiven] = NOT_GIVEN,
        tools: Union[Iterable[ChatCompletionToolParam], NotGiven] = NOT_GIVEN,
        top_logprobs: Union[Optional[int], NotGiven] = NOT_GIVEN,
        top_p: Union[Optional[float], NotGiven] = NOT_GIVEN,
        user: Union[str, NotGiven] = NOT_GIVEN,
        web_search_options: Union[
            completion_create_params.WebSearchOptions, NotGiven
        ] = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Union[Headers, None] = None,
        extra_query: Union[Query, None] = None,
        extra_body: Union[Body, None] = None,
        timeout: Union[float, httpx.Timeout, None, NotGiven] = NOT_GIVEN,
    ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
        """Create a chat completion with support for the 'files' parameter.

        This overrides the standard create method to handle the 'files' parameter
        that OpenWebUI supports but is not in the standard OpenAI API.

        Args:
            messages: A list of messages comprising the conversation so far.
            model: ID of the model to use.
            files: File objects whose IDs are attached to the completion request (OpenWebUI specific).

            # Standard OpenAI parameters, see OpenAI API docs for details
            audio: Audio input parameters.
            frequency_penalty: Penalizes repeated tokens according to frequency.
            function_call: Controls how the model uses functions.
            functions: Functions the model may call to interact with external systems.
            logit_bias: Modifies likelihood of specific tokens appearing in completion.
            logprobs: Whether to return log probabilities of the output tokens.
            max_completion_tokens: Maximum number of tokens that can be generated for completions.
            max_tokens: Maximum number of tokens to generate in the response.
            metadata: Additional metadata to include in the completion.
            modalities: List of modalities the model should handle.
            n: How many completions to generate for each prompt.
            parallel_tool_calls: Whether function and tool calls should be made in parallel.
            prediction: Control specifics of prediction content.
            presence_penalty: Penalizes new tokens based on their presence so far.
            reasoning_effort: Controls how much effort the model spends reasoning.
            response_format: Format in which the model should generate responses.
            seed: Enables deterministic sampling for consistent outputs.
            service_tier: The service tier to use for the request.
            stop: Sequences where the API will stop generating further tokens.
            store: Whether to persist completion for future retrieval.
            stream: Whether to stream back partial progress.
            stream_options: Options for streaming responses.
            temperature: Controls randomness in the response.
            tool_choice: Controls how the model selects tools.
            tools: List of tools the model may call.
            top_logprobs: Number of log probabilities to return per token.
            top_p: Controls diversity via nucleus sampling.
            user: Unique identifier representing your end-user.
            web_search_options: Options to configure web search behavior.

            # Additional parameters for HTTP requests
            extra_headers: Additional HTTP headers.
            extra_query: Additional query parameters.
            extra_body: Additional body parameters.
            timeout: Request timeout in seconds.

        Returns:
            A ChatCompletion object containing the model's response, or a
            Stream[ChatCompletionChunk] when streaming without files.
        """
        # Handle the 'files' parameter specially
        if files:
            _logger.debug(f"Including {len(files)} files in chat completion request")

            # When files are provided, the request is built manually because
            # the standard OpenAI API does not support this parameter

            # Collect the call's parameters, dropping unset values and internals
            request_data = {
                k: v
                for k, v in locals().items()
                if k != "self"
                and v is not None
                and v is not NOT_GIVEN
                and "__" not in k
            }

            # Issue the request directly over HTTP; OpenWebUI accepts the
            # file references in the JSON body of the request

            import requests

            # Extract the base URL from the client
            base_url = str(self._client.base_url).rstrip("/")

            # Construct the full chat completions endpoint URL
            url = f"{base_url}/chat/completions"

            # Set up authentication headers and content type for JSON
            headers = {
                "Authorization": f"Bearer {self._client.api_key}",
                "Content-Type": "application/json",
            }

            # Build the JSON payload with the core parameters
            payload = {
                "model": model,
                "messages": [
                    {"role": m["role"], "content": m.get("content")} for m in messages
                ],
                "max_tokens": max_tokens if max_tokens is not NOT_GIVEN else None,
            }

            # Add the remaining OpenAI parameters to the payload, excluding
            # request-transport options that are not part of the API body
            for key, value in request_data.items():
                if key not in (
                    "files",
                    "messages",
                    "model",
                    "max_tokens",
                    "extra_headers",
                    "extra_query",
                    "extra_body",
                    "timeout",
                ):
                    payload[key] = value

            # Attach the file references in the format OpenWebUI expects:
            # a list of {"type": "file", "id": <file id>} objects
            payload["files"] = [{"type": "file", "id": f.id} for f in files]

            # Log debug info about the attached file objects
            for i, file in enumerate(files):
                _logger.debug(
                    f"File {i} details: id={file.id}, filename={getattr(file, 'filename', None)}"
                )

            # Log detailed request information
            _logger.debug(f"CHAT API - URL: {url}")
            _logger.debug(f"CHAT API - Headers: {headers}")
            _logger.debug(f"CHAT API - Payload: {payload}")

            # Convert the httpx-style timeout into the float (in seconds)
            # expected by requests, defaulting to 60 seconds
            float_timeout: float
            if timeout is NOT_GIVEN or timeout is None:
                float_timeout = 60.0
            elif isinstance(timeout, Timeout):
                float_timeout = (timeout.connect or 0.0) + (timeout.read or 0.0) or 60.0
            else:
                float_timeout = float(timeout) or 60.0

            # Make the HTTP request with JSON payload
            http_response = requests.post(
                url, headers=headers, json=payload, timeout=float_timeout
            )

            # Log response details
            _logger.debug(f"CHAT API - Response Status: {http_response.status_code}")
            _logger.debug(f"CHAT API - Response Headers: {dict(http_response.headers)}")
            _logger.debug(
                f"CHAT API - Response Body: {http_response.text[:500]}..."
                if len(http_response.text) > 500
                else f"CHAT API - Response Body: {http_response.text}"
            )

            # Raise an exception for any HTTP error
            http_response.raise_for_status()

            # Parse the JSON response into a ChatCompletion object
            response_data = http_response.json()
            return ChatCompletion(**response_data)
        else:
            # Without files, delegate to the parent implementation,
            # dropping the OpenWebUI-specific 'files' parameter
            standard_kwargs = {
                k: v
                for k, v in locals().items()
                if k not in ["self", "files"] and "__" not in k
            }
            return super().create(**standard_kwargs)

__init__(client)

Initialize the OpenWebUI completions handler.
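
For example (the wrapped client's base URL is an assumption):

from openai import OpenAI

from openwebui_client.completions import OpenWebUICompletions

completions = OpenWebUICompletions(
    OpenAI(base_url="http://localhost:3000/api", api_key="sk-...")
)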

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| client | OpenAI | The OpenAI client to use for requests | required |
Source code in openwebui_client/completions.py
def __init__(self, client: OpenAI) -> None:
    """Initialize the OpenWebUI completions handler.

    Args:
        client: The OpenAI client to use for requests
    """
    # Pass the full OpenAI client, not just its internal client
    super().__init__(client=client)

create(*, messages, model, audio=NOT_GIVEN, files=NOT_GIVEN, frequency_penalty=NOT_GIVEN, function_call=NOT_GIVEN, functions=NOT_GIVEN, logit_bias=NOT_GIVEN, logprobs=NOT_GIVEN, max_completion_tokens=NOT_GIVEN, max_tokens=NOT_GIVEN, metadata=NOT_GIVEN, modalities=NOT_GIVEN, n=NOT_GIVEN, parallel_tool_calls=NOT_GIVEN, prediction=NOT_GIVEN, presence_penalty=NOT_GIVEN, reasoning_effort=NOT_GIVEN, response_format=NOT_GIVEN, seed=NOT_GIVEN, service_tier=NOT_GIVEN, stop=NOT_GIVEN, store=NOT_GIVEN, stream=NOT_GIVEN, stream_options=NOT_GIVEN, temperature=NOT_GIVEN, tool_choice=NOT_GIVEN, tools=NOT_GIVEN, top_logprobs=NOT_GIVEN, top_p=NOT_GIVEN, user=NOT_GIVEN, web_search_options=NOT_GIVEN, extra_headers=None, extra_query=None, extra_body=None, timeout=NOT_GIVEN)

Create a chat completion with support for the 'files' parameter.

This overrides the standard create method to handle the 'files' parameter that OpenWebUI supports but is not in the standard OpenAI API.
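
When files are supplied, the method bypasses the SDK transport and POSTs a plain JSON body to <base_url>/chat/completions. A rough sketch of that payload (the model, content, and file ID are placeholders):

payload = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "Summarize the attached report."}],
    "max_tokens": None,  # sent as null when max_tokens is not given
    "files": [{"type": "file", "id": "file-abc123"}],
}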

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| messages | Iterable[ChatCompletionMessageParam] | A list of messages comprising the conversation so far. | required |
| model | Union[str, ChatModel] | ID of the model to use. | required |
| files | Union[Optional[Collection[FileObject]], NotGiven] | File objects whose IDs are attached to the completion request (OpenWebUI specific). | NOT_GIVEN |
| audio | Union[Optional[ChatCompletionAudioParam], NotGiven] | Audio input parameters. | NOT_GIVEN |
| frequency_penalty | Union[Optional[float], NotGiven] | Penalizes repeated tokens according to frequency. | NOT_GIVEN |
| function_call | Union[FunctionCall, NotGiven] | Controls how the model uses functions. | NOT_GIVEN |
| functions | Union[Iterable[Function], NotGiven] | Functions the model may call to interact with external systems. | NOT_GIVEN |
| logit_bias | Union[Optional[Dict[str, int]], NotGiven] | Modifies the likelihood of specific tokens appearing in the completion. | NOT_GIVEN |
| logprobs | Union[Optional[bool], NotGiven] | Whether to return log probabilities of the output tokens. | NOT_GIVEN |
| max_completion_tokens | Union[Optional[int], NotGiven] | Maximum number of tokens that can be generated for completions. | NOT_GIVEN |
| max_tokens | Union[Optional[int], NotGiven] | Maximum number of tokens to generate in the response. | NOT_GIVEN |
| metadata | Union[Optional[Metadata], NotGiven] | Additional metadata to include in the completion. | NOT_GIVEN |
| modalities | Union[Optional[List[Literal['text', 'audio']]], NotGiven] | List of modalities the model should handle. | NOT_GIVEN |
| n | Union[Optional[int], NotGiven] | How many completions to generate for each prompt. | NOT_GIVEN |
| parallel_tool_calls | Union[bool, NotGiven] | Whether function and tool calls should be made in parallel. | NOT_GIVEN |
| prediction | Union[Optional[ChatCompletionPredictionContentParam], NotGiven] | Controls specifics of prediction content. | NOT_GIVEN |
| presence_penalty | Union[Optional[float], NotGiven] | Penalizes new tokens based on their presence so far. | NOT_GIVEN |
| reasoning_effort | Union[Optional[ReasoningEffort], NotGiven] | Controls how much effort the model spends reasoning. | NOT_GIVEN |
| response_format | Union[ResponseFormat, NotGiven] | Format in which the model should generate responses. | NOT_GIVEN |
| seed | Union[Optional[int], NotGiven] | Enables deterministic sampling for consistent outputs. | NOT_GIVEN |
| service_tier | Union[Optional[Literal['auto', 'default', 'flex']], NotGiven] | The service tier to use for the request. | NOT_GIVEN |
| stop | Union[Optional[str], List[str], None, NotGiven] | Sequences where the API will stop generating further tokens. | NOT_GIVEN |
| store | Union[Optional[bool], NotGiven] | Whether to persist the completion for future retrieval. | NOT_GIVEN |
| stream | Union[Optional[Literal[False]], Literal[True], NotGiven] | Whether to stream back partial progress. | NOT_GIVEN |
| stream_options | Union[Optional[ChatCompletionStreamOptionsParam], NotGiven] | Options for streaming responses. | NOT_GIVEN |
| temperature | Union[Optional[float], NotGiven] | Controls randomness in the response. | NOT_GIVEN |
| tool_choice | Union[ChatCompletionToolChoiceOptionParam, NotGiven] | Controls how the model selects tools. | NOT_GIVEN |
| tools | Union[Iterable[ChatCompletionToolParam], NotGiven] | List of tools the model may call. | NOT_GIVEN |
| top_logprobs | Union[Optional[int], NotGiven] | Number of log probabilities to return per token. | NOT_GIVEN |
| top_p | Union[Optional[float], NotGiven] | Controls diversity via nucleus sampling. | NOT_GIVEN |
| user | Union[str, NotGiven] | Unique identifier representing your end-user. | NOT_GIVEN |
| web_search_options | Union[WebSearchOptions, NotGiven] | Options to configure web search behavior. | NOT_GIVEN |
| extra_headers | Union[Headers, None] | Additional HTTP headers. | None |
| extra_query | Union[Query, None] | Additional query parameters. | None |
| extra_body | Union[Body, None] | Additional body parameters. | None |
| timeout | Union[float, Timeout, None, NotGiven] | Request timeout in seconds. | NOT_GIVEN |

Returns:

| Type | Description |
| ---- | ----------- |
| Union[ChatCompletion, Stream[ChatCompletionChunk]] | A ChatCompletion object containing the model's response, or a Stream[ChatCompletionChunk] when streaming without files. |
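
In the files code path, the httpx-style timeout is collapsed into the single float that requests.post accepts. A standalone restatement of that conversion for illustration (the in-source version also treats the NOT_GIVEN sentinel as the 60-second default):

from httpx import Timeout

def to_requests_timeout(timeout) -> float:
    """Collapse an httpx-style timeout into float seconds, defaulting to 60."""
    if timeout is None:
        return 60.0
    if isinstance(timeout, Timeout):
        # connect + read, falling back to 60 if both are unset or zero
        return (timeout.connect or 0.0) + (timeout.read or 0.0) or 60.0
    return float(timeout) or 60.0

assert to_requests_timeout(None) == 60.0
assert to_requests_timeout(10) == 10.0
assert to_requests_timeout(Timeout(connect=5.0, read=30.0, write=5.0, pool=5.0)) == 35.0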

Source code in openwebui_client/completions.py
@required_args(["messages", "model"], ["messages", "model", "stream"])
def create(  # pyright: ignore[reportIncompatibleMethodOverride]
    self,
    *,
    messages: Iterable[ChatCompletionMessageParam],
    model: Union[str, ChatModel],
    audio: Union[Optional[ChatCompletionAudioParam], NotGiven] = NOT_GIVEN,
    files: Union[Optional[Collection[FileObject]], NotGiven] = NOT_GIVEN,
    frequency_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN,
    function_call: Union[
        completion_create_params.FunctionCall, NotGiven
    ] = NOT_GIVEN,
    functions: Union[
        Iterable[completion_create_params.Function], NotGiven
    ] = NOT_GIVEN,
    logit_bias: Union[Optional[Dict[str, int]], NotGiven] = NOT_GIVEN,
    logprobs: Union[Optional[bool], NotGiven] = NOT_GIVEN,
    max_completion_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN,
    max_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN,
    metadata: Union[Optional[Metadata], NotGiven] = NOT_GIVEN,
    modalities: Union[
        Optional[List[Literal["text", "audio"]]], NotGiven
    ] = NOT_GIVEN,
    n: Union[Optional[int], NotGiven] = NOT_GIVEN,
    parallel_tool_calls: Union[bool, NotGiven] = NOT_GIVEN,
    prediction: Union[
        Optional[ChatCompletionPredictionContentParam], NotGiven
    ] = NOT_GIVEN,
    presence_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN,
    reasoning_effort: Union[Optional[ReasoningEffort], NotGiven] = NOT_GIVEN,
    response_format: Union[
        completion_create_params.ResponseFormat, NotGiven
    ] = NOT_GIVEN,
    seed: Union[Optional[int], NotGiven] = NOT_GIVEN,
    service_tier: Union[
        Optional[Literal["auto", "default", "flex"]], NotGiven
    ] = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None, NotGiven] = NOT_GIVEN,
    store: Union[Optional[bool], NotGiven] = NOT_GIVEN,
    stream: Union[Optional[Literal[False]], Literal[True], NotGiven] = NOT_GIVEN,
    stream_options: Union[
        Optional[ChatCompletionStreamOptionsParam], NotGiven
    ] = NOT_GIVEN,
    temperature: Union[Optional[float], NotGiven] = NOT_GIVEN,
    tool_choice: Union[ChatCompletionToolChoiceOptionParam, NotGiven] = NOT_GIVEN,
    tools: Union[Iterable[ChatCompletionToolParam], NotGiven] = NOT_GIVEN,
    top_logprobs: Union[Optional[int], NotGiven] = NOT_GIVEN,
    top_p: Union[Optional[float], NotGiven] = NOT_GIVEN,
    user: Union[str, NotGiven] = NOT_GIVEN,
    web_search_options: Union[
        completion_create_params.WebSearchOptions, NotGiven
    ] = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Union[Headers, None] = None,
    extra_query: Union[Query, None] = None,
    extra_body: Union[Body, None] = None,
    timeout: Union[float, httpx.Timeout, None, NotGiven] = NOT_GIVEN,
) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
    """Create a chat completion with support for the 'files' parameter.

    This overrides the standard create method to handle the 'files' parameter
    that OpenWebUI supports but is not in the standard OpenAI API.

    Args:
        messages: A list of messages comprising the conversation so far.
        model: ID of the model to use.
        files: File objects whose IDs are attached to the completion request (OpenWebUI specific).

        # Standard OpenAI parameters, see OpenAI API docs for details
        audio: Audio input parameters.
        frequency_penalty: Penalizes repeated tokens according to frequency.
        function_call: Controls how the model uses functions.
        functions: Functions the model may call to interact with external systems.
        logit_bias: Modifies likelihood of specific tokens appearing in completion.
        logprobs: Whether to return log probabilities of the output tokens.
        max_completion_tokens: Maximum number of tokens that can be generated for completions.
        max_tokens: Maximum number of tokens to generate in the response.
        metadata: Additional metadata to include in the completion.
        modalities: List of modalities the model should handle.
        n: How many completions to generate for each prompt.
        parallel_tool_calls: Whether function and tool calls should be made in parallel.
        prediction: Control specifics of prediction content.
        presence_penalty: Penalizes new tokens based on their presence so far.
        reasoning_effort: Controls how much effort the model spends reasoning.
        response_format: Format in which the model should generate responses.
        seed: Enables deterministic sampling for consistent outputs.
        service_tier: The service tier to use for the request.
        stop: Sequences where the API will stop generating further tokens.
        store: Whether to persist completion for future retrieval.
        stream: Whether to stream back partial progress.
        stream_options: Options for streaming responses.
        temperature: Controls randomness in the response.
        tool_choice: Controls how the model selects tools.
        tools: List of tools the model may call.
        top_logprobs: Number of log probabilities to return per token.
        top_p: Controls diversity via nucleus sampling.
        user: Unique identifier representing your end-user.
        web_search_options: Options to configure web search behavior.

        # Additional parameters for HTTP requests
        extra_headers: Additional HTTP headers.
        extra_query: Additional query parameters.
        extra_body: Additional body parameters.
        timeout: Request timeout in seconds.

    Returns:
        A ChatCompletion object containing the model's response, or a
        Stream[ChatCompletionChunk] when streaming without files.
    """
    # Handle the 'files' parameter specially
    if files:
        _logger.debug(f"Including {len(files)} files in chat completion request")

        # When files are provided, the request is built manually because
        # the standard OpenAI API does not support this parameter

        # Collect the call's parameters, dropping unset values and internals
        request_data = {
            k: v
            for k, v in locals().items()
            if k != "self"
            and v is not None
            and v is not NOT_GIVEN
            and "__" not in k
        }

        # Issue the request directly over HTTP; OpenWebUI accepts the
        # file references in the JSON body of the request

        import requests

        # Extract the base URL from the client
        base_url = str(self._client.base_url).rstrip("/")

        # Construct the full chat completions endpoint URL
        url = f"{base_url}/chat/completions"

        # Set up authentication headers and content type for JSON
        headers = {
            "Authorization": f"Bearer {self._client.api_key}",
            "Content-Type": "application/json",
        }

        # Build the JSON payload with the core parameters
        payload = {
            "model": model,
            "messages": [
                {"role": m["role"], "content": m.get("content")} for m in messages
            ],
            "max_tokens": max_tokens if max_tokens is not NOT_GIVEN else None,
        }

        # Add the remaining OpenAI parameters to the payload, excluding
        # request-transport options that are not part of the API body
        for key, value in request_data.items():
            if key not in (
                "files",
                "messages",
                "model",
                "max_tokens",
                "extra_headers",
                "extra_query",
                "extra_body",
                "timeout",
            ):
                payload[key] = value

        # Attach the file references in the format OpenWebUI expects:
        # a list of {"type": "file", "id": <file id>} objects
        payload["files"] = [{"type": "file", "id": f.id} for f in files]

        # Log debug info about the attached file objects
        for i, file in enumerate(files):
            _logger.debug(
                f"File {i} details: id={file.id}, filename={getattr(file, 'filename', None)}"
            )

        # Log detailed request information
        _logger.debug(f"CHAT API - URL: {url}")
        _logger.debug(f"CHAT API - Headers: {headers}")
        _logger.debug(f"CHAT API - Payload: {payload}")

        # Convert the httpx-style timeout into the float (in seconds)
        # expected by requests, defaulting to 60 seconds
        float_timeout: float
        if timeout is NOT_GIVEN or timeout is None:
            float_timeout = 60.0
        elif isinstance(timeout, Timeout):
            float_timeout = (timeout.connect or 0.0) + (timeout.read or 0.0) or 60.0
        else:
            float_timeout = float(timeout) or 60.0

        # Make the HTTP request with JSON payload
        http_response = requests.post(
            url, headers=headers, json=payload, timeout=float_timeout
        )

        # Log response details
        _logger.debug(f"CHAT API - Response Status: {http_response.status_code}")
        _logger.debug(f"CHAT API - Response Headers: {dict(http_response.headers)}")
        _logger.debug(
            f"CHAT API - Response Body: {http_response.text[:500]}..."
            if len(http_response.text) > 500
            else f"CHAT API - Response Body: {http_response.text}"
        )

        # Raise an exception for any HTTP error
        http_response.raise_for_status()

        # Parse the JSON response into a ChatCompletion object
        response_data = http_response.json()
        return ChatCompletion(**response_data)
    else:
        # Without files, delegate to the parent implementation,
        # dropping the OpenWebUI-specific 'files' parameter
        standard_kwargs = {
            k: v
            for k, v in locals().items()
            if k not in ["self", "files"] and "__" not in k
        }
        return super().create(**standard_kwargs)
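
Because the files path issues the request with requests rather than the SDK's httpx transport, HTTP errors surface as requests.HTTPError from raise_for_status() instead of the SDK's APIStatusError. A hedged handling sketch, reusing the names from the usage example above:

import requests

try:
    response = completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Summarize the attached report."}],
        files=[uploaded],
    )
except requests.HTTPError as exc:
    # 4xx/5xx responses from the OpenWebUI endpoint
    print(exc.response.status_code, exc.response.text[:200])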