import os
import json
from unstructured_client import UnstructuredClient
from unstructured_client.models.operations import PartitionRequest
from unstructured_client.models.shared import (
    PartitionParameters,
    Files,
    Strategy
)
from unstructured_client.models.errors import (
    UnstructuredClientError,
    HTTPValidationError
)
from unstructured_client.models.errors.servererror import ServerError
from unstructured_client.models.errors.responsevalidationerror import ResponseValidationError
import httpx
# Example: partition a local file with the Unstructured Partition API using
# the VLM strategy, print the first returned element, save all elements as
# JSON, and demonstrate how to handle each class of error that can be raised.
try:
    client = UnstructuredClient(
        # For example, leave out the API key to intentionally trigger an error.
        # api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")
    )
    filename = "PATH_TO_INPUT_FILE"
    # Open the input inside a context manager so the file handle is always
    # closed, even when the request fails. The request must be sent while the
    # file is still open because the file object itself is passed as content.
    with open(filename, "rb") as input_file:
        request = PartitionRequest(
            partition_parameters=PartitionParameters(
                files=Files(
                    content=input_file,
                    file_name=filename,
                ),
                strategy=Strategy.VLM,
                vlm_model="gpt-4o",
                vlm_model_provider="openai",
                languages=['eng'],
                split_pdf_page=True,            # If True, splits the PDF file into smaller chunks of pages.
                # split_pdf_allow_failed=True,  # If True, the partitioning continues even if some pages fail.
                split_pdf_concurrency_level=15  # Set the number of concurrent requests to the maximum value: 15.
            ),
        )
        response = client.general.partition(
            request=request
        )
    # Copy the returned elements into a plain list; fall back to an empty list
    # if the response carries no elements payload (NOTE(review): the SDK
    # appears to type `elements` as optional -- confirm).
    element_dicts = list(response.elements or [])

    # Print the processed data's first element only. Guard against an empty
    # result so it is not misreported as an "Unexpected error" (IndexError).
    if element_dicts:
        print(element_dicts[0])
    else:
        print("No elements were returned.")
    # Write the processed data to a local file.
    json_elements = json.dumps(element_dicts, indent=2)
    with open("PATH_TO_OUTPUT_FILE", "w", encoding="utf-8") as file:
        file.write(json_elements)
except HTTPValidationError as e:
    print("Validation error (HTTP 422):", e)
except ServerError as e:
    print("Server error (HTTP 5XX):", e)
except ResponseValidationError as e:
    print("Response validation/type mismatch:", e)
except UnstructuredClientError as e:
    # This catches any other UnstructuredClientError not already caught above.
    # This and all of the other error classes in this example expose the following members:
    print("Other Unstructured client error:")
    print(f"Message:      {e.message}")
    print(f"Status code:  {e.status_code}")
    print(f"Body:         {e.body}")
    print(f"Raw response: {e.raw_response}")
    print("Headers:")
    # Each raw header is a (key, value) pair of bytes; decode both for display.
    for raw_key, raw_value in e.headers.raw:
        key = raw_key.decode('utf-8')
        value = raw_value.decode('utf-8')
        print(f"              {key}: {value}")
except httpx.ConnectError as e:
    print("HTTP connection error:", e)
except httpx.TimeoutException as e:
    print("HTTP timeout error:", e)
except httpx.RequestError as e:
    # This catches any other HTTPX network/request errors not already caught above.
    print("Other HTTPX request error:", e)
except Exception as e:
    # Optional: this catches any other unforeseen errors.
    print("Unexpected error:", e)