Azure AI Search

If you’re new to Unstructured, read this note first.Before you can create a destination connector, you must first sign in to your Unstructured account:
If you do not already have an Unstructured account, go to https://unstructured.io/contact and fill out the online form to indicate your interest.
If you already have an Unstructured account, sign in by using the URL of the sign in page that Unstructured provided to you when your Unstructured account was created.
If you do not have this URL, contact Unstructured Sales at sales@unstructured.io.
After you sign in, the Unstructured user interface (UI) appears, which you use to get your Unstructured API key. To learn how, watch this 40-second how-to video.After you create the destination connector, add it along with a source connector to a workflow. Then run the worklow as a job. To learn how, try out the hands-on Workflow Endpoint quickstart, go directly to the quickstart notebook, or watch the two 4-minute video tutorials for the Unstructured Python SDK.You can also create destination connectors with the Unstructured user interface (UI). Learn how.If you need help, reach out to the community on Slack, or contact us directly.You are now ready to start creating a destination connector! Keep reading to learn how.
Send processed data from Unstructured to Azure AI Search. The requirements are as follows. The following video shows how to fulfill the minimum set of Azure AI Search requirements: Here are some more details about these requirements:
The endpoint and API key for Azure AI Search. Create an endpoint and API key.
The name of the index in Azure AI Search. Create an index. The Azure AI Search index that you use must have an index schema that is compatible with the schema of the documents that Unstructured produces for you. Unstructured cannot provide a schema that is guaranteed to work in all circumstances. This is because these schemas will vary based on your source files’ types; how you want Unstructured to partition, chunk, and generate embeddings; any custom post-processing code that you run; and other factors. You can adapt the following index schema example for your own needs:
{
  "@odata.context": "https://ingest-test-azure-ai-search.search.windows.net/$metadata#indexes/$entity",
  "@odata.etag": "\"0x8DCED5D96393CA9\"",
  "name": "<my-index-name>",
  "defaultScoringProfile": null,
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": true,
      "facetable": true,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "record_id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": true,
      "facetable": true,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "element_id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": true,
      "facetable": true,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "text",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": true,
      "facetable": true,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "embeddings",
      "type": "Collection(Edm.Single)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": 3072,
      "vectorSearchProfile": "embeddings-config-profile",
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "type",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": true,
      "facetable": true,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata",
      "type": "Edm.ComplexType",
      "fields": [
        {
          "name": "category_depth",
          "type": "Edm.Int32",
          "searchable": false,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "parent_id",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "attached_to_filename",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "filetype",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "last_modified",
          "type": "Edm.DateTimeOffset",
          "searchable": false,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "is_continuation",
          "type": "Edm.Boolean",
          "searchable": false,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "file_directory",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "filename",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "data_source",
          "type": "Edm.ComplexType",
          "fields": [
            {
              "name": "url",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "version",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "date_created",
              "type": "Edm.DateTimeOffset",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "date_modified",
              "type": "Edm.DateTimeOffset",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "date_processed",
              "type": "Edm.DateTimeOffset",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "permissions_data",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "record_locator",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            }
          ]
        },
        {
          "name": "coordinates",
          "type": "Edm.ComplexType",
          "fields": [
            {
              "name": "system",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "layout_width",
              "type": "Edm.Double",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "layout_height",
              "type": "Edm.Double",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "points",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            }
          ]
        },
        {
          "name": "languages",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "page_number",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "orig_elements",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "links",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "page_name",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "url",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "link_urls",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "link_texts",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "sent_from",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "sent_to",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "subject",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "section",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "header_footer_type",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "emphasized_text_contents",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "emphasized_text_tags",
          "type": "Collection(Edm.String)",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "text_as_html",
          "type": "Edm.String",
          "searchable": true,
          "filterable": false,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": false,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "regex_metadata",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "detection_class_prob",
          "type": "Edm.Double",
          "searchable": false,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "partitioner_type",
          "type": "Edm.String",
          "searchable": false,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        }
      ]
    }
  ],
  "scoringProfiles": [],
  "corsOptions": null,
  "suggesters": [],
  "analyzers": [],
  "normalizers": [],
  "tokenizers": [],
  "tokenFilters": [],
  "charFilters": [],
  "encryptionKey": null,
  "similarity": {
    "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
    "k1": null,
    "b": null
  },
  "semantic": null,
  "vectorSearch": {
    "algorithms": [
      {
        "name": "embeddings-config",
        "kind": "hnsw",
        "hnswParameters": {
          "metric": "cosine",
          "m": 4,
          "efConstruction": 400,
          "efSearch": 500
        },
        "exhaustiveKnnParameters": null
      }
    ],
    "profiles": [
      {
        "name": "embeddings-config-profile",
        "algorithm": "embeddings-config",
        "vectorizer": null,
        "compression": null
      }
    ],
    "vectorizers": [],
    "compressions": []
  }
}
Unstructured API

Workflow Endpoint

Partition Endpoint

Legacy APIs

Troubleshooting