Send processed data from Unstructured to Azure AI Search.

The requirements are as follows.

The following video shows how to fulfill the minimum set of Azure AI Search requirements:

Here are some more details about these requirements:

  • The endpoint and API key for Azure AI Search. Create an endpoint and API key.

  • The name of the index in Azure AI Search. Create an index.

    The Azure AI Search index that you use must have an index schema that is compatible with the schema of the documents that Unstructured produces for you. Unstructured cannot provide a schema that is guaranteed to work in all circumstances. This is because these schemas will vary based on your source files’ types; how you want Unstructured to partition, chunk, and generate embeddings; any custom post-processing code that you run; and other factors.

    You can adapt the following index schema example for your own needs:

    {
      "@odata.context": "https://ingest-test-azure-ai-search.search.windows.net/$metadata#indexes/$entity",
      "@odata.etag": "\"0x8DCED5D96393CA9\"",
      "name": "<my-index-name>",
      "defaultScoringProfile": null,
      "fields": [
        {
          "name": "id",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": true,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "record_id",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "element_id",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "text",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "embeddings",
          "type": "Collection(Edm.Single)",
          "searchable": true,
          "filterable": false,
          "retrievable": true,
          "stored": true,
          "sortable": false,
          "facetable": false,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": 3072,
          "vectorSearchProfile": "embeddings-config-profile",
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "type",
          "type": "Edm.String",
          "searchable": true,
          "filterable": true,
          "retrievable": true,
          "stored": true,
          "sortable": true,
          "facetable": true,
          "key": false,
          "indexAnalyzer": null,
          "searchAnalyzer": null,
          "analyzer": null,
          "normalizer": null,
          "dimensions": null,
          "vectorSearchProfile": null,
          "vectorEncoding": null,
          "synonymMaps": []
        },
        {
          "name": "metadata",
          "type": "Edm.ComplexType",
          "fields": [
            {
              "name": "category_depth",
              "type": "Edm.Int32",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "parent_id",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "attached_to_filename",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "filetype",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "last_modified",
              "type": "Edm.DateTimeOffset",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "is_continuation",
              "type": "Edm.Boolean",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "file_directory",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "filename",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "data_source",
              "type": "Edm.ComplexType",
              "fields": [
                {
                  "name": "url",
                  "type": "Edm.String",
                  "searchable": true,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "version",
                  "type": "Edm.String",
                  "searchable": true,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "date_created",
                  "type": "Edm.DateTimeOffset",
                  "searchable": false,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "date_modified",
                  "type": "Edm.DateTimeOffset",
                  "searchable": false,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "date_processed",
                  "type": "Edm.DateTimeOffset",
                  "searchable": false,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "permissions_data",
                  "type": "Edm.String",
                  "searchable": true,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "record_locator",
                  "type": "Edm.String",
                  "searchable": true,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                }
              ]
            },
            {
              "name": "coordinates",
              "type": "Edm.ComplexType",
              "fields": [
                {
                  "name": "system",
                  "type": "Edm.String",
                  "searchable": true,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "layout_width",
                  "type": "Edm.Double",
                  "searchable": false,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "layout_height",
                  "type": "Edm.Double",
                  "searchable": false,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                },
                {
                  "name": "points",
                  "type": "Edm.String",
                  "searchable": true,
                  "filterable": true,
                  "retrievable": true,
                  "stored": true,
                  "sortable": true,
                  "facetable": true,
                  "key": false,
                  "indexAnalyzer": null,
                  "searchAnalyzer": null,
                  "analyzer": null,
                  "normalizer": null,
                  "dimensions": null,
                  "vectorSearchProfile": null,
                  "vectorEncoding": null,
                  "synonymMaps": []
                }
              ]
            },
            {
              "name": "languages",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "page_number",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "orig_elements",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "links",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "page_name",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "url",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "link_urls",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "link_texts",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "sent_from",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "sent_to",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "subject",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "section",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "header_footer_type",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "emphasized_text_contents",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "emphasized_text_tags",
              "type": "Collection(Edm.String)",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "text_as_html",
              "type": "Edm.String",
              "searchable": true,
              "filterable": false,
              "retrievable": true,
              "stored": true,
              "sortable": false,
              "facetable": false,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "regex_metadata",
              "type": "Edm.String",
              "searchable": true,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            },
            {
              "name": "detection_class_prob",
              "type": "Edm.Double",
              "searchable": false,
              "filterable": true,
              "retrievable": true,
              "stored": true,
              "sortable": true,
              "facetable": true,
              "key": false,
              "indexAnalyzer": null,
              "searchAnalyzer": null,
              "analyzer": null,
              "normalizer": null,
              "dimensions": null,
              "vectorSearchProfile": null,
              "vectorEncoding": null,
              "synonymMaps": []
            }
          ]
        }
      ],
      "scoringProfiles": [],
      "corsOptions": null,
      "suggesters": [],
      "analyzers": [],
      "normalizers": [],
      "tokenizers": [],
      "tokenFilters": [],
      "charFilters": [],
      "encryptionKey": null,
      "similarity": {
        "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
        "k1": null,
        "b": null
      },
      "semantic": null,
      "vectorSearch": {
        "algorithms": [
          {
            "name": "embeddings-config",
            "kind": "hnsw",
            "hnswParameters": {
              "metric": "cosine",
              "m": 4,
              "efConstruction": 400,
              "efSearch": 500
            },
            "exhaustiveKnnParameters": null
          }
        ],
        "profiles": [
          {
            "name": "embeddings-config-profile",
            "algorithm": "embeddings-config",
            "vectorizer": null,
            "compression": null
          }
        ],
        "vectorizers": [],
        "compressions": []
      }
    }
    

    See also:

To create or change an Azure AI Search destination connector, see the following examples.

Replace the preceding placeholders as follows:

  • <name> (required) - A unique name for this connector.
  • <endpoint> (required) - The endpoint URL for Azure AI Search.
  • <index> (required) - The name of the index for Azure AI Search.
  • <azure-ai-search-key> (required) - The API key for Azure AI Search.

To change a connector, replace <connector-id> with the destination connector’s unique ID. To get this ID, see List destination connectors.