为数据源创建自定义 Data Catalog 条目

本文档介绍了如何创建自定义 Data Catalog 条目。如需集成自定义数据源,请调用 Data Catalog API,以便创建和管理带有自定义数据资源类型的 Data Catalog 条目。在本文档中,自定义数据资源类型的条目称为“自定义条目”。

创建条目组和自定义条目

自定义条目必须位于用户创建的条目组中。您创建条目组,然后在条目组中创建自定义条目。 如需了解详情,请参阅条目和条目组

创建条目后,您可以针对条目组设置 IAM 政策,以定义谁有权访问条目群组以及其中的条目。

Java

在试用此示例之前,请按照 Data Catalog 快速入门:使用客户端库中的 Java 设置说明进行操作。 如需了解详情,请参阅 Data Catalog Java API 参考文档

如需向 Data Catalog 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

import com.google.cloud.datacatalog.v1.ColumnSchema;
import com.google.cloud.datacatalog.v1.CreateEntryGroupRequest;
import com.google.cloud.datacatalog.v1.CreateEntryRequest;
import com.google.cloud.datacatalog.v1.DataCatalogClient;
import com.google.cloud.datacatalog.v1.Entry;
import com.google.cloud.datacatalog.v1.EntryGroup;
import com.google.cloud.datacatalog.v1.LocationName;
import com.google.cloud.datacatalog.v1.Schema;
import java.io.IOException;

// Sample to create custom entry
public class CreateCustomEntry {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "my-project";
    String entryGroupId = "onprem_entry_group";
    String entryId = "onprem_entry_id";
    createCustomEntry(projectId, entryGroupId, entryId);
  }

  public static void createCustomEntry(String projectId, String entryGroupId, String entryId)
      throws IOException {
    // Currently, Data Catalog stores metadata in the us-central1 region.
    String location = "us-central1";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DataCatalogClient dataCatalogClient = DataCatalogClient.create()) {
      // Construct the EntryGroup for the EntryGroup request.
      EntryGroup entryGroup =
          EntryGroup.newBuilder()
              .setDisplayName("My awesome Entry Group")
              .setDescription("This Entry Group represents an external system")
              .build();

      // Construct the EntryGroup request to be sent by the client.
      CreateEntryGroupRequest entryGroupRequest =
          CreateEntryGroupRequest.newBuilder()
              .setParent(LocationName.of(projectId, location).toString())
              .setEntryGroupId(entryGroupId)
              .setEntryGroup(entryGroup)
              .build();

      // Use the client to send the API request.
      EntryGroup createdEntryGroup = dataCatalogClient.createEntryGroup(entryGroupRequest);

      // Construct the Entry for the Entry request.
      Entry entry =
          Entry.newBuilder()
              .setUserSpecifiedSystem("onprem_data_system")
              .setUserSpecifiedType("onprem_data_asset")
              .setDisplayName("My awesome data asset")
              .setDescription("This data asset is managed by an external system.")
              .setLinkedResource("//my-onprem-server.com/dataAssets/my-awesome-data-asset")
              .setSchema(
                  Schema.newBuilder()
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("first_column")
                              .setDescription("This columns consists of ....")
                              .setMode("NULLABLE")
                              .setType("DOUBLE")
                              .build())
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("second_column")
                              .setDescription("This columns consists of ....")
                              .setMode("REQUIRED")
                              .setType("STRING")
                              .build())
                      .build())
              .build();

      // Construct the Entry request to be sent by the client.
      CreateEntryRequest entryRequest =
          CreateEntryRequest.newBuilder()
              .setParent(createdEntryGroup.getName())
              .setEntryId(entryId)
              .setEntry(entry)
              .build();

      // Use the client to send the API request.
      Entry createdEntry = dataCatalogClient.createEntry(entryRequest);
      System.out.printf("Custom entry created with name: %s", createdEntry.getName());
    }
  }
}

Node.js

在试用此示例之前,请按照 Data Catalog 快速入门:使用客户端库中的 Node.js 设置说明进行操作。 如需了解详情,请参阅 Data Catalog Node.js API 参考文档

如需向 Data Catalog 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

// Import the Google Cloud client library.
const {DataCatalogClient} = require('@google-cloud/datacatalog').v1;
const datacatalog = new DataCatalogClient();

async function createCustomEntry() {
  // Create a custom entry within an entry group.

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const projectId = 'my_project';
  // const entryGroupId = 'my_entry_group';
  // const entryId =  'my_entry';
  // const tagTemplateId = 'my_tag_template';

  // Currently, Data Catalog stores metadata in the us-central1 region.
  const location = 'us-central1';

  // Delete any pre-existing Entry with the same name
  // that will be used to create the new Entry.
  try {
    const entryName = datacatalog.entryPath(
      projectId,
      location,
      entryGroupId,
      entryId
    );
    await datacatalog.deleteEntry({name: entryName});
    console.log(`Deleted Entry: ${entryName}`);
  } catch (err) {
    console.log('Entry does not exist.');
  }

  // Delete any pre-existing Entry Group with the same name
  // that will be used to construct the new EntryGroup.
  try {
    const entryGroupName = datacatalog.entryGroupPath(
      projectId,
      location,
      entryGroupId
    );
    await datacatalog.deleteEntryGroup({name: entryGroupName});
    console.log(`Deleted Entry Group: ${entryGroupName}`);
  } catch (err) {
    console.log('Entry Group does not exist.');
  }

  // Delete any pre-existing Template with the same name
  // that will be used to create a new Template.
  const tagTemplateName = datacatalog.tagTemplatePath(
    projectId,
    location,
    tagTemplateId
  );

  let tagTemplateRequest = {
    name: tagTemplateName,
    force: true,
  };

  try {
    await datacatalog.deleteTagTemplate(tagTemplateRequest);
    console.log(`Deleted template: ${tagTemplateName}`);
  } catch (error) {
    console.log(`Cannot delete template: ${tagTemplateName}`);
  }

  // Construct the EntryGroup for the EntryGroup request.
  const entryGroup = {
    displayName: 'My awesome Entry Group',
    description: 'This Entry Group represents an external system',
  };

  // Construct the EntryGroup request to be sent by the client.
  const entryGroupRequest = {
    parent: datacatalog.locationPath(projectId, location),
    entryGroupId: entryGroupId,
    entryGroup: entryGroup,
  };

  // Use the client to send the API request.
  const [createdEntryGroup] =
    await datacatalog.createEntryGroup(entryGroupRequest);
  console.log(`Created entry group: ${createdEntryGroup.name}`);

  // Construct the Entry for the Entry request.
  const entry = {
    userSpecifiedSystem: 'onprem_data_system',
    userSpecifiedType: 'onprem_data_asset',
    displayName: 'My awesome data asset',
    description: 'This data asset is managed by an external system.',
    linkedResource: '//my-onprem-server.com/dataAssets/my-awesome-data-asset',
    schema: {
      columns: [
        {
          column: 'first_column',
          description: 'This columns consists of ....',
          mode: 'NULLABLE',
          type: 'STRING',
        },
        {
          column: 'second_column',
          description: 'This columns consists of ....',
          mode: 'NULLABLE',
          type: 'DOUBLE',
        },
      ],
    },
  };

  // Construct the Entry request to be sent by the client.
  const entryRequest = {
    parent: datacatalog.entryGroupPath(projectId, location, entryGroupId),
    entryId: entryId,
    entry: entry,
  };

  // Use the client to send the API request.
  const [createdEntry] = await datacatalog.createEntry(entryRequest);
  console.log(`Created entry: ${createdEntry.name}`);

  // Create a Tag Template.
  // For more field types, including ENUM, please refer to
  // https://cloud.google.com/data-catalog/docs/quickstarts/quickstart-search-tag#data-catalog-quickstart-nodejs.
  const fieldSource = {
    displayName: 'Source of data asset',
    type: {
      primitiveType: 'STRING',
    },
  };

  const tagTemplate = {
    displayName: 'Demo Tag Template',
    fields: {
      source: fieldSource,
    },
  };

  tagTemplateRequest = {
    parent: datacatalog.locationPath(projectId, location),
    tagTemplateId: tagTemplateId,
    tagTemplate: tagTemplate,
  };

  // Use the client to send the API request.
  const [createdTagTemplate] =
    await datacatalog.createTagTemplate(tagTemplateRequest);
  console.log(`Created template: ${createdTagTemplate.name}`);

  // Attach a Tag to the custom Entry.
  const tag = {
    template: createdTagTemplate.name,
    fields: {
      source: {
        stringValue: 'On-premises system name',
      },
    },
  };

  const tagRequest = {
    parent: createdEntry.name,
    tag: tag,
  };

  // Use the client to send the API request.
  const [createdTag] = await datacatalog.createTag(tagRequest);
  console.log(`Created tag: ${createdTag.name}`);
}
createCustomEntry();

Python

在试用此示例之前,请按照 Data Catalog 快速入门:使用客户端库中的 Python 设置说明进行操作。 如需了解详情,请参阅 Data Catalog Python API 参考文档

如需向 Data Catalog 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

# Import required modules.
from google.cloud import datacatalog_v1

# Google Cloud Platform project.
project_id = "my-project"
# Entry Group to be created.
entry_group_id = "my_new_entry_group_id"
# Entry to be created.
entry_id = "my_new_entry_id"
# Currently, Data Catalog stores metadata in the us-central1 region.
location = "us-central1"

datacatalog = datacatalog_v1.DataCatalogClient()

# Create an Entry Group.
entry_group_obj = datacatalog_v1.types.EntryGroup()
entry_group_obj.display_name = "My awesome Entry Group"
entry_group_obj.description = "This Entry Group represents an external system"

entry_group = datacatalog.create_entry_group(
    parent=datacatalog_v1.DataCatalogClient.common_location_path(
        project_id, location
    ),
    entry_group_id=entry_group_id,
    entry_group=entry_group_obj,
)
entry_group_name = entry_group.name
print("Created entry group: {}".format(entry_group_name))

# Create an Entry.
entry = datacatalog_v1.types.Entry()
entry.user_specified_system = "onprem_data_system"
entry.user_specified_type = "onprem_data_asset"
entry.display_name = "My awesome data asset"
entry.description = "This data asset is managed by an external system."
entry.linked_resource = "//my-onprem-server.com/dataAssets/my-awesome-data-asset"

# Create the Schema, this is optional.
entry.schema.columns.append(
    datacatalog_v1.types.ColumnSchema(
        column="first_column",
        type_="STRING",
        description="This columns consists of ....",
        mode=None,
    )
)

entry.schema.columns.append(
    datacatalog_v1.types.ColumnSchema(
        column="second_column",
        type_="DOUBLE",
        description="This columns consists of ....",
        mode=None,
    )
)

entry = datacatalog.create_entry(
    parent=entry_group_name, entry_id=entry_id, entry=entry
)
print("Created entry: {}".format(entry.name))

REST 和命令行

REST

请参阅以下示例,并参阅 DataGroup REST API entryGroups.createentryGroups.entries.create 文档。

1.创建一个条目组

在使用任何请求数据之前,请先进行以下替换:

  • project-id:您的 Google Cloud 项目 ID
  • entryGroupId:ID 必须以字母或下划线开头,只能包含英文字母、数字和下划线,长度不超过 64 个字符。
  • displayName:条目组的文本名称。

HTTP 方法和网址:

POST https://datacatalog.googleapis.com/v1/projects/project-id/locations/region/entryGroups?entryGroupId=entryGroupId

请求 JSON 正文:

{
  "displayName": "Entry Group display name"
}

如需发送您的请求,请展开以下选项之一:

您应该收到类似以下内容的 JSON 响应:

{
  "name": "projects/my_projectid/locations/us-central1/entryGroups/my_entry_group",
  "displayName": "Entry Group display name",
  "dataCatalogTimestamps": {
    "createTime": "2019-10-19T16:35:50.135Z",
    "updateTime": "2019-10-19T16:35:50.135Z"
  }
}

2.在条目组中创建一个自定义条目

在使用任何请求数据之前,请先进行以下替换:

  • project_id:您的 Google Cloud 项目 ID。
  • entryGroupId:现有 entryGroup 的 ID。此条目将在此 EntryGroup 中创建。
  • entryId:新条目的 ID。ID 必须以字母或下划线开头,只能包含英文字母、数字和下划线,长度不超过 64 个字符。
  • description:可选的条目说明
  • displayName:条目的可选文本名称。
  • userSpecifiedType:自定义类型名称。类型名称必须以字母或下划线开头,只能包含字母、数字和下划线,并且最多只能包含 64 个字符。
  • userSpecifiedSystem:该条目的非 Google Cloud 源系统,未与 Data Catalog 集成。源系统名称必须以字母或下划线开头,只能包含字母、数字和下划线,并且最多只能包含 64 个字符。
  • linkedResource:该条目引用的资源的可选完整名称。
  • schema:可选数据架构。

    JSON 架构示例
    { ...
      "schema": {
        "columns": [
          {
            "column": "first_name",
            "description": "First name",
            "mode": "REQUIRED",
            "type": "STRING"
          },
          {
            "column": "last_name",
            "description": "Last name",
            "mode": "REQUIRED",
            "type": "STRING"
          },
          {
            "column": "address",
            "description": "Address",
            "mode": "REPEATED",
            "subcolumns": [
              {
                "column": "city",
                "description": "City",
                "mode": "NULLABLE",
                "type": "STRING"
              },
              {
                "column": "state",
                "description": "State",
                "mode": "NULLABLE",
                "type": "STRING"
              }
            ],
            "type": "RECORD"
          }
        ]
      }
    ...
    }
    

HTTP 方法和网址:

POST https://datacatalog.googleapis.com/v1/projects/project_id/locations/region/entryGroups/entryGroupId/entries?entryId=entryId

请求 JSON 正文:

{
  "description": "Description",
  "displayName": "Display name",
  "userSpecifiedType": "my_type",
  "userSpecifiedSystem": "my_system",
  "linkedResource": "example.com/def",
  "schema": { schema }
}

如需发送您的请求,请展开以下选项之一:

您应该收到类似以下内容的 JSON 响应:

{
  "name": "projects/my_project_id/locations/us-central1/entryGroups/my_entryGroup_id/entries/my_entry_id",
  "userSpecifiedType": "my-type",
  "userSpecifiedSystem": "my_system",
  "displayName": "On-prem entry",
  "description": "My entry description.",
  "schema": {
    "columns": [
      {
        "type": "STRING",
        "description": "First name",
        "mode": "REQUIRED",
        "column": "first_name"
      },
      {
        "type": "STRING",
        "description": "Last name",
        "mode": "REQUIRED",
        "column": "last_name"
      },
      {
        "type": "RECORD",
        "description": "Address",
        "mode": "REPEATED",
        "column": "address",
        "subcolumns": [
          {
            "type": "STRING",
            "description": "City",
            "mode": "NULLABLE",
            "column": "city"
          },
          {
            "type": "STRING",
            "description": "State",
            "mode": "NULLABLE",
            "column": "state"
          }
        ]
      }
    ]
  },
  "sourceSystemTimestamps": {
    "createTime": "2019-10-23T23:11:26.326Z",
    "updateTime": "2019-10-23T23:11:26.326Z"
  },
"linkedResource": "example.com/def"
}