--- /dev/null
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.\r
+.. http://creativecommons.org/licenses/by/4.0\r
+\r
+.. _data-formats:\r
+\r
+Data Formats\r
+============\r
+\r
+| Because the DCAE designer composes your component with others at\r
+ service design time, in most cases you do not know what specific\r
+ component(s) your component will send data to during runtime. Thus, it\r
+ is vital that DCAE has a language for describing the data passed\r
+ between components, so that it is known which components are\r
+ composable with others. Data formats are descriptions of data—they are\r
+ the data contract between your component and other components. You\r
+ need to describe the available outputs and assumed inputs of your\r
+ components as data formats. These data descriptions are onboarded into\r
+ ASDC, and each receives a UUID. If component X outputs data format\r
+ DF-Y, and another component Z specifies DF-Y as its input data\r
+ format, then X is said to be *composable* with that component. The\r
+ data formats are referenced in the component specifications by the\r
+ data format’s id and version.\r
+| The vision is to have a repository of shared data formats that\r
+ developers and teams can re-use and also provide them the means to\r
+ extend and create new custom data formats.\r
+\r
+.. _dataformat_metadata:\r
+\r
+Meta Schema Definition\r
+----------------------\r
+\r
+The “Meta Schema” implementation defines how data format JSON schemas\r
+can be written to define user input. It is itself a JSON schema (thus it\r
+is a “meta schema”). It requires the name of the data format entry, the\r
+data format entry version and allows a description under the “self” object.\r
+The meta schema version must be specified as the value of the\r
+“dataformatversion” key. Then the input schema itself is described.\r
+There are four types of schema description objects - jsonschema for\r
+inline standard JSON Schema definitions of JSON inputs, delimitedschema\r
+for delimited data input using a defined JSON description, unstructured\r
+for unstructured text, and reference that allows a pointer to another\r
+artifact for a schema. The reference allows for XML schema, but can be\r
+used as a pointer to JSON, Delimited Format, and Unstructured schemas as\r
+well.\r
+\r
+The current Meta Schema implementation is defined below:\r
+\r
+::\r
+\r
+ {\r
+ "$schema": "http://json-schema.org/draft-04/schema#",\r
+ "title": "Data format specification schema Version 1.0",\r
+ "type": "object",\r
+ "oneOf": [{\r
+ "properties": {\r
+ "self": {\r
+ "$ref": "#/definitions/self"\r
+ },\r
+ "dataformatversion": {\r
+ "$ref": "#/definitions/dataformatversion"\r
+ },\r
+ "reference": {\r
+ \r
+ "type": "object",\r
+ "description": "A reference to an external schema - name/version is used to access the artifact",\r
+ "properties": {\r
+ "name": {\r
+ "$ref": "#/definitions/name"\r
+ },\r
+ "version": {\r
+ "$ref": "#/definitions/version"\r
+ },\r
+ "format": {\r
+ "$ref": "#/definitions/format"\r
+ }\r
+ },\r
+ "required": [\r
+ "name",\r
+ "version",\r
+ "format"\r
+ ],\r
+ "additionalProperties": false\r
+ }\r
+ },\r
+ "required": ["self", "dataformatversion", "reference"],\r
+ "additionalProperties": false\r
+ }, {\r
+ "properties": {\r
+ "self": {\r
+ "$ref": "#/definitions/self"\r
+ },\r
+ "dataformatversion": {\r
+ "$ref": "#/definitions/dataformatversion"\r
+ },\r
+ "jsonschema": {\r
+ "$schema": "http://json-schema.org/draft-04/schema#",\r
+ "description": "The actual JSON schema for this data format"\r
+ }\r
+ \r
+ },\r
+ "required": ["self", "dataformatversion", "jsonschema"],\r
+ "additionalProperties": false\r
+ }, {\r
+ "properties": {\r
+ "self": {\r
+ "$ref": "#/definitions/self"\r
+ },\r
+ "dataformatversion": {\r
+ "$ref": "#/definitions/dataformatversion"\r
+ },\r
+ "delimitedschema": {\r
+ "type": "object",\r
+ "description": "A JSON schema for delimited files",\r
+ "properties": {\r
+ "delimiter": {\r
+ "enum": [",", "|", "\t", ";"]\r
+ },\r
+ "fields": {\r
+ "type": "array",\r
+ "description": "Array of field descriptions",\r
+ "items": {\r
+ "$ref": "#/definitions/field"\r
+ }\r
+ }\r
+ },\r
+ "additionalProperties": false\r
+ }\r
+ },\r
+ "required": ["self", "dataformatversion", "delimitedschema"],\r
+ "additionalProperties": false\r
+ }, {\r
+ "properties": {\r
+ "self": {\r
+ "$ref": "#/definitions/self"\r
+ },\r
+ "dataformatversion": {\r
+ "$ref": "#/definitions/dataformatversion"\r
+ },\r
+ "unstructured": {\r
+ "type": "object",\r
+ "description": "A JSON schema for unstructured text",\r
+ "properties": {\r
+ "encoding": {\r
+ "type": "string",\r
+ "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"]\r
+ }\r
+ },\r
+ "additionalProperties": false\r
+ \r
+ }\r
+ },\r
+ "required": ["self", "dataformatversion", "unstructured"],\r
+ "additionalProperties": false\r
+ }],\r
+ "definitions": {\r
+ "name": {\r
+ "type": "string"\r
+ },\r
+ "version": {\r
+ "type": "string",\r
+ "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$"\r
+ },\r
+ "self": {\r
+ "description": "Identifying Information for the Data Format - name/version can be used to access the artifact",\r
+ "type": "object",\r
+ "properties": {\r
+ "name": {\r
+ "$ref": "#/definitions/name"\r
+ },\r
+ "version": {\r
+ "$ref": "#/definitions/version"\r
+ },\r
+ "description": {\r
+ "type": "string"\r
+ }\r
+ },\r
+ "required": [\r
+ "name",\r
+ "version"\r
+ ],\r
+ "additionalProperties": false\r
+ },\r
+ "format": {\r
+ "description": "Reference schema type",\r
+ "type": "string",\r
+ "enum": [\r
+ "JSON",\r
+ "Delimited Format",\r
+ "XML",\r
+ "Unstructured"\r
+ ]\r
+ },\r
+ "field": {\r
+ "description": "A field definition for the delimited schema",\r
+ "type": "object",\r
+ "properties": {\r
+ "name": {\r
+ "type": "string"\r
+ },\r
+ "description": {\r
+ "type": "string"\r
+ },\r
+ "fieldtype": {\r
+ "description": "the field type - from the XML schema types",\r
+ "type": "string",\r
+ "enum": ["string", "boolean",\r
+ "decimal", "float", "double",\r
+ "duration", "dateTime", "time",\r
+ "date", "gYearMonth", "gYear",\r
+ "gMonthDay", "gDay", "gMonth",\r
+ "hexBinary", "base64Binary",\r
+ "anyURI", "QName", "NOTATION",\r
+ "normalizedString", "token",\r
+ "language", "IDREFS", "ENTITIES",\r
+ "NMTOKEN", "NMTOKENS", "Name",\r
+ "NCName", "ID", "IDREF", "ENTITY",\r
+ "integer", "nonPositiveInteger",\r
+ "negativeInteger", "long", "int",\r
+ "short", "byte",\r
+ "nonNegativeInteger", "unsignedLong",\r
+ "unsignedInt", "unsignedShort",\r
+ "unsignedByte", "positiveInteger"\r
+ \r
+ ]\r
+ },\r
+ "fieldPattern": {\r
+ "description": "Regular expression that defines the field format",\r
+ "type": "integer"\r
+ },\r
+ "fieldMaxLength": {\r
+ "description": "The maximum length of the field",\r
+ "type": "integer"\r
+ },\r
+ "fieldMinLength": {\r
+ "description": "The minimum length of the field",\r
+ "type": "integer"\r
+ },\r
+ "fieldMinimum": {\r
+ "description": "The minimum numeric value of the field",\r
+ "type": "integer"\r
+ },\r
+ "fieldMaximum": {\r
+ "description": "The maximum numeric value of the field",\r
+ "type": "integer"\r
+ }\r
+ },\r
+ "additionalProperties": false\r
+ },\r
+ "dataformatversion": {\r
+ "type": "string",\r
+ "enum": ["1.0.0"]\r
+ }\r
+ }\r
+ }\r
+\r
+Examples\r
+-----------\r
+\r
+By reference example - Common Event Format \r
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+First the full JSON schema description of the Common Event Format would\r
+be loaded with a name of “Common Event Format” and the current version\r
+of “25.0.0”.\r
+\r
+Then the data format description is loaded by this schema:\r
+\r
+::\r
+\r
+ {\r
+ "self": {\r
+ "name": "Common Event Format Definition",\r
+ "version": "25.0.0",\r
+ "description": "Common Event Format Definition"\r
+ \r
+ },\r
+ "dataformatversion": "1.0.0",\r
+ "reference": {\r
+ "name": "Common Event Format",\r
+ "format": "JSON",\r
+ "version": "25.0.0"\r
+ }\r
+ }\r
+\r
+\r
+\r
+Simple JSON Example\r
+~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+\r
+::\r
+\r
+ {\r
+ "self": {\r
+ "name": "Simple JSON Example",\r
+ "version": "1.0.0",\r
+ "description": "An example of unnested JSON schema for Input and output"\r
+ \r
+ },\r
+ "dataformatversion": "1.0.0",\r
+ "jsonschema": {\r
+ "$schema": "http://json-schema.org/draft-04/schema#",\r
+ "type": "object",\r
+ "properties": {\r
+ "raw-text": {\r
+ "type": "string"\r
+ }\r
+ },\r
+ "required": ["raw-text"],\r
+ "additionalProperties": false\r
+ }\r
+ }\r
+\r
+Nested JSON Example\r
+~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+::\r
+\r
+ {\r
+ "self": {\r
+ "name": "Nested JSON Example",\r
+ "version": "1.0.0",\r
+ "description": "An example of nested JSON schema for Input and output"\r
+ \r
+ },\r
+ "dataformatversion": "1.0.0",\r
+ "jsonschema": {\r
+ "$schema": "http://json-schema.org/draft-04/schema#",\r
+ "properties": {\r
+ "numFound": {\r
+ "type": "integer"\r
+ },\r
+ "start": {\r
+ "type": "integer"\r
+ },\r
+ "engagements": {\r
+ "type": "array",\r
+ "items": {\r
+ "properties": {\r
+ "engagementID": {\r
+ "type": "string",\r
+ "transcript": {\r
+ "type": "array",\r
+ "items": {\r
+ "type": {\r
+ "type": "string"\r
+ },\r
+ "content": {\r
+ "type": "string"\r
+ },\r
+ "senderName": {\r
+ "type": "string"\r
+ },\r
+ "iso": {\r
+ "type": "string"\r
+ },\r
+ "timestamp": {\r
+ "type": "integer"\r
+ },\r
+ "senderId": {\r
+ "type": "string"\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ },\r
+ "additionalProperties": false\r
+ }\r
+ }\r
+\r
+Unstructured Example\r
+~~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+::\r
+\r
+ {\r
+ "self": {\r
+ "name": "Unstructured Text Example",\r
+ "version": "25.0.0",\r
+ "description": "An example of unstructured text used for both input and output"\r
+ \r
+ },\r
+ "dataformatversion": "1.0.0",\r
+ "unstructured": {\r
+ "encoding": "UTF-8"\r
+ }\r
+ }\r
+\r
+\r
+An example of a delimited schema\r
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+::\r
+\r
+ {\r
+ "self": {\r
+ "name": "Delimited Format Example",\r
+ "version": "1.0.0",\r
+ "description": "Delimited format example just for testing"\r
+\r
+ },\r
+ "dataformatversion": "1.0.0",\r
+ "delimitedschema": {\r
+ "delimiter": "|",\r
+ "fields": [{\r
+ "name": "field1",\r
+ "description": "test field1",\r
+ "fieldtype": "string"\r
+ }, {\r
+ "name": "field2",\r
+ "description": "test field2",\r
+ "fieldtype": "boolean"\r
+ }]\r
+ }\r
+ }\r