Merge "DCAE Controller documentation DCAEGEN2-213"
[dcaegen2.git] / docs / sections / components / data-formats.rst
diff --git a/docs/sections/components/data-formats.rst b/docs/sections/components/data-formats.rst
new file mode 100755 (executable)
index 0000000..ecb019c
--- /dev/null
@@ -0,0 +1,418 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.\r
+.. http://creativecommons.org/licenses/by/4.0\r
+\r
+.. _data-formats:\r
+\r
+Data Formats\r
+============\r
+\r
+| Because the DCAE designer composes your component with others at\r
+  service design time, in most cases you do not know what specific\r
+  component(s) your component will send data to during runtime. Thus, it\r
+  is vital that DCAE has a language for describing the data passed\r
+  between components, so that it is known which components are\r
+  composable with others. Data formats are descriptions of data—they are\r
+  the data contract between your component and other components. You\r
+  need to describe the available outputs and assumed inputs of your\r
+  components as data formats. These data descriptions are onboarded into\r
+  ASDC, and each receives a UUID. If component X outputs data format\r
+  DF-Y, and another component Z specifies DF-Y as its input data\r
+  format, then X is said to be *composable* with that component. The\r
+  data formats are referenced in the component specifications by the\r
+  data format’s id and version.\r
+| The vision is to have a repository of shared data formats that\r
+  developers and teams can re-use and also provide them the means to\r
+  extend and create new custom data formats.\r
+\r
+.. _dataformat_metadata:\r
+\r
+Meta Schema Definition\r
+----------------------\r
+\r
+The “Meta Schema” implementation defines how data format JSON schemas\r
+can be written to define user input. It is itself a JSON schema (thus it\r
+is a “meta schema”). It requires the name of the data format entry, the\r
+data format entry version, and allows a description under the “self” object.\r
+The meta schema version must be specified as the value of the\r
+“dataformatversion” key. Then the input schema itself is described.\r
+There are four types of schema description objects - jsonschema for\r
+inline standard JSON Schema definitions of JSON inputs, delimitedschema\r
+for delimited data input using a defined JSON description, unstructured\r
+for unstructured text, and reference that allows a pointer to another\r
+artifact for a schema. The reference allows for XML schema, but can be\r
+used as a pointer to JSON, Delimited Format, and Unstructured schemas as\r
+well.\r
+\r
+The current Meta Schema implementation is defined below:\r
+\r
+::\r
+\r
+    {\r
+        "$schema": "http://json-schema.org/draft-04/schema#",\r
+        "title": "Data format specification schema Version 1.0",\r
+        "type": "object",\r
+        "oneOf": [{\r
+            "properties": {\r
+                "self": {\r
+                    "$ref": "#/definitions/self"\r
+                },\r
+                "dataformatversion": {\r
+                    "$ref": "#/definitions/dataformatversion"\r
+                },\r
+                "reference": {\r
+    \r
+                    "type": "object",\r
+                    "description": "A reference to an external schema - name/version is used to access the artifact",\r
+                    "properties": {\r
+                        "name": {\r
+                            "$ref": "#/definitions/name"\r
+                        },\r
+                        "version": {\r
+                            "$ref": "#/definitions/version"\r
+                        },\r
+                        "format": {\r
+                            "$ref": "#/definitions/format"\r
+                        }\r
+                    },\r
+                    "required": [\r
+                        "name",\r
+                        "version",\r
+                        "format"\r
+                    ],\r
+                    "additionalProperties": false\r
+                }\r
+            },\r
+            "required": ["self", "dataformatversion", "reference"],\r
+            "additionalProperties": false\r
+        }, {\r
+            "properties": {\r
+                "self": {\r
+                    "$ref": "#/definitions/self"\r
+                },\r
+                "dataformatversion": {\r
+                    "$ref": "#/definitions/dataformatversion"\r
+                },\r
+                "jsonschema": {\r
+                    "$schema": "http://json-schema.org/draft-04/schema#",\r
+                    "description": "The actual JSON schema for this data format"\r
+                }\r
+    \r
+            },\r
+            "required": ["self", "dataformatversion", "jsonschema"],\r
+            "additionalProperties": false\r
+        }, {\r
+            "properties": {\r
+                "self": {\r
+                    "$ref": "#/definitions/self"\r
+                },\r
+                "dataformatversion": {\r
+                    "$ref": "#/definitions/dataformatversion"\r
+                },\r
+                "delimitedschema": {\r
+                    "type": "object",\r
+                    "description": "A JSON schema for delimited files",\r
+                    "properties": {\r
+                        "delimiter": {\r
+                            "enum": [",", "|", "\t", ";"]\r
+                        },\r
+                        "fields": {\r
+                            "type": "array",\r
+                            "description": "Array of field descriptions",\r
+                            "items": {\r
+                                "$ref": "#/definitions/field"\r
+                            }\r
+                        }\r
+                    },\r
+                    "additionalProperties": false\r
+                }\r
+            },\r
+            "required": ["self", "dataformatversion", "delimitedschema"],\r
+            "additionalProperties": false\r
+        }, {\r
+            "properties": {\r
+                "self": {\r
+                    "$ref": "#/definitions/self"\r
+                },\r
+                "dataformatversion": {\r
+                    "$ref": "#/definitions/dataformatversion"\r
+                },\r
+                "unstructured": {\r
+                    "type": "object",\r
+                    "description": "A JSON schema for unstructured text",\r
+                    "properties": {\r
+                        "encoding": {\r
+                            "type": "string",\r
+                            "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"]\r
+                        }\r
+                    },\r
+                    "additionalProperties": false\r
+    \r
+                }\r
+            },\r
+            "required": ["self", "dataformatversion", "unstructured"],\r
+            "additionalProperties": false\r
+        }],\r
+        "definitions": {\r
+            "name": {\r
+                "type": "string"\r
+            },\r
+            "version": {\r
+                "type": "string",\r
+                "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$"\r
+            },\r
+            "self": {\r
+                "description": "Identifying Information for the Data Format - name/version can be used to access the artifact",\r
+                "type": "object",\r
+                "properties": {\r
+                    "name": {\r
+                        "$ref": "#/definitions/name"\r
+                    },\r
+                    "version": {\r
+                        "$ref": "#/definitions/version"\r
+                    },\r
+                    "description": {\r
+                        "type": "string"\r
+                    }\r
+                },\r
+                "required": [\r
+                    "name",\r
+                    "version"\r
+                ],\r
+                "additionalProperties": false\r
+            },\r
+            "format": {\r
+                "description": "Reference schema type",\r
+                "type": "string",\r
+                "enum": [\r
+                    "JSON",\r
+                    "Delimited Format",\r
+                    "XML",\r
+                    "Unstructured"\r
+                ]\r
+            },\r
+            "field": {\r
+                "description": "A field definition for the delimited schema",\r
+                "type": "object",\r
+                "properties": {\r
+                    "name": {\r
+                        "type": "string"\r
+                    },\r
+                    "description": {\r
+                        "type": "string"\r
+                    },\r
+                    "fieldtype": {\r
+                        "description": "the field type - from the XML schema types",\r
+                        "type": "string",\r
+                        "enum": ["string", "boolean",\r
+                            "decimal", "float", "double",\r
+                            "duration", "dateTime", "time",\r
+                            "date", "gYearMonth", "gYear",\r
+                            "gMonthDay", "gDay", "gMonth",\r
+                            "hexBinary", "base64Binary",\r
+                            "anyURI", "QName", "NOTATION",\r
+                            "normalizedString", "token",\r
+                            "language", "IDREFS", "ENTITIES",\r
+                            "NMTOKEN", "NMTOKENS", "Name",\r
+                            "NCName", "ID", "IDREF", "ENTITY",\r
+                            "integer", "nonPositiveInteger",\r
+                            "negativeInteger", "long", "int",\r
+                            "short", "byte",\r
+                            "nonNegativeInteger", "unsignedLong",\r
+                            "unsignedInt", "unsignedShort",\r
+                            "unsignedByte", "positiveInteger"\r
+    \r
+                        ]\r
+                    },\r
+                    "fieldPattern": {\r
+                        "description": "Regular expression that defines the field format",\r
+                        "type": "string"\r
+                    },\r
+                    "fieldMaxLength": {\r
+                        "description": "The maximum length of the field",\r
+                        "type": "integer"\r
+                    },\r
+                    "fieldMinLength": {\r
+                        "description": "The minimum length of the field",\r
+                        "type": "integer"\r
+                    },\r
+                    "fieldMinimum": {\r
+                        "description": "The minimum numeric value of the field",\r
+                        "type": "integer"\r
+                    },\r
+                    "fieldMaximum": {\r
+                        "description": "The maximum numeric value of the field",\r
+                        "type": "integer"\r
+                    }\r
+                },\r
+                "additionalProperties": false\r
+            },\r
+            "dataformatversion": {\r
+                "type": "string",\r
+                "enum": ["1.0.0"]\r
+            }\r
+        }\r
+    }\r
+\r
+Examples\r
+-----------\r
+\r
+By reference example - Common Event Format \r
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+First the full JSON schema description of the Common Event Format would\r
+be loaded with a name of “Common Event Format” and the current version\r
+of “25.0.0”.\r
+\r
+Then the data format description is loaded by this schema:\r
+\r
+::\r
+\r
+    {\r
+        "self": {\r
+            "name": "Common Event Format Definition",\r
+            "version": "25.0.0",\r
+            "description": "Common Event Format Definition"\r
+     \r
+        },\r
+        "dataformatversion": "1.0.0",\r
+        "reference": {\r
+            "name": "Common Event Format",\r
+            "format": "JSON",\r
+            "version": "25.0.0"\r
+       }\r
+    }\r
+\r
+\r
+\r
+Simple JSON Example\r
+~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+\r
+::\r
+\r
+    {\r
+        "self": {\r
+            "name": "Simple JSON Example",\r
+            "version": "1.0.0",\r
+            "description": "An example of unnested JSON schema for Input and output"\r
+    \r
+        },\r
+        "dataformatversion": "1.0.0",\r
+        "jsonschema": {\r
+            "$schema": "http://json-schema.org/draft-04/schema#",\r
+            "type": "object",\r
+            "properties": {\r
+                "raw-text": {\r
+                    "type": "string"\r
+                }\r
+            },\r
+            "required": ["raw-text"],\r
+            "additionalProperties": false\r
+        }\r
+    }\r
+\r
+Nested JSON Example\r
+~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+::\r
+\r
+    {\r
+        "self": {\r
+            "name": "Nested JSON Example",\r
+            "version": "1.0.0",\r
+            "description": "An example of nested JSON schema for Input and output"\r
+    \r
+        },\r
+        "dataformatversion": "1.0.0",\r
+        "jsonschema": {\r
+            "$schema": "http://json-schema.org/draft-04/schema#",\r
+            "properties": {\r
+                "numFound": {\r
+                    "type": "integer"\r
+                },\r
+                "start": {\r
+                    "type": "integer"\r
+                },\r
+                "engagements": {\r
+                    "type": "array",\r
+                    "items": {\r
+                        "properties": {\r
+                            "engagementID": {\r
+                                "type": "string"\r
+                            },\r
+                            "transcript": {\r
+                                "type": "array",\r
+                                "items": {\r
+                                    "properties": {\r
+                                        "type": {\r
+                                            "type": "string"\r
+                                        },\r
+                                        "content": {\r
+                                            "type": "string"\r
+                                        },\r
+                                        "senderName": {\r
+                                            "type": "string"\r
+                                        },\r
+                                        "iso": {\r
+                                            "type": "string"\r
+                                        },\r
+                                        "timestamp": {\r
+                                            "type": "integer"\r
+                                        },\r
+                                        "senderId": {\r
+                                            "type": "string"\r
+                                        }\r
+                                    }\r
+                                }\r
+                            }\r
+                        }\r
+                    }\r
+                }\r
+            },\r
+            "additionalProperties": false\r
+        }\r
+    }\r
+\r
+Unstructured Example\r
+~~~~~~~~~~~~~~~~~~~~~~~~~\r
+\r
+::\r
+\r
+    {\r
+        "self": {\r
+            "name": "Unstructured Text Example",\r
+            "version": "25.0.0",\r
+            "description": "An example of unstructured text used for both input and output"\r
+    \r
+        },\r
+        "dataformatversion": "1.0.0",\r
+        "unstructured": {\r
+            "encoding": "UTF-8"\r
+        }\r
+    }\r
+\r
+\r
+An example of a delimited schema\r
+--------------------------------\r
+\r
+::\r
+\r
+    {\r
+        "self": {\r
+            "name": "Delimited Format Example",\r
+            "version": "1.0.0",\r
+            "description": "Delimited format example just for testing"\r
+\r
+        },\r
+        "dataformatversion": "1.0.0",\r
+        "delimitedschema": {\r
+            "delimiter": "|",\r
+            "fields": [{\r
+                "name": "field1",\r
+                "description": "test field1",\r
+                "fieldtype": "string"\r
+            }, {\r
+                "name": "field2",\r
+                "description": "test field2",\r
+                "fieldtype": "boolean"\r
+            }]\r
+        }\r
+    }\r