DCAE Controller documentation DCAEGEN2-213
[dcaegen2.git] / docs / sections / components / data-formats.rst
1 .. This work is licensed under a Creative Commons Attribution 4.0 International License.\r
2 .. http://creativecommons.org/licenses/by/4.0\r
3 \r
4 .. _data-formats:\r
5 \r
6 Data Formats\r
7 ============\r
8 \r
9 | Because the DCAE designer composes your component with others at\r
10   service design time, in most cases you do not know what specific\r
11   component(s) your component will send data to during runtime. Thus, it\r
12   is vital that DCAE has a language for describing the data passed\r
13   between components, so that it is known which components are\r
14   composable with others. Data formats are descriptions of data—they are\r
15   the data contract between your component and other components. You\r
16   need to describe the available outputs and assumed inputs of your\r
17   components as data formats. These data descriptions are onboarded into\r
18   ASDC, and each receives a UUID. If component X outputs data format\r
19   DF-Y, and another component Z specifies DF-Y as its input data\r
20   format, then X is said to be *composable* with that component. The\r
21   data formats are referenced in the component specifications by the\r
22   data format’s id and version.\r
23 | The vision is to have a repository of shared data formats that\r
24   developers and teams can re-use, and to give them the means to\r
25   extend and create new custom data formats.\r
26 \r
27 .. _dataformat_metadata:\r
28 \r
29 Meta Schema Definition\r
30 ----------------------\r
31 \r
32 The “Meta Schema” implementation defines how data format JSON schemas\r
33 can be written to define user input. It is itself a JSON schema (thus it\r
34 is a “meta schema”). It requires the name of the data format entry, the\r
35 data format entry version and allows a description under the “self” object.\r
36 The meta schema version must be specified as the value of the\r
37 “dataformatversion” key. Then the input schema itself is described.\r
38 There are four types of schema description objects - jsonschema for\r
39 inline standard JSON Schema definitions of JSON inputs, delimitedschema\r
40 for delimited data input using a defined JSON description, unstructured\r
41 for unstructured text, and reference that allows a pointer to another\r
42 artifact for a schema. The reference allows for XML schema, but can be\r
43 used as a pointer to JSON, Delimited Format, and Unstructured schemas as\r
44 well.\r
45 \r
46 The current Meta Schema implementation is defined below:\r
47 \r
48 ::\r
49 \r
50     {\r
51         "$schema": "http://json-schema.org/draft-04/schema#",\r
52         "title": "Data format specification schema Version 1.0",\r
53         "type": "object",\r
54         "oneOf": [{\r
55             "properties": {\r
56                 "self": {\r
57                     "$ref": "#/definitions/self"\r
58                 },\r
59                 "dataformatversion": {\r
60                     "$ref": "#/definitions/dataformatversion"\r
61                 },\r
62                 "reference": {\r
63     \r
64                     "type": "object",\r
65                     "description": "A reference to an external schema - name/version is used to access the artifact",\r
66                     "properties": {\r
67                         "name": {\r
68                             "$ref": "#/definitions/name"\r
69                         },\r
70                         "version": {\r
71                             "$ref": "#/definitions/version"\r
72                         },\r
73                         "format": {\r
74                             "$ref": "#/definitions/format"\r
75                         }\r
76                     },\r
77                     "required": [\r
78                         "name",\r
79                         "version",\r
80                         "format"\r
81                     ],\r
82                     "additionalProperties": false\r
83                 }\r
84             },\r
85             "required": ["self", "dataformatversion", "reference"],\r
86             "additionalProperties": false\r
87         }, {\r
88             "properties": {\r
89                 "self": {\r
90                     "$ref": "#/definitions/self"\r
91                 },\r
92                 "dataformatversion": {\r
93                     "$ref": "#/definitions/dataformatversion"\r
94                 },\r
95                 "jsonschema": {\r
96                     "$schema": "http://json-schema.org/draft-04/schema#",\r
97                     "description": "The actual JSON schema for this data format"\r
98                 }\r
99     \r
100             },\r
101             "required": ["self", "dataformatversion", "jsonschema"],\r
102             "additionalProperties": false\r
103         }, {\r
104             "properties": {\r
105                 "self": {\r
106                     "$ref": "#/definitions/self"\r
107                 },\r
108                 "dataformatversion": {\r
109                     "$ref": "#/definitions/dataformatversion"\r
110                 },\r
111                 "delimitedschema": {\r
112                     "type": "object",\r
113                     "description": "A JSON schema for delimited files",\r
114                     "properties": {\r
115                         "delimiter": {\r
116                             "enum": [",", "|", "\t", ";"]\r
117                         },\r
118                         "fields": {\r
119                             "type": "array",\r
120                             "description": "Array of field descriptions",\r
121                             "items": {\r
122                                 "$ref": "#/definitions/field"\r
123                             }\r
124                         }\r
125                     },\r
126                     "additionalProperties": false\r
127                 }\r
128             },\r
129             "required": ["self", "dataformatversion", "delimitedschema"],\r
130             "additionalProperties": false\r
131         }, {\r
132             "properties": {\r
133                 "self": {\r
134                     "$ref": "#/definitions/self"\r
135                 },\r
136                 "dataformatversion": {\r
137                     "$ref": "#/definitions/dataformatversion"\r
138                 },\r
139                 "unstructured": {\r
140                     "type": "object",\r
141                     "description": "A JSON schema for unstructured text",\r
142                     "properties": {\r
143                         "encoding": {\r
144                             "type": "string",\r
145                             "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"]\r
146                         }\r
147                     },\r
148                     "additionalProperties": false\r
149     \r
150                 }\r
151             },\r
152             "required": ["self", "dataformatversion", "unstructured"],\r
153             "additionalProperties": false\r
154         }],\r
155         "definitions": {\r
156             "name": {\r
157                 "type": "string"\r
158             },\r
159             "version": {\r
160                 "type": "string",\r
161                 "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$"\r
162             },\r
163             "self": {\r
164                 "description": "Identifying Information for the Data Format - name/version can be used to access the artifact",\r
165                 "type": "object",\r
166                 "properties": {\r
167                     "name": {\r
168                         "$ref": "#/definitions/name"\r
169                     },\r
170                     "version": {\r
171                         "$ref": "#/definitions/version"\r
172                     },\r
173                     "description": {\r
174                         "type": "string"\r
175                     }\r
176                 },\r
177                 "required": [\r
178                     "name",\r
179                     "version"\r
180                 ],\r
181                 "additionalProperties": false\r
182             },\r
183             "format": {\r
184                 "description": "Reference schema type",\r
185                 "type": "string",\r
186                 "enum": [\r
187                     "JSON",\r
188                     "Delimited Format",\r
189                     "XML",\r
190                     "Unstructured"\r
191                 ]\r
192             },\r
193             "field": {\r
194                 "description": "A field definition for the delimited schema",\r
195                 "type": "object",\r
196                 "properties": {\r
197                     "name": {\r
198                         "type": "string"\r
199                     },\r
200                     "description": {\r
201                         "type": "string"\r
202                     },\r
203                     "fieldtype": {\r
204                         "description": "the field type - from the XML schema types",\r
205                         "type": "string",\r
206                         "enum": ["string", "boolean",\r
207                             "decimal", "float", "double",\r
208                             "duration", "dateTime", "time",\r
209                             "date", "gYearMonth", "gYear",\r
210                             "gMonthDay", "gDay", "gMonth",\r
211                             "hexBinary", "base64Binary",\r
212                             "anyURI", "QName", "NOTATION",\r
213                             "normalizedString", "token",\r
214                             "language", "IDREFS", "ENTITIES",\r
215                             "NMTOKEN", "NMTOKENS", "Name",\r
216                             "NCName", "ID", "IDREF", "ENTITY",\r
217                             "integer", "nonPositiveInteger",\r
218                             "negativeInteger", "long", "int",\r
219                             "short", "byte",\r
220                             "nonNegativeInteger", "unsignedLong",\r
221                             "unsignedInt", "unsignedShort",\r
222                             "unsignedByte", "positiveInteger"\r
223     \r
224                         ]\r
225                     },\r
226                     "fieldPattern": {\r
227                         "description": "Regular expression that defines the field format",\r
228                         "type": "integer"\r
229                     },\r
230                     "fieldMaxLength": {\r
231                         "description": "The maximum length of the field",\r
232                         "type": "integer"\r
233                     },\r
234                     "fieldMinLength": {\r
235                         "description": "The minimum length of the field",\r
236                         "type": "integer"\r
237                     },\r
238                     "fieldMinimum": {\r
239                         "description": "The minimum numeric value of the field",\r
240                         "type": "integer"\r
241                     },\r
242                     "fieldMaximum": {\r
243                         "description": "The maximum numeric value of the field",\r
244                         "type": "integer"\r
245                     }\r
246                 },\r
247                 "additionalProperties": false\r
248             },\r
249             "dataformatversion": {\r
250                 "type": "string",\r
251                 "enum": ["1.0.0"]\r
252             }\r
253         }\r
254     }\r
255 \r
256 Examples\r
257 -----------\r
258 \r
259 By reference example - Common Event Format \r
260 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\r
261 \r
262 First the full JSON schema description of the Common Event Format would\r
263 be loaded with a name of “Common Event Format” and the current version\r
264 of “25.0.0”.\r
265 \r
266 Then the data format description is loaded by this schema:\r
267 \r
268 ::\r
269 \r
270     {\r
271         "self": {\r
272             "name": "Common Event Format Definition",\r
273             "version": "25.0.0",\r
274             "description": "Common Event Format Definition"\r
275      \r
276         },\r
277         "dataformatversion": "1.0.0",\r
278         "reference": {\r
279             "name": "Common Event Format",\r
280             "format": "JSON",\r
281             "version": "25.0.0"\r
282        }\r
283     }\r
284 \r
285 \r
286 \r
287 Simple JSON Example\r
288 ~~~~~~~~~~~~~~~~~~~~~~~~\r
289 \r
290 \r
291 ::\r
292 \r
293     {\r
294         "self": {\r
295             "name": "Simple JSON Example",\r
296             "version": "1.0.0",\r
297             "description": "An example of unnested JSON schema for Input and output"\r
298     \r
299         },\r
300         "dataformatversion": "1.0.0",\r
301         "jsonschema": {\r
302             "$schema": "http://json-schema.org/draft-04/schema#",\r
303             "type": "object",\r
304             "properties": {\r
305                 "raw-text": {\r
306                     "type": "string"\r
307                 }\r
308             },\r
309             "required": ["raw-text"],\r
310             "additionalProperties": false\r
311         }\r
312     }\r
313 \r
314 Nested JSON Example\r
315 ~~~~~~~~~~~~~~~~~~~~~~~~\r
316 \r
317 ::\r
318 \r
319     {\r
320         "self": {\r
321             "name": "Nested JSON Example",\r
322             "version": "1.0.0",\r
323             "description": "An example of nested JSON schema for Input and output"\r
324     \r
325         },\r
326         "dataformatversion": "1.0.0",\r
327         "jsonschema": {\r
328             "$schema": "http://json-schema.org/draft-04/schema#",\r
329             "properties": {\r
330                 "numFound": {\r
331                     "type": "integer"\r
332                 },\r
333                 "start": {\r
334                     "type": "integer"\r
335                 },\r
336                 "engagements": {\r
337                     "type": "array",\r
338                     "items": {\r
339                         "properties": {\r
340                             "engagementID": {\r
341                                 "type": "string",\r
342                                 "transcript": {\r
343                                     "type": "array",\r
344                                     "items": {\r
345                                         "type": {\r
346                                             "type": "string"\r
347                                         },\r
348                                         "content": {\r
349                                             "type": "string"\r
350                                         },\r
351                                         "senderName": {\r
352                                             "type": "string"\r
353                                         },\r
354                                         "iso": {\r
355                                             "type": "string"\r
356                                         },\r
357                                         "timestamp": {\r
358                                             "type": "integer"\r
359                                         },\r
360                                         "senderId": {\r
361                                             "type": "string"\r
362                                         }\r
363                                     }\r
364                                 }\r
365                             }\r
366                         }\r
367                     }\r
368                 }\r
369             },\r
370             "additionalProperties": false\r
371         }\r
372     }\r
373 \r
374 Unstructured Example\r
375 ~~~~~~~~~~~~~~~~~~~~~~~~~\r
376 \r
377 ::\r
378 \r
379     {\r
380         "self": {\r
381             "name": "Unstructured Text Example",\r
382             "version": "25.0.0",\r
383             "description": "An example of unstructured text used for both input and output"\r
384     \r
385         },\r
386         "dataformatversion": "1.0.0",\r
387         "unstructured": {\r
388             "encoding": "UTF-8"\r
389         }\r
390     }\r
391 \r
392 \r
393 An example of a delimited schema\r
394 --------------------------------\r
395 \r
396 ::\r
397 \r
398     {\r
399         "self": {\r
400             "name": "Delimited Format Example",\r
401             "version": "1.0.0",\r
402             "description": "Delimited format example just for testing"\r
403 \r
404         },\r
405         "dataformatversion": "1.0.0",\r
406         "delimitedschema": {\r
407             "delimiter": "|",\r
408             "fields": [{\r
409                 "name": "field1",\r
410                 "description": "test field1",\r
411                 "fieldtype": "string"\r
412             }, {\r
413                 "name": "field2",\r
414                 "description": "test field2",\r
415                 "fieldtype": "boolean"\r
416             }]\r
417         }\r
418     }\r