2 * copyright(c) 2005 kuwata-lab all rights reserved.
8 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.IdentityHashMap;
12 import java.util.regex.Matcher;
13 import java.util.Calendar;
14 import java.util.TimeZone;
17 * Plain YAML parser class, which is the parent of the YamlParser class.
19 public class PlainYamlParser implements Parser {
// Prefix shared by the anchor-related syntax error messages.
21 private static final String ANCHOR = "anchor '";
// Values stored in endFlag by getCurrentLine(): ran past the last line, saw a "---" line, saw a "..." line.
22 private static final String ENDFLAG_EOF = "<EOF>";
23 private static final String ENDFLAG_DOC_BEGIN = "---";
24 private static final String ENDFLAG_DOC_END = "...";
// Splits a line into its leading spaces (group 1) and the remainder (group 2).
25 private static final String REGEXP1 = "^( *)(.*)";
// Block mapping entry "key:" or "key: value". Group 1 is the key (bare word, quoted string, "=" or "<<")
// plus trailing spaces, group 3 the spaces after the colon, group 4 the value text.
26 private static final String REGEXP2 = "^((?::?[-.\\w]+|'.*?'|\".*?\"|=|<<) *):(( +)(.*))?$";
/**
 * Placeholder for an alias ("*label") together with a line number.
 * registerAlias() creates instances; the resolveAliases() overloads later replace
 * them with the data registered in anchors under the same label.
 */
28 public static class Alias {
// lineNum is the line number supplied by the creator (registerAlias() passes the parser's linenum).
32 Alias(String label, int lineNum) {
34 this.lineNum = lineNum;
// Label used to look the anchor up during alias resolution.
37 String getLabel() { return label; }
// Line number handed to syntaxError() when the anchor cannot be found.
39 int getLineNumber() { return lineNum; }
// Input lines; the constructor fills slots 1..n and leaves slot 0 unused.
43 private String[] lines;
// Line most recently read by getCurrentLine().
44 private String line = null;
// Index into lines of the current line; pre-incremented by getCurrentLine().
45 private int linenum = 0;
// Anchor label -> data registered by registerAnchor().
46 private Map<String,Object> anchors = new HashMap<>();
// Alias label -> number of times registerAlias() was called for it.
47 private Map<String,Integer> aliases = new HashMap<>();
// One of the ENDFLAG_* constants once getCurrentLine() has seen a marker or run out of lines; null before that.
48 private String endFlag = null;
// Character buffer (set by resetBuffer()) and read position used by the flow-style parser.
49 private String sbuf = null;
50 private int index = 0;
52 PlainYamlParser(String yamlStr) {
53 List list = Util.toListOfLines(yamlStr);
54 int len = list.size();
55 lines = new String[len + 1];
56 for (int i = 0; i < len; i++) {
57 lines[i + 1] = (String)list.get(i);
61 public Object parse() throws SyntaxException {
62 Object data = parseChild(0);
63 if (data == null && endFlag.equals(ENDFLAG_DOC_BEGIN)) {
66 if (aliases.size() > 0) {
72 public boolean hasNext() {
73 return !endFlag.equals(ENDFLAG_EOF);
76 private List createSequence() {
77 return new ArrayList();
// Hook called by parseFlowSequence() and parseSequence() for every parsed item.
// NOTE(review): the body is not visible here — presumably appends value to seq; confirm.
80 private void addSequenceValue(List seq, Object value) {
// Replaces the element at index in seq with value; resolveAliases() uses it to
// swap an Alias placeholder for the anchored data.
84 private void setSequenceValueAt(List seq, int index, Object value) {
85 seq.set(index, value);
89 return new DefaultableHashMap();
// Hook called by the mapping parsers and by resolveAliases() to store value under key.
// NOTE(review): the body is not visible here — presumably map.put(key, value); confirm.
92 private void setMappingValueWith(Map map, Object key, Object value) {
96 void setMappingDefault(Map map, Object value) {
97 if (map instanceof Defaultable) {
98 ((Defaultable)map).setDefault(value);
// Merge step for "<<": visits every key of map2 and, for keys that map does not
// already contain, reads the value from map2. Keys already present in map are skipped.
// NOTE(review): the statement that stores the value into map is not visible here.
102 private void mergeMapping(Map map, Map map2) {
103 for (Object key : map2.keySet()) {
104 if (!map.containsKey(key)) {
105 Object value = map2.get(key);
111 private void mergeList(Map map, List maplist) throws SyntaxException {
112 for (Object elem : maplist) {
113 mergeCollection(map, elem);
117 private void mergeCollection(Map map, Object collection) throws SyntaxException {
118 if (collection instanceof Map) {
119 mergeMapping(map, (Map)collection);
120 } else if (collection instanceof List) {
121 mergeList(map, (List)collection);
123 throw syntaxError("'<<' requires collection (mapping, or sequence of mapping).");
// Hook through which every scalar result (including null) is passed before being returned.
// NOTE(review): the body is not visible here — presumably returns value unchanged; confirm.
127 private Object createScalar(Object value) {
// Used by parseSequence() and parseMapping() to inspect the line reached after parsing an item.
// NOTE(review): the body is not visible here — presumably returns the `line` field; confirm.
131 private String currentLine() {
// NOTE(review): the body is not visible here — presumably returns the `linenum` field; confirm.
135 int currentLineNumber() {
/**
 * Reads lines through getCurrentLine() until one is found that is neither
 * blank nor comment-only, or until getCurrentLine() returns null.
 */
139 protected String getLine() {
142 currentLine = getCurrentLine();
// Keep reading while the line is empty, whitespace only, or starts (after optional whitespace) with '#'.
143 } while (currentLine != null && Util.matches(currentLine, "^\\s*($|#)"));
/**
 * Moves to the next entry of lines and classifies it. A "..." line, a "---"
 * line, or running past the last line each record the matching ENDFLAG_*
 * constant in endFlag.
 */
147 private String getCurrentLine() {
// Pre-increment, so lines[0] is never read.
148 if (++linenum < lines.length) {
149 line = lines[linenum];
// A line consisting of exactly "...".
150 if (Util.matches(line, "^\\.\\.\\.$")) {
152 endFlag = ENDFLAG_DOC_END;
// "---" alone, or "---" followed by one space and text starting with '!' or '%'.
153 } else if (Util.matches(line, "^---( [!%].*)?$")) {
155 endFlag = ENDFLAG_DOC_BEGIN;
// linenum has moved past the last line.
159 endFlag = ENDFLAG_EOF;
/**
 * Loads str into sbuf, appending "\n" when str does not already end with one.
 * NOTE(review): str.charAt(str.length() - 1) throws for an empty string — confirm callers never pass "".
 */
164 private void resetBuffer(String str) {
165 sbuf = str.charAt(str.length() - 1) == '\n' ? str : str + "\n";
/**
 * Character source for the flow-style parser: returns the character of sbuf
 * at index, refilling sbuf from getLine() when the buffer has no further character.
 */
169 private int getCurrentCharacter() {
// Another character remains in the current buffer.
170 if (index + 1 < sbuf.length()) {
// Buffer used up: fetch the next significant line.
173 String currentLine = getLine();
// getLine() had no line to offer.
174 if (currentLine == null) {
177 resetBuffer(currentLine);
180 return sbuf.charAt(index);
/**
 * Reads characters with getCurrentCharacter() until one is not whitespace
 * (per isWhite) or a negative value is returned.
 */
183 private int getChar() {
186 ch = getCurrentCharacter();
187 } while (ch >= 0 && isWhite(ch));
/**
 * Reads characters with getCurrentCharacter(), skipping whitespace except '\n';
 * stops at a newline, at a non-whitespace character, or at a negative value.
 */
191 private int getCharOrNewline() {
194 ch = getCurrentCharacter();
195 } while (ch >= 0 && isWhite(ch) && ch != '\n');
// Returns the character of sbuf at the current index without moving the index.
199 private int currentChar() {
200 return sbuf.charAt(index);
// Builds (does not throw) a YamlSyntaxException carrying message and the given line number;
// callers throw the returned exception.
203 private SyntaxException syntaxError(String message, int linenum) {
204 return new YamlSyntaxException(message, linenum);
207 private SyntaxException syntaxError(String message) {
208 return new SyntaxException(message, linenum);
/**
 * Parses the node that starts on the next significant line.
 * Yields createScalar(null) when getLine() has no line to offer or when that
 * line is indented by fewer than column spaces.
 */
211 private Object parseChild(int column) throws SyntaxException {
212 String currentLine = getLine();
213 if (currentLine == null) {
214 return createScalar(null);
// group(1) = leading spaces, group(2) = rest of the line.
216 Matcher m = Util.matcher(currentLine, REGEXP1);
221 int indent = m.group(1).length();
// The line is indented less than required, so it is not parsed as this child.
222 if (indent < column) {
223 return createScalar(null);
225 String value = m.group(2);
// The line's own indentation is passed on as the start column of its content.
226 return parseValue(column, value, indent);
/**
 * Dispatches on the leading characters of value to the matching sub-parser.
 * valueStartColumn is handed to the block sequence and mapping parsers;
 * column is handed to the anchor, block text, tag and comment branches.
 * The tests run in the order written, so earlier patterns win.
 */
229 private Object parseValue(int column, String value, int valueStartColumn) throws SyntaxException {
// "-" followed by a space or end of text: block sequence item.
231 if (Util.matches(value, "^-( |$)")) {
232 data = parseSequence(valueStartColumn, value);
// "key:" or "key: value": block mapping entry.
233 } else if (Util.matches(value, REGEXP2)) {
234 data = parseMapping(valueStartColumn, value);
// '[' or '{': flow-style collection.
235 } else if (Util.matches(value, "^[\\[\\{]")) {
236 data = parseFlowStyle(value);
// "&label": anchor definition.
237 } else if (Util.matches(value, "^\\&[-\\w]+( |$)")) {
238 data = parseAnchor(column, value);
// "*label": alias reference.
239 } else if (Util.matches(value, "^\\*[-\\w]+( |$)")) {
240 data = parseAlias(value);
// '|' or '>': block text.
241 } else if (Util.matches(value, "^[|>]")) {
242 data = parseBlockText(column, value);
// '!': tag.
243 } else if (Util.matches(value, "^!")) {
244 data = parseTag(column, value);
// '#': the text is a comment, so parse the following node instead.
245 } else if (Util.matches(value, "^\\#")) {
246 data = parseChild(column);
// Anything else is a plain scalar.
248 data = parseScalar(value);
253 private static boolean isWhite(int ch) {
254 return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
/**
 * Parses a flow-style collection and checks that only a newline, a '#' or
 * the end of input follows its closing bracket.
 */
258 private Object parseFlowStyle(String value) throws SyntaxException {
261 Object data = parseFlow(0);
262 int ch = currentChar();
// After parseFlow() the current character is expected to be the closing bracket.
263 assert ch == ']' || ch == '}';
// Skip spaces up to the next newline or non-whitespace character.
264 ch = getCharOrNewline();
// A negative ch means no further character was available.
265 if (ch != '\n' && ch != '#' && ch >= 0) {
// NOTE(review): the message reads "buf" where "but" is presumably meant.
266 throw syntaxError("flow style sequence is closed buf got '" + ((char)ch) + "'.");
/**
 * Parses one flow-style node starting at the current character: a sequence,
 * a mapping when the character is '{', otherwise a scalar.
 */
274 private Object parseFlow(int depth) throws SyntaxException {
275 int ch = currentChar();
// NOTE(review): the condition guarding this throw is not visible here.
277 throw syntaxError("found EOF when parsing flow style.");
281 data = parseFlowSequence(depth);
282 } else if (ch == '{') {
283 data = parseFlowMapping(depth);
285 data = parseFlowScalar();
/**
 * Parses a flow sequence "[item, item, ...]" into a list created by createSequence().
 * Items are parsed one nesting level deeper (depth + 1) and separated by ','.
 */
290 private List parseFlowSequence(int depth) throws SyntaxException {
291 assert currentChar() == '[';
292 List seq = createSequence();
// First item.
295 addSequenceValue(seq, parseFlowSequenceItem(depth + 1));
// Every further item must be preceded by a comma.
296 while ((ch = currentChar()) == ',') {
// NOTE(review): the condition guarding this throw is not visible here.
299 throw syntaxError("sequence item required (or last comma is extra).");
301 addSequenceValue(seq, parseFlowSequenceItem(depth + 1));
// After the items the sequence must be closed.
304 if (currentChar() != ']') {
305 throw syntaxError("flow style sequence requires ']'.");
// Parses one item of a flow sequence by delegating to parseFlow() at the given depth.
313 private Object parseFlowSequenceItem(int depth) throws SyntaxException {
314 return parseFlow(depth);
/**
 * Parses a flow mapping "{key: value, ...}" into a map created by createMapping().
 * Pairs are parsed one nesting level deeper (depth + 1) and separated by ','.
 */
317 private Map parseFlowMapping(int depth) throws SyntaxException {
318 assert currentChar() == '{';
319 Map map = createMapping();
// First pair: pair[0] is the key, pair[1] the value.
322 Object[] pair = parseFlowMappingItem(depth + 1);
323 Object key = pair[0];
324 Object value = pair[1];
325 setMappingValueWith(map, key, value);
// Every further pair must be preceded by a comma.
326 while ((ch = currentChar()) == ',') {
// NOTE(review): the guarding condition is not visible here; the message has a typo ("comman") and an unbalanced parenthesis.
329 throw syntaxError("mapping item required (or last comman is extra.");
331 pair = parseFlowMappingItem(depth + 1);
334 setMappingValueWith(map, key, value);
// After the pairs the mapping must be closed.
337 if (currentChar() != '}') {
338 throw syntaxError("flow style mapping requires '}'.");
/**
 * Parses one "key: value" pair of a flow mapping.
 * Returns a two-element array: the key first, the value second.
 */
346 private Object[] parseFlowMappingItem(int depth) throws SyntaxException {
347 Object key = parseFlow(depth);
348 int ch = currentChar();
// Describe the offending character, or "EOF" when ch is negative.
350 String s = ch >= 0 ? "'" + ((char)ch) + "'" : "EOF";
351 throw syntaxError("':' expected but got " + s);
354 Object value = parseFlow(depth);
355 return new Object[] { key, value };
/**
 * Parses a scalar inside a flow collection.
 * A scalar starting with a quote character is read up to the matching end
 * character and kept as a string; any other scalar is read up to ':', ',',
 * ']' or '}', trimmed and converted with toScalar().
 */
358 private Object parseFlowScalar() {
359 int ch = currentChar();
361 StringBuilder sb = new StringBuilder();
// Quoted scalar.
362 if (ch == '"' || ch == '\'') {
364 while ((ch = getCurrentCharacter()) >= 0 && ch != endch) {
368 scalar = sb.toString();
// Unquoted scalar: ends at a flow delimiter or when no character is left.
371 while ((ch = getCurrentCharacter()) >= 0 && ch != ':' && ch != ',' && ch != ']' && ch != '}') {
374 scalar = toScalar(sb.toString().trim());
376 return createScalar(scalar);
/**
 * Handles a value that starts with a "!tag".
 * Parses the text following the tag on the same line, or the next node when
 * nothing follows it.
 * NOTE(review): in the visible lines the tag is used only to compute the start column.
 */
379 private Object parseTag(int column, String value) throws SyntaxException {
380 assert Util.matches(value, "^!\\S+");
// group(1) = tag name, group(3) = whitespace after it, group(4) = remaining text.
381 Matcher m = Util.matcher(value, "^!(\\S+)((\\s+)(.*))?$");
386 String tag = m.group(1);
387 String space = m.group(3);
388 String value2 = m.group(4);
390 if (value2 != null && value2.length() > 0) {
// Skip over '!', the tag name and the whitespace after it.
391 int valueStartColumn = column + 1 + tag.length() + space.length();
392 data = parseValue(column, value2, valueStartColumn);
// Nothing after the tag: the tagged node starts on a following line.
394 data = parseChild(column);
/**
 * Handles a value that starts with "&label": parses the anchored node (the
 * rest of the line, or the next node when nothing follows) and registers it
 * under label.
 */
399 private Object parseAnchor(int column, String value) throws SyntaxException {
400 assert Util.matches(value, "^\\&([-\\w]+)(( *)(.*))?$");
// group(1) = label, group(3) = spaces after it, group(4) = remaining text.
401 Matcher m = Util.matcher(value, "^\\&([-\\w]+)(( *)(.*))?$");
406 String label = m.group(1);
407 String space = m.group(3);
408 String value2 = m.group(4);
410 if (value2 != null && value2.length() > 0) {
// Skip over '&', the label and the spaces after it.
411 int valueStartColumn = column + 1 + label.length() + space.length();
412 data = parseValue(column, value2, valueStartColumn);
414 data = parseChild(column);
// Registration happens after parsing the node, so the label is not in anchors while the node is parsed.
416 registerAnchor(label, data);
420 private void registerAnchor(String label, Object data) throws SyntaxException {
421 if (anchors.containsKey(label)) {
422 throw syntaxError(ANCHOR + label + "' is already used.");
424 anchors.put(label, data);
/**
 * Handles a value that starts with "*label".
 * Looks the label up in anchors; registerAlias() supplies a placeholder for
 * labels that cannot be answered yet (the exact condition is not visible here).
 */
427 private Object parseAlias(String value) throws SyntaxException {
428 assert value.matches("^\\*([-\\w]+)(( *)(.*))?$");
// group(1) = label, group(4) = any text after the label.
429 Matcher m = Util.matcher(value, "^\\*([-\\w]+)(( *)(.*))?$");
434 String label = m.group(1);
435 String value2 = m.group(4);
// Only a comment may follow an alias on the same line.
436 if (value2 != null && value2.length() > 0 && value2.charAt(0) != '#') {
437 throw syntaxError("alias cannot take any data.");
439 Object data = anchors.get(label);
441 data = registerAlias(label);
447 private Alias registerAlias(String label) {
448 aliases.merge(label, 1, (a, b) -> a + b);
449 return new Alias(label, linenum);
453 private void resolveAliases(Object data) throws SyntaxException {
454 Map resolved = new IdentityHashMap();
455 resolveAliases(data, resolved);
/**
 * Recursive step of alias resolution for an arbitrary node.
 * resolved records every node already seen so that each one is handled once.
 */
459 private void resolveAliases(Object data, Map resolved) throws SyntaxException {
// Node was seen before.
460 if (resolved.containsKey(data)) {
// Mark the node before descending into it.
463 resolved.put(data, data);
464 if (data instanceof List) {
465 resolveAliases((List)data, resolved);
466 } else if (data instanceof Map) {
467 resolveAliases((Map)data, resolved);
// Alias placeholders are expected only as elements or values of a container, never as the node itself.
469 assert !(data instanceof Alias);
// A Defaultable node's default value may itself contain aliases.
471 if (data instanceof Defaultable) {
472 Object defaultValue = ((Defaultable)data).getDefault();
473 if (defaultValue != null) {
474 resolveAliases(defaultValue, resolved);
479 private void resolveAliases(List seq, Map resolved) throws SyntaxException {
480 int len = seq.size();
481 for (int i = 0; i < len; i++) {
482 Object val = seq.get(i);
483 if (val instanceof Alias) {
484 Alias alias = (Alias)val;
485 String label = alias.getLabel();
486 if (anchors.containsKey(label)) {
487 setSequenceValueAt(seq, i, anchors.get(label));
489 throw syntaxError(ANCHOR + alias.getLabel() + "' not found.");
491 } else if (val instanceof List || val instanceof Map) {
492 resolveAliases(val, resolved);
497 private void resolveAliases(Map map, Map resolved) throws SyntaxException {
498 for (Object key : map.keySet()) {
499 Object val = map.get(key);
500 if (val instanceof Alias) {
501 Alias alias = (Alias) val;
502 String label = alias.getLabel();
503 if (anchors.containsKey(label)) {
504 setMappingValueWith(map, key, anchors.get(label));
506 throw syntaxError(ANCHOR + alias.getLabel() + "' not found.", alias.getLineNumber());
508 } else if (val instanceof List || val instanceof Map) {
509 resolveAliases(val, resolved);
/**
 * Parses block text introduced by '|' or '>'.
 * The header may carry a '+' or '-' indicator and an explicit indentation
 * width; the body is read line by line with getCurrentLine(), so blank and
 * comment-looking lines are seen here too.
 */
514 private Object parseBlockText(int column, String value) throws SyntaxException {
515 assert Util.matches(value, "^[>|]");
// group(1) = '|' or '>', group(2) = optional '+'/'-', group(3) = optional digits, group(4) = trailing text.
516 Matcher m = Util.matcher(value, "^([>|])([-+]?)(\\d*)\\s*(.*)$");
521 char blockChar = m.group(1).length() > 0 ? m.group(1).charAt(0) : '\0';
522 char indicator = m.group(2).length() > 0 ? m.group(2).charAt(0) : '\0';
// -1 means the header gave no indentation width.
523 int indent = m.group(3).length() > 0 ? Integer.parseInt(m.group(3)) : -1;
524 String text = m.group(4);
// Separator character: newline for '|', space otherwise.
525 char sep = blockChar == '|' ? '\n' : ' ';
527 StringBuilder sb = new StringBuilder();
529 while ((currentLine = getCurrentLine()) != null) {
// Split the line into leading spaces and content.
530 m = Util.matcher(currentLine, "^( *)(.*)$");
532 String space = m.group(1);
533 String str = m.group(2);
// Take the indentation from this line (the guarding condition is not visible here).
535 indent = space.length();
// Line without content.
537 if (str.length() == 0) {
540 int slen = space.length();
543 } else if (slen < indent) {
544 throw syntaxError("invalid indent in block text.");
// For '>': drop the last character appended so far.
547 if (blockChar == '>' && sb.length() > 0) {
548 sb.deleteCharAt(sb.length() - 1);
550 for (int i = 0; i < n; i++) {
// Remove the block's indentation from the line.
555 str = currentLine.substring(indent);
// For '>': a trailing newline in the buffer becomes a space.
559 if ((blockChar == '>') && (sb.charAt(sb.length() - 1) == '\n')) {
560 sb.setCharAt(sb.length() - 1, ' ');
// The line the loop stopped on looks like a comment.
563 if (currentLine != null && Util.matches(currentLine, "^ *#")) {
// Trailing-newline handling for the '+' and '-' indicators (the guarding conditions are not visible here).
568 handlePlus(blockChar, sb, n);
571 handleMinus(sep, sb);
// For '>': the last character of the buffer is turned into a newline.
574 if (blockChar == '>') {
575 sb.setCharAt(sb.length() - 1, '\n');
// The header's trailing text is placed before the collected body.
578 return createScalar(text + sb.toString());
581 private void handleMinus(char sep, StringBuilder sb) {
582 if (sb.charAt(sb.length() - 1) == sep) {
583 sb.deleteCharAt(sb.length() - 1);
// Trailing-line handling called from parseBlockText(): for '>' the last character of sb
// is set to a newline, then a loop runs n times (its body is not visible here).
// NOTE(review): sb.setCharAt(sb.length() - 1, ...) throws when sb is empty — confirm a guard exists in the hidden lines.
587 private void handlePlus(char blockChar, StringBuilder sb, int n) {
589 if (blockChar == '>') {
590 sb.setCharAt(sb.length() - 1, '\n');
592 for (int i = 0; i < n; i++) {
/**
 * Parses a block sequence whose first item starts with value at the given column.
 * Each item is either the text after "- " on the same line or, when nothing
 * follows the dash, the node on the following lines.
 */
599 private List parseSequence(int column, String value) throws SyntaxException {
600 assert Util.matches(value, "^-(( +)(.*))?$");
601 List seq = createSequence();
// group(2) = spaces after the dash, group(3) = item text.
603 Matcher m = Util.matcher(value, "^-(( +)(.*))?$");
605 throw syntaxError("sequence item is expected.");
607 String space = m.group(2);
608 String value2 = m.group(3);
// Column just after the dash.
609 int column2 = column + 1;
// Nothing after the dash: the item is the node starting on a following line.
612 if (value2 == null || value2.length() == 0) {
613 elem = parseChild(column2);
615 int valueStartColumn = column2 + space.length();
616 elem = parseValue(column2, value2, valueStartColumn);
618 addSequenceValue(seq, elem);
// Inspect the line reached after parsing the item.
620 String currentLine = currentLine();
621 if (currentLine == null) {
624 Matcher m2 = Util.matcher(currentLine, REGEXP1);
626 int indent = m2.group(1).length();
// Compare that line's indentation with this sequence's column; deeper indentation is an error.
627 if (indent < column) {
629 } else if (indent > column) {
630 throw syntaxError("invalid indent of sequence.");
/**
 * Parses a block mapping whose first entry starts with value at the given column.
 * Each entry's value is either the text after "key: " on the same line or,
 * when nothing follows the colon, the node on the following lines.
 */
638 private Map parseMapping(int column, String value) throws SyntaxException {
639 assert Util.matches(value, REGEXP2);
640 Map map = createMapping();
642 Matcher m = Util.matcher(value, REGEXP2);
644 throw syntaxError("mapping item is expected.");
// Key text without the spaces before the colon, converted with toScalar().
646 String v = m.group(1).trim();
647 Object key = toScalar(v);
648 String value2 = m.group(4);
649 int column2 = column + 1;
// Nothing after the colon: the value is the node starting on a following line.
652 if (value2 == null || value2.length() == 0) {
653 elem = parseChild(column2);
// Skip over the key (with its trailing spaces) and the spaces after the colon.
655 int valueStartColumn = column2 + m.group(1).length() + m.group(3).length();
656 elem = parseValue(column2, value2, valueStartColumn);
// Sets the mapping's default value (the guarding condition is not visible here; REGEXP2 admits "=" as a key).
659 setMappingDefault(map, elem);
// "<<" merges the value's entries into this mapping instead of storing it under a key.
660 } else if ("<<".equals(v)) {
661 mergeCollection(map, elem);
663 setMappingValueWith(map, key, elem);
// Inspect the line reached after parsing the entry.
666 String currentLine = currentLine();
667 if (currentLine == null) {
670 Matcher m2 = Util.matcher(currentLine, REGEXP1);
672 int indent = m2.group(1).length();
// Compare that line's indentation with this mapping's column; deeper indentation is an error.
673 if (indent < column) {
675 } else if (indent > column) {
676 throw syntaxError("invalid indent of mapping.");
// Converts the text of a plain scalar with toScalar() and passes the result through createScalar().
// NOTE(review): the rest of the body is not visible here.
684 private Object parseScalar(String value) {
685 Object data = createScalar(toScalar(value));
691 private Object toScalar(String value) {
693 if ((m = Util.matcher(value, "^\"(.*)\"([ \t]*#.*$)?")).find()) {
695 } else if ((m = Util.matcher(value, "^'(.*)'([ \t]*#.*$)?")).find()) {
697 } else if ((m = Util.matcher(value, "^(.*\\S)[ \t]*#")).find()) {
701 if (Util.matches(value, "^-?0x\\d+$")) {
702 return Integer.parseInt(value, 16);
703 } else if (Util.matches(value, "^-?0\\d+$")) {
704 return Integer.parseInt(value, 8);
705 } else if (Util.matches(value, "^-?\\d+$")) {
706 return Integer.parseInt(value, 10);
707 } else if (Util.matches(value, "^-?\\d+\\.\\d+$")) {
708 return Double.parseDouble(value);
709 } else if (Util.matches(value, "^(true|yes|on)$")) {
711 } else if (Util.matches(value, "^(false|no|off)$")) {
712 return Boolean.FALSE;
713 } else if (Util.matches(value, "^(null|~)$")){
715 } else if (Util.matches(value, "^:(\\w+)$")) {
717 } else if ((m = Util.matcher(value, "^(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)$")).find()) {
718 int year = Integer.parseInt(m.group(1));
719 int month = Integer.parseInt(m.group(2));
720 int day = Integer.parseInt(m.group(3));
721 Calendar cal = Calendar.getInstance();
722 cal.set(year, month, day, 0, 0, 0);
723 return cal.getTime();
724 } else if ((m = Util.matcher(value, "^(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)(?:[Tt]|[ \t]+)(\\d\\d?):(\\d\\d):(\\d\\d)(\\.\\d*)?(?:Z|[ \t]*([-+]\\d\\d?)(?::(\\d\\d))?)?$")).find()) {
725 int year = Integer.parseInt(m.group(1));
726 int month = Integer.parseInt(m.group(2));
727 int day = Integer.parseInt(m.group(3));
728 int hour = Integer.parseInt(m.group(4));
729 int min = Integer.parseInt(m.group(5));
730 int sec = Integer.parseInt(m.group(6));
732 String timezone = "GMT" + m.group(8) + ":" + m.group(9);
733 Calendar cal = Calendar.getInstance();
734 cal.set(year, month, day, hour, min, sec);
735 cal.setTimeZone(TimeZone.getTimeZone(timezone));
736 return cal.getTime();