2 * copyright(c) 2005 kuwata-lab all rights reserved.
8 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.IdentityHashMap;
12 import java.util.regex.Matcher;
13 import java.util.Calendar;
14 import java.util.TimeZone;
17 * plain yaml parser class which is a parent of YamlParser class.
19 public class PlainYamlParser implements Parser {
// Prefix of anchor-related error messages; call sites append the label and the closing quote.
21 private static final String ANCHOR = "anchor '";
// Values stored in endFlag by getCurrentLine(): input exhausted, a "---" line was read, a "..." line was read.
22 private static final String ENDFLAG_EOF = "<EOF>";
23 private static final String ENDFLAG_DOC_BEGIN = "---";
24 private static final String ENDFLAG_DOC_END = "...";
// Splits a line into its leading spaces (group 1) and the remainder (group 2).
25 private static final String REGEXP1 = "^( *)(.*)";
// Block-style mapping entry. Group 1 is the key plus any trailing spaces (a bare word optionally
// starting with ':', a quoted string, "=" or "<<"). After the ':' separator, group 3 is the run of
// spaces and group 4 the value text; both are absent when nothing follows the ':'.
26 private static final String REGEXP2 = "^((?::?[-.\\w]+|'.*?'|\".*?\"|=|<<) *):(( +)(.*))?$";
/**
 * Placeholder for an alias reference, carrying the alias label and a line number.
 * Instances are created by registerAlias(), which passes the parser's current line number,
 * and are replaced with the anchored data by the resolveAliases() overloads.
 */
28 public static class Alias {
// Package-private constructor.
32 Alias(String label, int lineNum) {
34 this.lineNum = lineNum;
// Label of the anchor this alias refers to.
37 String getLabel() { return label; }
// Line number supplied at construction; used when reporting an unresolved alias.
39 int getLineNumber() { return lineNum; }
// Input lines; the constructor fills the array starting at index 1.
43 private String[] lines;
// Line most recently fetched by getCurrentLine().
44 private String line = null;
// Index into lines of the current line; pre-incremented by getCurrentLine(), so the first line read is lines[1].
45 private int linenum = 0;
// Anchor label -> data registered by registerAnchor().
46 private Map<String,Object> anchors = new HashMap<>();
// Alias label -> number of times registerAlias() was called for that label.
47 private Map<String,Integer> aliases = new HashMap<>();
// One of the ENDFLAG_* constants once getCurrentLine() has set it; null until then.
48 private String endFlag = null;
// Scan buffer for flow-style parsing; resetBuffer() guarantees it ends with '\n'.
49 private String sbuf = null;
// Read position inside sbuf.
50 private int index = 0;
/**
 * Creates a parser over the given YAML text.
 * The text is split with Util.toListOfLines and copied into {@code lines} starting at
 * index 1; index 0 is never assigned.
 *
 * @param yamlStr the YAML text to parse
 */
52 PlainYamlParser(String yamlStr) {
53 List list = Util.toListOfLines(yamlStr);
54 int len = list.size();
// One extra slot: line i of the input is stored at lines[i + 1].
55 lines = new String[len + 1];
56 for (int i = 0; i < len; i++) {
57 lines[i + 1] = (String)list.get(i);
/**
 * Parses input starting at column 0.
 *
 * @throws SyntaxException if the input is not well-formed
 */
61 public Object parse() throws SyntaxException {
62 Object data = parseChild(0);
// Nothing was produced and the last marker seen was a "---" document-begin line.
63 if (data == null && endFlag.equals(ENDFLAG_DOC_BEGIN)) {
// At least one alias was registered while parsing.
66 if (aliases.size() > 0) {
72 public boolean hasNext() {
73 return !endFlag.equals(ENDFLAG_EOF);
/** Creates the empty list used to collect the items of a sequence. */
76 private List createSequence() {
77 return new ArrayList();
/** Adds a parsed item to a sequence; called by parseFlowSequence() and parseSequence(). */
80 private void addSequenceValue(List seq, Object value) {
/** Replaces the element at {@code index} of {@code seq} with {@code value}. */
84 private void setSequenceValueAt(List seq, int index, Object value) {
85 seq.set(index, value);
/** Associates {@code key} with {@code value} in a mapping; called by the mapping parsers and by alias resolution. */
88 private void setMappingValueWith(Map map, Object key, Object value) {
/**
 * Sets the default value of a mapping when the mapping implements Defaultable.
 * NOTE(review): the value is cast to Rule unconditionally; a non-Rule value would raise
 * ClassCastException here. Confirm callers only pass Rule instances.
 */
92 void setMappingDefault(Map map, Object value) {
93 if (map instanceof Defaultable) {
94 ((Defaultable)map).setDefault((Rule)value);
/**
 * Merges {@code map2} into {@code map}.
 * Only keys that {@code map} does not already contain are considered.
 */
98 private void mergeMapping(Map map, Map map2) {
99 for (Object key : map2.keySet()) {
// Keys already present in the target are skipped.
100 if (!map.containsKey(key)) {
101 Object value = map2.get(key);
/**
 * Merges every element of {@code maplist} into {@code map} via mergeCollection().
 *
 * @throws SyntaxException if an element is neither a Map nor a List
 */
107 private void mergeList(Map map, List maplist) throws SyntaxException {
108 for (Object elem : maplist) {
109 mergeCollection(map, elem);
/**
 * Merges the value of a "<<" key into {@code map}.
 * A Map is merged directly; a List is merged element by element, so nested lists are accepted too.
 *
 * @throws SyntaxException if {@code collection} is neither a Map nor a List
 */
113 private void mergeCollection(Map map, Object collection) throws SyntaxException {
114 if (collection instanceof Map) {
115 mergeMapping(map, (Map)collection);
116 } else if (collection instanceof List) {
117 mergeList(map, (List)collection);
119 throw syntaxError("'<<' requires collection (mapping, or sequence of mapping).");
/** Produces the scalar result object; scalar values returned by the parse methods are passed through here. */
123 private Object createScalar(Object value) {
/** Accessor for the current line; presumably returns the {@code line} field (TODO confirm). */
127 private String currentLine() {
/** Accessor for the current line number; presumably returns the {@code linenum} field (TODO confirm). */
131 int currentLineNumber() {
/**
 * Advances to the next meaningful line.
 * Lines are fetched with getCurrentLine() until one is null or is neither blank nor
 * comment-only. Callers treat a null result as "no more input".
 */
135 protected String getLine() {
138 currentLine = getCurrentLine();
// Keep fetching while the line is blank or contains only a '#' comment.
139 } while (currentLine != null && Util.matches(currentLine, "^\\s*($|#)"));
/**
 * Moves to the next raw line, updating {@code linenum}, {@code line} and {@code endFlag}.
 */
143 private String getCurrentLine() {
// Pre-increment: the first call reads lines[1].
144 if (++linenum < lines.length) {
145 line = lines[linenum];
// A line consisting solely of "..." ends the document.
146 if (Util.matches(line, "^\\.\\.\\.$")) {
148 endFlag = ENDFLAG_DOC_END;
// A "---" line, optionally followed by " !..." or " %...", begins a document.
149 } else if (Util.matches(line, "^---( [!%].*)?$")) {
151 endFlag = ENDFLAG_DOC_BEGIN;
// NOTE(review): appears to be the branch taken once linenum has run past the array; confirm.
155 endFlag = ENDFLAG_EOF;
/**
 * Loads {@code str} into the scan buffer, appending '\n' when it does not already end with one.
 * NOTE(review): an empty {@code str} makes charAt(-1) throw StringIndexOutOfBoundsException;
 * the visible callers pass non-empty text, but confirm that holds for every caller.
 */
160 private void resetBuffer(String str) {
161 sbuf = str.charAt(str.length() - 1) == '\n' ? str : str + "\n";
/**
 * Returns the character of the scan buffer at {@code index}, refilling the buffer from the
 * next meaningful line when the current buffer is used up.
 * Callers test the result with {@code >= 0}, so a negative value presumably signals end of
 * input (TODO confirm against the null branch below).
 */
165 private int getCurrentCharacter() {
// At least one more character is available in the current buffer.
166 if (index + 1 < sbuf.length()) {
// Buffer exhausted: continue with the next non-blank, non-comment line.
169 String currentLine = getLine();
// No further input.
170 if (currentLine == null) {
173 resetBuffer(currentLine);
176 return sbuf.charAt(index);
/** Reads characters with getCurrentCharacter() until one is negative or is not whitespace (newlines count as whitespace here). */
179 private int getChar() {
182 ch = getCurrentCharacter();
183 } while (ch >= 0 && isWhite(ch));
/** Like getChar(), but a '\n' is not skipped: reading stops at the first newline, non-whitespace or negative value. */
187 private int getCharOrNewline() {
190 ch = getCurrentCharacter();
191 } while (ch >= 0 && isWhite(ch) && ch != '\n');
/** Returns the character at the current buffer position without changing the position. */
195 private int currentChar() {
196 return sbuf.charAt(index);
/**
 * Builds (does not throw) a YamlSyntaxException for the given line.
 *
 * @param message the error text
 * @param linenum the line number to report; this parameter shadows the {@code linenum} field
 */
199 private SyntaxException syntaxError(String message, int linenum) {
200 return new YamlSyntaxException(message, linenum);
203 private SyntaxException syntaxError(String message) {
204 return new SyntaxException(message, linenum);
/**
 * Parses the value that starts on the next meaningful line, provided that line is
 * indented at least {@code column} spaces.
 *
 * @param column minimum indentation for the line to belong to this level
 * @return the parsed value, or createScalar(null) when input is exhausted or the next
 *         line is indented less than {@code column}
 * @throws SyntaxException if the value is malformed
 */
207 private Object parseChild(int column) throws SyntaxException {
208 String currentLine = getLine();
209 if (currentLine == null) {
210 return createScalar(null);
// Split the line into indentation (group 1) and content (group 2).
212 Matcher m = Util.matcher(currentLine, REGEXP1);
217 int indent = m.group(1).length();
// A shallower line belongs to an enclosing level, so this level has no value.
218 if (indent < column) {
219 return createScalar(null);
221 String value = m.group(2);
// The content starts at the line's own indentation.
222 return parseValue(column, value, indent);
/**
 * Dispatches on the leading syntax of {@code value} to the matching parse method.
 * The tests are tried in the order written; the first match wins.
 *
 * @param column           indentation column of the enclosing level
 * @param value            text of the value, with leading indentation removed
 * @param valueStartColumn column at which {@code value} starts on its line
 * @throws SyntaxException if the selected parser rejects the input
 */
225 private Object parseValue(int column, String value, int valueStartColumn) throws SyntaxException {
// "-" followed by a space or end of text: block sequence.
227 if (Util.matches(value, "^-( |$)")) {
228 data = parseSequence(valueStartColumn, value);
// "key:" form: block mapping.
229 } else if (Util.matches(value, REGEXP2)) {
230 data = parseMapping(valueStartColumn, value);
// '[' or '{': flow-style collection.
231 } else if (Util.matches(value, "^[\\[\\{]")) {
232 data = parseFlowStyle(value);
// "&label": anchor definition.
233 } else if (Util.matches(value, "^\\&[-\\w]+( |$)")) {
234 data = parseAnchor(column, value);
// "*label": alias reference.
235 } else if (Util.matches(value, "^\\*[-\\w]+( |$)")) {
236 data = parseAlias(value);
// '|' or '>': block text.
237 } else if (Util.matches(value, "^[|>]")) {
238 data = parseBlockText(column, value);
// '!': tag.
239 } else if (Util.matches(value, "^!")) {
240 data = parseTag(column, value);
// The rest of the line is a comment: take the value from the following lines instead.
241 } else if (Util.matches(value, "^\\#")) {
242 data = parseChild(column);
// Anything else is a plain scalar.
244 data = parseScalar(value);
/** Returns true for space, tab, line feed and carriage return. */
249 private static boolean isWhite(int ch) {
250 return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
/**
 * Parses a flow-style collection and checks what follows its closing bracket.
 *
 * @throws SyntaxException if something other than a newline, a '#' comment or end of
 *         input follows the closing ']' or '}'
 */
254 private Object parseFlowStyle(String value) throws SyntaxException {
257 Object data = parseFlow(0);
// parseFlow() leaves the position on the closing bracket.
258 int ch = currentChar();
259 assert ch == ']' || ch == '}';
// Skip blanks after the bracket but stop at the end of the line.
260 ch = getCharOrNewline();
261 if (ch != '\n' && ch != '#' && ch >= 0) {
// NOTE(review): "buf" in the message below looks like a typo for "but".
262 throw syntaxError("flow style sequence is closed buf got '" + ((char)ch) + "'.");
/**
 * Parses one flow-style node at the current buffer position: a sequence, a mapping
 * (when the current character is '{') or a scalar.
 *
 * @param depth nesting depth, passed on to the collection parsers
 * @throws SyntaxException on end of input or malformed content
 */
270 private Object parseFlow(int depth) throws SyntaxException {
271 int ch = currentChar();
273 throw syntaxError("found EOF when parsing flow style.");
// parseFlowSequence() asserts that the current character is '['.
277 data = parseFlowSequence(depth);
278 } else if (ch == '{') {
279 data = parseFlowMapping(depth);
281 data = parseFlowScalar();
/**
 * Parses a flow-style sequence; the current character must be '['.
 * Items are separated by ','.
 *
 * @param depth nesting depth of this sequence; items are parsed at depth + 1
 * @throws SyntaxException if an item is missing after a comma or the closing ']' is absent
 */
286 private List parseFlowSequence(int depth) throws SyntaxException {
287 assert currentChar() == '[';
288 List seq = createSequence();
291 addSequenceValue(seq, parseFlowSequenceItem(depth + 1));
// Each ',' introduces another item.
292 while ((ch = currentChar()) == ',') {
295 throw syntaxError("sequence item required (or last comma is extra).");
297 addSequenceValue(seq, parseFlowSequenceItem(depth + 1));
300 if (currentChar() != ']') {
301 throw syntaxError("flow style sequence requires ']'.");
/** Parses one item of a flow-style sequence by delegating to parseFlow() at the same depth. */
309 private Object parseFlowSequenceItem(int depth) throws SyntaxException {
310 return parseFlow(depth);
/**
 * Parses a flow-style mapping; the current character must be '{'.
 * Entries are separated by ','. The result is a DefaultableHashMap.
 *
 * @param depth nesting depth of this mapping; entries are parsed at depth + 1
 * @throws SyntaxException if an entry is missing after a comma or the closing '}' is absent
 */
313 private Map parseFlowMapping(int depth) throws SyntaxException {
314 assert currentChar() == '{';
315 Map map = new DefaultableHashMap();
// pair[0] is the key, pair[1] the value.
318 Object[] pair = parseFlowMappingItem(depth + 1);
319 Object key = pair[0];
320 Object value = pair[1];
321 setMappingValueWith(map, key, value);
// Each ',' introduces another entry.
322 while ((currentChar()) == ',') {
// NOTE(review): the message below has a typo ("comman") and an unbalanced parenthesis.
325 throw syntaxError("mapping item required (or last comman is extra.");
327 pair = parseFlowMappingItem(depth + 1);
330 setMappingValueWith(map, key, value);
333 if (currentChar() != '}') {
334 throw syntaxError("flow style mapping requires '}'.");
/**
 * Parses one "key: value" entry of a flow-style mapping.
 *
 * @return a two-element array: the key at index 0 and the value at index 1
 * @throws SyntaxException if the ':' separator is missing
 */
342 private Object[] parseFlowMappingItem(int depth) throws SyntaxException {
343 Object key = parseFlow(depth);
344 int ch = currentChar();
// Error text shows the offending character, or "EOF" for a negative value.
346 String s = ch >= 0 ? "'" + ((char)ch) + "'" : "EOF";
347 throw syntaxError("':' expected but got " + s);
350 Object value = parseFlow(depth);
351 return new Object[] { key, value };
/**
 * Parses a scalar inside a flow-style collection.
 * A scalar that starts with a single or double quote is read up to the matching quote and
 * kept as a string. Any other scalar is read up to one of ':', ',', ']' or '}', trimmed,
 * and converted with toScalar().
 */
354 private Object parseFlowScalar() {
355 int ch = currentChar();
357 StringBuilder sb = new StringBuilder();
358 if (ch == '"' || ch == '\'') {
// Read until the closing quote or end of input.
360 while ((ch = getCurrentCharacter()) >= 0 && ch != endch) {
// Quoted text is not passed through toScalar().
364 scalar = sb.toString();
// Characters that terminate an unquoted flow scalar.
367 String lookup = ":,]}";
368 while ((ch = getCurrentCharacter()) >= 0 && lookup.indexOf(ch) == -1) {
371 scalar = toScalar(sb.toString().trim());
373 return createScalar(scalar);
/**
 * Parses a value that begins with a "!tag".
 * If text follows the tag on the same line it is parsed as the value; otherwise the value
 * is taken from the following lines.
 * NOTE(review): the tag text appears to be used only for column arithmetic; confirm that
 * ignoring it is intentional.
 *
 * @param column indentation column of the enclosing level
 * @param value  text starting with '!'
 */
376 private Object parseTag(int column, String value) throws SyntaxException {
377 assert Util.matches(value, "^!\\S+");
// Group 1: tag name, group 3: whitespace after the tag, group 4: remaining text.
378 Matcher m = Util.matcher(value, "^!(\\S+)((\\s+)(.*))?$");
383 String tag = m.group(1);
384 String space = m.group(3);
385 String value2 = m.group(4);
387 if (value2 != null && value2.length() > 0) {
// Skip the '!' (1), the tag name and the separating whitespace.
388 int valueStartColumn = column + 1 + tag.length() + space.length();
389 data = parseValue(column, value2, valueStartColumn);
391 data = parseChild(column);
/**
 * Parses a value that begins with "&label" and registers the parsed data under that label.
 * If text follows the label on the same line it is parsed as the value; otherwise the
 * value is taken from the following lines.
 *
 * @param column indentation column of the enclosing level
 * @param value  text starting with '&'
 * @throws SyntaxException if the label is already registered or the value is malformed
 */
396 private Object parseAnchor(int column, String value) throws SyntaxException {
397 assert Util.matches(value, "^\\&([-\\w]+)(( *)(.*))?$");
// Group 1: label, group 3: spaces after the label, group 4: remaining text.
398 Matcher m = Util.matcher(value, "^\\&([-\\w]+)(( *)(.*))?$");
403 String label = m.group(1);
404 String space = m.group(3);
405 String value2 = m.group(4);
407 if (value2 != null && value2.length() > 0) {
// Skip the '&' (1), the label and the separating spaces.
408 int valueStartColumn = column + 1 + label.length() + space.length();
409 data = parseValue(column, value2, valueStartColumn);
411 data = parseChild(column);
// Registration happens after the value is parsed.
413 registerAnchor(label, data);
/**
 * Records {@code data} under the anchor {@code label}.
 *
 * @throws SyntaxException if the label has already been registered
 */
417 private void registerAnchor(String label, Object data) throws SyntaxException {
418 if (anchors.containsKey(label)) {
419 throw syntaxError(ANCHOR + label + "' is already used.");
421 anchors.put(label, data);
/**
 * Parses an alias reference of the form "*label".
 * Anything after the label other than a '#' comment is an error.
 *
 * @throws SyntaxException if data follows the alias
 */
424 private Object parseAlias(String value) throws SyntaxException {
// This assertion uses String.matches rather than Util.matches as elsewhere in the class.
425 assert value.matches("^\\*([-\\w]+)(( *)(.*))?$");
// Group 1: label, group 4: text after the label.
426 Matcher m = Util.matcher(value, "^\\*([-\\w]+)(( *)(.*))?$");
431 String label = m.group(1);
432 String value2 = m.group(4);
433 if (value2 != null && value2.length() > 0 && value2.charAt(0) != '#') {
434 throw syntaxError("alias cannot take any data.");
// Look the label up among the anchors registered so far.
436 Object data = anchors.get(label);
// NOTE(review): presumably reached only when the lookup above found nothing; the
// returned Alias placeholder is later replaced by resolveAliases(). Confirm.
438 data = registerAlias(label);
/**
 * Counts one more reference to {@code label} in {@code aliases} and returns an Alias
 * placeholder tagged with the parser's current line number.
 */
444 private Alias registerAlias(String label) {
// Start at 1 or add 1 to the existing count.
445 aliases.merge(label, 1, (a, b) -> a + b);
446 return new Alias(label, linenum);
/**
 * Replaces Alias placeholders reachable from {@code data} with their anchored values.
 * Uses an identity-keyed map to remember the containers already visited.
 *
 * @throws SyntaxException if an alias refers to an unknown anchor
 */
450 private void resolveAliases(Object data) throws SyntaxException {
451 Map resolved = new IdentityHashMap();
452 resolveAliases(data, resolved);
/**
 * Resolves aliases inside {@code data}, dispatching on whether it is a List or a Map.
 *
 * @param data     the node to process
 * @param resolved identity map of nodes already visited
 */
456 private void resolveAliases(Object data, Map resolved) throws SyntaxException {
// Already visited (compared by identity by the map passed from resolveAliases(Object)).
457 if (resolved.containsKey(data)) {
// Mark as visited before descending.
460 resolved.put(data, data);
461 if (data instanceof List) {
462 resolveAliases((List)data, resolved);
463 } else if (data instanceof Map) {
464 resolveAliases((Map)data, resolved);
// A bare Alias is not expected at this point.
466 assert !(data instanceof Alias);
// A Defaultable container may hold a default value that also needs resolving.
468 if (data instanceof Defaultable) {
469 Object defaultValue = ((Defaultable)data).getDefault();
470 if (defaultValue != null) {
471 resolveAliases(defaultValue, resolved);
476 private void resolveAliases(List seq, Map resolved) throws SyntaxException {
477 int len = seq.size();
478 for (int i = 0; i < len; i++) {
479 Object val = seq.get(i);
480 if (val instanceof Alias) {
481 Alias alias = (Alias)val;
482 String label = alias.getLabel();
483 if (anchors.containsKey(label)) {
484 setSequenceValueAt(seq, i, anchors.get(label));
486 throw syntaxError(ANCHOR + alias.getLabel() + "' not found.");
488 } else if (val instanceof List || val instanceof Map) {
489 resolveAliases(val, resolved);
/**
 * Resolves aliases among the values of a mapping.
 * Each Alias value is replaced with the data registered for its label; nested lists and
 * maps are processed recursively.
 *
 * @param map      the mapping to process; modified in place
 * @param resolved identity map of containers already visited
 * @throws SyntaxException if an alias refers to a label with no registered anchor; the
 *         error carries the line number recorded in the alias
 */
494 private void resolveAliases(Map map, Map resolved) throws SyntaxException {
495 for (Object key : map.keySet()) {
496 Object val = map.get(key);
497 if (val instanceof Alias) {
498 Alias alias = (Alias) val;
499 String label = alias.getLabel();
500 if (anchors.containsKey(label)) {
// Replaces the value of a key that is already present while iterating the key set.
501 setMappingValueWith(map, key, anchors.get(label));
503 throw syntaxError(ANCHOR + alias.getLabel() + "' not found.", alias.getLineNumber());
505 } else if (val instanceof List || val instanceof Map) {
506 resolveAliases(val, resolved);
/**
 * Parses block text introduced by '|' or '>'.
 * The header is split into the block character (group 1), an optional '-' or '+'
 * indicator (group 2), optional indentation digits (group 3) and any text that follows
 * on the header line (group 4).
 *
 * @param column indentation column of the enclosing level
 * @param value  header text starting with '|' or '>'
 * @return createScalar() of the header-line text followed by the collected body
 * @throws SyntaxException on inconsistent indentation inside the block
 */
511 private Object parseBlockText(int column, String value) throws SyntaxException {
512 assert Util.matches(value, "^[>|]");
513 Matcher m = Util.matcher(value, "^([>|])([-+]?)(\\d*)\\s*(.*)$");
// '\0' stands for "not present".
518 char blockChar = m.group(1).length() > 0 ? m.group(1).charAt(0) : '\0';
519 char indicator = m.group(2).length() > 0 ? m.group(2).charAt(0) : '\0';
// -1 means the header gave no explicit indentation.
520 int indent = m.group(3).length() > 0 ? Integer.parseInt(m.group(3)) : -1;
521 String text = m.group(4);
// Separator character: newline for '|', space otherwise.
522 char sep = blockChar == '|' ? '\n' : ' ';
524 StringBuilder sb = new StringBuilder();
// Raw lines are read with getCurrentLine(), so blank and '#' lines are not skipped here.
526 while ((currentLine = getCurrentLine()) != null) {
// Group 1: leading spaces, group 2: the rest of the line.
527 m = Util.matcher(currentLine, "^( *)(.*)$");
529 String space = m.group(1);
530 String str = m.group(2);
// NOTE(review): presumably executed only while indent is still -1, so that the first
// line fixes the block indentation. Confirm.
532 indent = space.length();
// Line with nothing after its leading spaces.
534 if (str.length() == 0) {
537 int slen = space.length();
540 } else if (slen < indent) {
541 throw syntaxError("invalid indent in block text.");
543 n = indentHandler(blockChar, sb, n);
// Drop the block indentation but keep any deeper indentation of the line.
544 str = currentLine.substring(indent);
// For '>', a newline at the end of the buffer is turned into a space.
548 if ((blockChar == '>') && (sb.charAt(sb.length() - 1) == '\n')) {
549 sb.setCharAt(sb.length() - 1, ' ');
// The line that ended the loop is a comment line.
552 if (currentLine != null && Util.matches(currentLine, "^ *#")) {
// Adjust the end of the text according to the '+'/'-' indicator.
555 processIndicator(blockChar, indicator, sep, sb, n);
556 return createScalar(text + sb.toString());
/**
 * Adjusts the end of the collected block text according to the header indicator.
 * NOTE(review): presumably '+' selects handlePlus(), '-' selects handleMinus() and no
 * indicator selects the final branch; confirm against the branch conditions.
 *
 * @param blockChar '|' or '>'
 * @param indicator '+', '-' or '\0'
 * @param sep       separator character for this block style
 * @param sb        collected text; modified in place
 * @param n         count passed on to handlePlus()
 */
559 private void processIndicator(char blockChar, char indicator, char sep, StringBuilder sb, int n) {
562 handlePlus(blockChar, sb, n);
565 handleMinus(sep, sb);
// For '>', the last character of the text is overwritten with a newline.
// NOTE(review): setCharAt(sb.length() - 1, ...) throws when sb is empty; confirm that
// cannot happen here.
568 if (blockChar == '>') {
569 sb.setCharAt(sb.length() - 1, '\n');
/**
 * Helper of parseBlockText(), whose result is assigned back to the caller's counter.
 * The parameter named {@code indent} receives that counter ({@code n}) from the caller,
 * not an indentation width.
 */
574 private int indentHandler(char blockChar, StringBuilder sb, int indent) {
// For '>', drop the last character collected so far.
576 if (blockChar == '>' && sb.length() > 0) {
577 sb.deleteCharAt(sb.length() - 1);
// Runs once per counted unit.
579 for (int i = 0; i < indent; i++) {
587 private void handleMinus(char sep, StringBuilder sb) {
588 if (sb.charAt(sb.length() - 1) == sep) {
589 sb.deleteCharAt(sb.length() - 1);
/**
 * Helper of processIndicator().
 *
 * @param blockChar '|' or '>'
 * @param sb        collected text; modified in place
 * @param n         number of iterations of the loop below
 */
593 private void handlePlus(char blockChar, StringBuilder sb, int n) {
// For '>', the last character of the text is overwritten with a newline.
595 if (blockChar == '>') {
596 sb.setCharAt(sb.length() - 1, '\n');
597 for (int i = 0; i < n; i++) {
/**
 * Parses a block-style sequence whose items start with "-" at {@code column}.
 *
 * @param column column of the '-' markers of this sequence
 * @param value  text of the first item line, starting with '-'
 * @throws SyntaxException if a line is not a sequence item or is indented deeper than
 *         {@code column}
 */
605 private List parseSequence(int column, String value) throws SyntaxException {
606 assert Util.matches(value, "^-(( +)(.*))?$");
607 List seq = createSequence();
// Group 2: spaces after '-', group 3: the item text.
609 Matcher m = Util.matcher(value, "^-(( +)(.*))?$");
611 throw syntaxError("sequence item is expected.");
613 String space = m.group(2);
614 String value2 = m.group(3);
// The item's content lives at least one column to the right of the '-'.
615 int column2 = column + 1;
// Nothing after the '-': the item is on the following lines.
618 if (value2 == null || value2.length() == 0) {
619 elem = parseChild(column2);
// The item text starts after the '-' and the spaces that follow it.
621 int valueStartColumn = column2 + space.length();
622 elem = parseValue(column2, value2, valueStartColumn);
624 addSequenceValue(seq, elem);
// Inspect the line the item parser stopped on.
626 String currentLine = currentLine();
627 if (currentLine == null) {
630 Matcher m2 = Util.matcher(currentLine, REGEXP1);
632 int indent = m2.group(1).length();
// Shallower line: belongs to an enclosing level.
633 if (indent < column) {
// Deeper line where a sibling item was expected.
635 } else if (indent > column) {
636 throw syntaxError("invalid indent of sequence.");
/**
 * Parses a block-style mapping whose keys start at {@code column}.
 * The result is a DefaultableHashMap.
 *
 * @param column column of the keys of this mapping
 * @param value  text of the first entry line, matching REGEXP2
 * @throws SyntaxException if a line is not a mapping entry or is indented deeper than
 *         {@code column}
 */
644 private Map parseMapping(int column, String value) throws SyntaxException {
645 assert Util.matches(value, REGEXP2);
646 Map map = new DefaultableHashMap();
648 Matcher m = Util.matcher(value, REGEXP2);
650 throw syntaxError("mapping item is expected.");
// Group 1 is the key text, possibly with trailing spaces.
652 String v = m.group(1).trim();
// Keys go through the same scalar conversion as values.
653 Object key = toScalar(v);
654 String value2 = m.group(4);
655 int column2 = column + 1;
// Nothing after the ':': the value is on the following lines.
658 if (value2 == null || value2.length() == 0) {
659 elem = parseChild(column2);
// Skip the key (with its trailing spaces) and the spaces after the ':'.
661 int valueStartColumn = column2 + m.group(1).length() + m.group(3).length();
662 elem = parseValue(column2, value2, valueStartColumn);
// NOTE(review): presumably the branch for the "=" key accepted by REGEXP2; confirm.
665 setMappingDefault(map, elem);
// "<<" merges the value's entries into this mapping instead of storing a key.
666 } else if ("<<".equals(v)) {
667 mergeCollection(map, elem);
669 setMappingValueWith(map, key, elem);
// Inspect the line the value parser stopped on.
672 String currentLine = currentLine();
673 if (currentLine == null) {
676 Matcher m2 = Util.matcher(currentLine, REGEXP1);
678 int indent = m2.group(1).length();
679 if (checkIndent(column, indent)) {
/**
 * Compares the indentation of the next line with the mapping's key column.
 *
 * @param column column of the mapping's keys
 * @param indent indentation of the line under inspection
 * @throws SyntaxException if the line is indented deeper than {@code column}
 */
687 private boolean checkIndent(int column, int indent) throws SyntaxException {
// Shallower line: belongs to an enclosing level.
688 if (indent < column) {
// Deeper line where a sibling key was expected.
690 } else if (indent > column) {
691 throw syntaxError("invalid indent of mapping.");
/** Converts a plain scalar with toScalar() and wraps the result with createScalar(). */
697 private Object parseScalar(String value) {
698 Object data = createScalar(toScalar(value));
/**
 * Converts scalar text to a typed value.
 * The patterns below are tried in the order written: quoted strings, trailing-comment
 * removal, integers (hex, octal, decimal), floating point, booleans, null, ":symbol",
 * dates and timestamps.
 */
704 private Object toScalar(String value) {
// Double-quoted text, optionally followed by a '#' comment.
706 m = Util.matcher(value, "^\"(.*)\"([ \t]*#.*$)?");
// Single-quoted text, optionally followed by a '#' comment.
711 m = Util.matcher(value, "^'(.*)'([ \t]*#.*$)?");
// Unquoted text followed by a '#' comment; group 1 is the text without it.
716 m = Util.matcher(value, "^(.*\\S)[ \t]*#");
// NOTE(review): \d admits only decimal digits after "0x", and Integer.parseInt(s, 16)
// does not accept the "0x" prefix, so any text matching this pattern raises
// NumberFormatException. Integer.decode would accept "0x..." forms.
721 if (Util.matches(value, "^-?0x\\d+$")) {
722 return Integer.parseInt(value, 16);
// Leading zero followed by digits: parsed as octal (digits 8 or 9 raise NumberFormatException).
725 if (Util.matches(value, "^-?0\\d+$")) {
726 return Integer.parseInt(value, 8);
// NOTE(review): values outside the int range raise NumberFormatException.
729 if (Util.matches(value, "^-?\\d+$")) {
730 return Integer.parseInt(value, 10);
733 if (Util.matches(value, "^-?\\d+\\.\\d+$")) {
734 return Double.parseDouble(value);
737 if (Util.matches(value, "^(true|yes|on)$")) {
741 if (Util.matches(value, "^(false|no|off)$")) {
742 return Boolean.FALSE;
745 if (Util.matches(value, "^(null|~)$")){
749 if (Util.matches(value, "^:(\\w+)$")) {
// Date of the form yyyy-mm-dd.
753 m = Util.matcher(value, "^(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)$");
755 int year = Integer.parseInt(m.group(1));
756 int month = Integer.parseInt(m.group(2));
757 int day = Integer.parseInt(m.group(3));
758 Calendar cal = Calendar.getInstance();
// NOTE(review): Calendar months are 0-based, but the month parsed from the text is
// passed unchanged, so the result looks one month late. Calendar.set(y, m, d, h, mi, s)
// also leaves the millisecond field at its current-time value.
759 //noinspection MagicConstant
760 cal.set(year, month, day, 0, 0, 0);
761 return cal.getTime();
// Timestamp: date, 'T'/'t' or blanks, time, optional fraction (group 7), then optional
// "Z" or an offset with hours (group 8) and optional minutes (group 9).
764 m = Util.matcher(value, "^(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d)(?:[Tt]|[ \t]+)(\\d\\d?):(\\d\\d):(\\d\\d)(\\.\\d*)?(?:Z|[ \t]*([-+]\\d\\d?)(?::(\\d\\d))?)?$");
766 int year = Integer.parseInt(m.group(1));
767 int month = Integer.parseInt(m.group(2));
768 int day = Integer.parseInt(m.group(3));
769 int hour = Integer.parseInt(m.group(4));
770 int min = Integer.parseInt(m.group(5));
771 int sec = Integer.parseInt(m.group(6));
// NOTE(review): groups 8 and 9 are optional in the pattern; when absent they are null
// and the ID becomes "GMTnull:null" (or "GMT+09:null"). TimeZone.getTimeZone falls
// back to GMT for IDs it cannot parse, so an hour-only offset would be ignored.
773 String timezone = "GMT" + m.group(8) + ":" + m.group(9);
774 Calendar cal = Calendar.getInstance();
// NOTE(review): same 0-based month concern as in the date branch above.
775 //noinspection MagicConstant
776 cal.set(year, month, day, hour, min, sec);
// The zone is applied after the fields are set and before the time is read.
777 cal.setTimeZone(TimeZone.getTimeZone(timezone));
778 return cal.getTime();