* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* XReader
- * This class works similarly as StAX, except StAX has more behavior than is needed. That would be ok, but
+ * This class works similarly as StAX, except StAX has more behavior than is needed. That would be ok, but
* StAX also was Buffering in their code in such a way as to read most, if not all, of the incoming stream into memory,
* defeating the purpose of pre-reading only the Header
- *
+ *
* This Reader does no back-tracking, but is able to create events based on syntax and given state only, leaving the
* Read-ahead mode of the InputStream up to the other classes.
- *
- * At this time, we only implement the important events, though if this is good enough, it could be expanded, perhaps to
+ *
+ * At this time, we only implement the important events, though if this is good enough, it could be expanded, perhaps to
* replace the original XMLReader from StAX.
- *
+ *
* @author Jonathan
*
*/
private InputStream is;
private ByteArrayOutputStream baos;
private int state, count, last;
-
+
private Stack<Map<String,String>> nsses;
-
+
/**
* Creates a reader over the given stream. Nothing is read from the stream here.
*
* The parse state starts at BEGIN_DOC with no pending events, an empty character buffer,
* a character count of zero and an empty namespace-scope stack.
*
* @param is the stream the XML is read from
*/
public XReader(InputStream is) {
this.is = is;
curr = another = null;
baos = new ByteArrayOutputStream();
- state = BEGIN_DOC;
+ state = BEGIN_DOC;
count = 0;
nsses = new Stack<Map<String,String>>();
}
-
+
public boolean hasNext() throws XMLStreamException {
- if(curr==null) {
+ if (curr==null) {
curr = parse();
}
return curr!=null;
return xe;
}
- //
+ //
// State Flags
//
// Note: The State of parsing XML can be complicated. There are too many to cleanly keep in "booleans". Additionally,
// useful combined Comment states
private final static int IN_COMMENT=COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2;
private final static int COMPLETE_COMMENT = COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3|COMMENT_D4;
-
-
+
+
/**
* Produces the next event, or null if the stream ends without producing one.
*
* An event previously deferred in "another" is returned first. Otherwise, in the code visible here,
* bytes are read from the stream one at a time (incrementing "count"): a '<' turns any buffered,
* non-whitespace text into a Characters event, and other bytes are buffered. While the BEGIN_DOC
* flag is set, whitespace is ignored and any other byte is rejected as not being XML.
*
* @return the next event, or null if none was produced
* @throws XMLStreamException on non-XML leading content, on an invalid namespace prefix, on end of
*         stream while BEGIN_DOC is still set, or wrapping an IOException from the stream
*/
private XEvent parse() throws XMLStreamException {
Map<String,String> nss = nsses.isEmpty()?null:nsses.peek();
XEvent rv;
- if((rv=another)!=null) { // "another" is a tag that may have needed to be created, but not
+ if ((rv=another)!=null) { // "another" is a tag that may have needed to be created, but not
// immediately returned. Save for next parse. If necessary, this could be turned into
// a FIFO storage, but a single reference is enough for now.
another = null; // "rv" is now set for the Event, and will be returned. Set to Null.
} else {
boolean go = true;
int c=0;
-
+
try {
- while(go && (c=is.read())>=0) {
+ while (go && (c=is.read())>=0) {
++count;
switch(c) {
case '<': // Tag is opening
// NOTE(review): "|= ~BEGIN_DOC" sets every bit except BEGIN_DOC rather than clearing BEGIN_DOC;
// "&= ~BEGIN_DOC" looks like what was intended -- confirm against how state is reset later.
state|=~BEGIN_DOC; // remove BEGIN_DOC flag, this is possibly an XML Doc
XEvent cxe = null;
- if(baos.size()>0) { // If there are any characters between tags, we send as Character Event
+ if (baos.size()>0) { // If there are any characters between tags, we send as Character Event
// NOTE(review): toString() with no argument decodes using the default charset -- confirm that is acceptable here.
String chars = baos.toString().trim(); // Trim out WhiteSpace before and after
- if(chars.length()>0) { // don't send if Characters were only whitespace
+ if (chars.length()>0) { // don't send if Characters were only whitespace
cxe = new XEvent.Characters(chars);
baos.reset();
go = false;
String ns;
switch(t.state&(START_TAG|END_TAG)) {
case START_TAG:
- nss = getNss(nss,t); // Only Start Tags might have NS Attributes
- // Get any NameSpace elements from tag. If there are, nss will become
- // a new Map with all the previous NSs plus the new. This provides
+ nss = getNss(nss,t); // Only Start Tags might have NS Attributes
+ // Get any NameSpace elements from tag. If there are, nss will become
+ // a new Map with all the previous NSs plus the new. This provides
// scoping behavior when used with the Stack
// drop through on purpose
case END_TAG:
default:
ns = "";
}
- if(ns==null)
+ if (ns==null)
throw new XMLStreamException("Invalid Namespace Prefix at " + count);
go = false;
- switch(t.state) { // based on
- case DOC_TYPE:
+ switch(t.state) { // based on
+ case DOC_TYPE:
rv = new XEvent.StartDocument();
break;
case COMMENT:
break;
case START_TAG|END_TAG: // This tag is both start/end aka <myTag/>
rv = new XEvent.StartElement(ns,t.name);
- if(last=='/')another = new XEvent.EndElement(ns,t.name);
+ if (last=='/')another = new XEvent.EndElement(ns,t.name);
}
- if(cxe!=null) { // if there is a Character Event, it actually should go first. ow.
- another = rv; // Make current Event the "another" or next event, and
+ if (cxe!=null) { // if there is a Character Event, it actually should go first. ow.
+ another = rv; // Make current Event the "another" or next event, and
rv = cxe; // send Character Event now
}
break;
case ' ':
case '\t':
case '\n':
- if((state&BEGIN_DOC)==BEGIN_DOC) { // if Whitespace before doc, just ignore
+ if ((state&BEGIN_DOC)==BEGIN_DOC) { // if Whitespace before doc, just ignore
break;
}
// fallthrough on purpose
default:
- if((state&BEGIN_DOC)==BEGIN_DOC) { // if there is any data at the start other than XML Tag, it's not XML
+ if ((state&BEGIN_DOC)==BEGIN_DOC) { // if there is any data at the start other than XML Tag, it's not XML
throw new XMLStreamException("Parse Error: This is not an XML Doc");
}
baos.write(c); // save off Characters
} catch (IOException e) {
throw new XMLStreamException(e); // all errors parsing will be treated as XMLStreamErrors (like StAX)
}
- if(c==-1 && (state&BEGIN_DOC)==BEGIN_DOC) { // Normally, end of stream is ok, however, we need to know if the
- throw new XMLStreamException("Premature End of File"); // document isn't an XML document, so we throw exception if it
+ if (c==-1 && (state&BEGIN_DOC)==BEGIN_DOC) { // Normally, end of stream is ok, however, we need to know if the
+ throw new XMLStreamException("Premature End of File"); // document isn't an XML document, so we throw exception if it
} // hasn't yet been determined to be an XML Doc
}
return rv;
}
-
+
/**
* parseTag
- *
- * Parsing a Tag is somewhat complicated, so it's helpful to separate this process from the
+ *
+ * Parsing a Tag is somewhat complicated, so it's helpful to separate this process from the
* higher level Parsing effort
* @return
* @throws IOException
int c, quote=0; // If "quote" is 0, then we're not in a quote. We set ' (in pretag) or " in attribs accordingly to denote quoted
String prefix=null,name=null,value=null;
baos.reset();
-
- while(go && (c=is.read())>=0) {
+
+ while (go && (c=is.read())>=0) {
++count;
- if(quote!=0) { // If we're in a quote, we only end if we hit another quote of the same time, not preceded by \
- if(c==quote && last!='\\') {
+ if (quote!=0) { // If we're in a quote, we only end if we hit another quote of the same time, not preceded by \
+ if (c==quote && last!='\\') {
quote=0;
} else {
baos.write(c);
}
- } else if((state&COMMENT)==COMMENT) { // similar to Quote is being in a comment
+ } else if ((state&COMMENT)==COMMENT) { // similar to Quote is being in a comment
switch(c) {
case '-':
- switch(state) { // XML has a complicated Quote set... <!-- --> ... we keep track if each has been met with flags.
+ switch(state) { // XML has a complicated Quote set... <!-- --> ... we keep track if each has been met with flags.
case COMMENT|COMMENT_E:
state|=COMMENT_D1;
break;
}
break;
case '>': // Tag indicator has been found, do we have all the comment characters in line?
- if((state&COMPLETE_COMMENT)==COMPLETE_COMMENT) {
+ if ((state&COMPLETE_COMMENT)==COMPLETE_COMMENT) {
byte ba[] = baos.toByteArray();
tag = new Tag(null,null, new String(ba,0,ba.length-2));
baos.reset();
// fall through on purpose
default:
state&=~(COMMENT_D3|COMMENT_D4);
- if((state&IN_COMMENT)!=IN_COMMENT) state&=~IN_COMMENT; // false alarm, it's not actually a comment
+ if ((state&IN_COMMENT)!=IN_COMMENT) state&=~IN_COMMENT; // false alarm, it's not actually a comment
baos.write(c);
}
} else { // Normal Tag Processing loop
switch(c) {
- case '?':
+ case '?':
switch(state & (QUESTION_F|QUESTION)) { // Validate the state of Doc tag... <?xml ... ?>
case QUESTION_F:
state |= DOC_TYPE;
}
break;
case '!':
- if(last=='<') {
+ if (last=='<') {
state|=COMMENT|COMMENT_E; // likely a comment, continue processing in Comment Loop
}
baos.write(c);
case ' ':
case '\t':
case '\n': // white space indicates change in internal tag state, ex between name and between attributes
- if((state&VALUE)==VALUE) {
+ if ((state&VALUE)==VALUE) {
value = baos.toString(); // we're in VALUE state, add characters to Value
- } else if(name==null) {
+ } else if (name==null) {
name = baos.toString(); // we're in Name state (default) add characters to Name
}
baos.reset(); // we've assigned chars, reset buffer
- if(name!=null) { // Name is not null, there's a tag in the offing here...
+ if (name!=null) { // Name is not null, there's a tag in the offing here...
Tag t = new Tag(prefix,name,value);
- if(tag==null) { // Set as the tag to return, if not exists
+ if (tag==null) { // Set as the tag to return, if not exists
tag = t;
} else { // if we already have a Tag, then we'll treat this one as an attribute
tag.add(t);
break;
case '\'': // is the character one of two kinds of quote?
case '"':
- if(last!='\\') {
+ if (last!='\\') {
quote=c;
break;
}
// Fallthrough ok
default:
baos.write(c); // write any unprocessed bytes into buffer
-
+
}
}
last = c;
}
int type = state&(DOC_TYPE|COMMENT|END_TAG|START_TAG); // get just the Tag states and turn into Type for Tag
- if(type==0) {
+ if (type==0) {
type=START_TAG;
}
- if(tag!=null) {
+ if (tag!=null) {
tag.state|=type; // add the appropriate Tag States
}
return tag;
/**
* getNSS
- *
+ *
* If the tag contains any Namespace (xmlns) attributes, create a new nss, copy everything from the passed-in one
* into it, then add the tag's own declarations. This provides Scoping behavior
- *
+ *
* If nss is null in the first place, create a new nss, so we don't have to deal with null Maps.
- *
+ *
* @param nss
* @param t
* @return
*/
private Map<String, String> getNss(Map<String, String> nss, Tag t) {
Map<String,String> newnss = null;
- if(t.attribs!=null) {
- for(Tag tag : t.attribs) {
- if("xmlns".equals(tag.prefix)) {
- if(newnss==null) {
+ if (t.attribs!=null) {
+ for (Tag tag : t.attribs) {
+ if ("xmlns".equals(tag.prefix)) {
+ if (newnss==null) {
newnss = new HashMap<>();
- if(nss!=null)newnss.putAll(nss);
+ if (nss!=null)newnss.putAll(nss);
}
newnss.put(tag.name, tag.value);
}
}
}
//return newnss==null?(nss==null?new HashMap<String,String>():nss):newnss;
- if(newnss==null) {
- if(nss==null) {
+ if (newnss==null) {
+ if (nss==null) {
newnss = new HashMap<>();
} else {
newnss = nss;
/**
* The result of the parseTag method
- *
+ *
* Data is split up into prefix, name and value portions. "Tags" with Values that are inside a Tag are known in XML
- * as Attributes.
- *
+ * as Attributes.
+ *
* @author Jonathan
*
*/
this.prefix = prefix;
this.name = name;
this.value = value;
- attribs = null;
+ attribs = null;
}
/**
* Appends an attribute to this tag, creating the attribute list on first use.
*
* @param attrib the attribute to add
*/
public void add(Tag attrib) {
- if(attribs == null) {
+ if (attribs == null) {
attribs = new ArrayList<>();
}
attribs.add(attrib);
}
-
+
public String toString() {
StringBuffer sb = new StringBuffer();
- if(prefix!=null) {
+ if (prefix!=null) {
sb.append(prefix);
sb.append(':');
}
sb.append(name==null?"!!ERROR!!":name);
char quote = ((state&DOC_TYPE)==DOC_TYPE)?'\'':'"';
- if(value!=null) {
+ if (value!=null) {
sb.append('=');
sb.append(quote);
sb.append(value);