- XEvent rv;
- if((rv=another)!=null) { // "another" is a tag that may have needed to be created, but not
- // immediately returned. Save for next parse. If necessary, this could be turned into
- // a FIFO storage, but a single reference is enough for now.
- another = null; // "rv" is now set for the Event, and will be returned. Set to Null.
- } else {
- boolean go = true;
- int c=0;
-
- try {
- while(go && (c=is.read())>=0) {
- ++count;
- switch(c) {
- case '<': // Tag is opening
- state|=~BEGIN_DOC; // remove BEGIN_DOC flag, this is possibly an XML Doc
- XEvent cxe = null;
- if(baos.size()>0) { // If there are any characters between tags, we send as Character Event
- String chars = baos.toString().trim(); // Trim out WhiteSpace before and after
- if(chars.length()>0) { // don't send if Characters were only whitespace
- cxe = new XEvent.Characters(chars);
- baos.reset();
- go = false;
- }
- }
- last = c; // make sure "last" character is set for use in "ParseTag"
- Tag t = parseTag(); // call subroutine to process the tag as a unit
- String ns;
- switch(t.state&(START_TAG|END_TAG)) {
- case START_TAG:
- nss = getNss(nss,t); // Only Start Tags might have NS Attributes
- // Get any NameSpace elements from tag. If there are, nss will become
- // a new Map with all the previous NSs plus the new. This provides
- // scoping behavior when used with the Stack
- // drop through on purpose
- case END_TAG:
- ns = t.prefix==null?"":nss.get(t.prefix); // Get the namespace from prefix (if exists)
- break;
- default:
- ns = "";
- }
- if(ns==null)
- throw new XMLStreamException("Invalid Namespace Prefix at " + count);
- go = false;
- switch(t.state) { // based on
- case DOC_TYPE:
- rv = new XEvent.StartDocument();
- break;
- case COMMENT:
- rv = new XEvent.Comment(t.value);
- break;
- case START_TAG:
- rv = new XEvent.StartElement(ns,t.name);
- nsses.push(nss); // Change potential scope for Namespace
- break;
- case END_TAG:
- rv = new XEvent.EndElement(ns,t.name);
- nss = nsses.pop(); // End potential scope for Namespace
- break;
- case START_TAG|END_TAG: // This tag is both start/end aka <myTag/>
- rv = new XEvent.StartElement(ns,t.name);
- if(last=='/')another = new XEvent.EndElement(ns,t.name);
- }
- if(cxe!=null) { // if there is a Character Event, it actually should go first. ow.
- another = rv; // Make current Event the "another" or next event, and
- rv = cxe; // send Character Event now
- }
- break;
- case ' ':
- case '\t':
- case '\n':
- if((state&BEGIN_DOC)==BEGIN_DOC) { // if Whitespace before doc, just ignore
- break;
- }
- // fallthrough on purpose
- default:
- if((state&BEGIN_DOC)==BEGIN_DOC) { // if there is any data at the start other than XML Tag, it's not XML
- throw new XMLStreamException("Parse Error: This is not an XML Doc");
- }
- baos.write(c); // save off Characters
- }
- last = c; // Some processing needs to know what the last character was, aka Escaped characters... ex \"
- }
- } catch (IOException e) {
- throw new XMLStreamException(e); // all errors parsing will be treated as XMLStreamErrors (like StAX)
- }
- if(c==-1 && (state&BEGIN_DOC)==BEGIN_DOC) { // Normally, end of stream is ok, however, we need to know if the
- throw new XMLStreamException("Premature End of File"); // document isn't an XML document, so we throw exception if it
- } // hasn't yet been determined to be an XML Doc
- }
- return rv;
- }
-
- /**
- * parseTag
- *
- * Parsing a Tag is somewhat complicated, so it's helpful to separate this process from the
- * higher level Parsing effort
- * @return
- * @throws IOException
- * @throws XMLStreamException
- */
- private Tag parseTag() throws IOException, XMLStreamException {
- Tag tag = null;
- boolean go = true;
- state = 0;
- int c, quote=0; // If "quote" is 0, then we're not in a quote. We set ' (in pretag) or " in attribs accordingly to denote quoted
- String prefix=null,name=null,value=null;
- baos.reset();
-
- while(go && (c=is.read())>=0) {
- ++count;
- if(quote!=0) { // If we're in a quote, we only end if we hit another quote of the same time, not preceded by \
- if(c==quote && last!='\\') {
- quote=0;
- } else {
- baos.write(c);
- }
- } else if((state&COMMENT)==COMMENT) { // similar to Quote is being in a comment
- switch(c) {
- case '-':
- switch(state) { // XML has a complicated Quote set... <!-- --> ... we keep track if each has been met with flags.
- case COMMENT|COMMENT_E:
- state|=COMMENT_D1;
- break;
- case COMMENT|COMMENT_E|COMMENT_D1:
- state|=COMMENT_D2;
- baos.reset(); // clear out "!--", it's a Comment
- break;
- case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2:
- state|=COMMENT_D3;
- baos.write(c);
- break;
- case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3:
- state|=COMMENT_D4;
- baos.write(c);
- break;
- }
- break;
- case '>': // Tag indicator has been found, do we have all the comment characters in line?
- if((state&COMPLETE_COMMENT)==COMPLETE_COMMENT) {
- byte ba[] = baos.toByteArray();
- tag = new Tag(null,null, new String(ba,0,ba.length-2));
- baos.reset();
- go = false;
- break;
- }
- // fall through on purpose
- default:
- state&=~(COMMENT_D3|COMMENT_D4);
- if((state&IN_COMMENT)!=IN_COMMENT) state&=~IN_COMMENT; // false alarm, it's not actually a comment
- baos.write(c);
- }
- } else { // Normal Tag Processing loop
- switch(c) {
- case '?':
- switch(state & (QUESTION_F|QUESTION)) { // Validate the state of Doc tag... <?xml ... ?>
- case QUESTION_F:
- state |= DOC_TYPE;
- state &= ~QUESTION_F;
- break;
- case 0:
- state |=QUESTION_F;
- break;
- default:
- throw new IOException("Bad character [?] at " + count);
- }
- break;
- case '!':
- if(last=='<') {
- state|=COMMENT|COMMENT_E; // likely a comment, continue processing in Comment Loop
- }
- baos.write(c);
- break;
- case '/':
- state|=(last=='<'?END_TAG:(END_TAG|START_TAG)); // end tag indicator </xxx>, ,or both <xxx/>
- break;
- case ':':
- prefix=baos.toString(); // prefix indicator
- baos.reset();
- break;
- case '=': // used in Attributes
- name=baos.toString();
- baos.reset();
- state|=VALUE;
- break;
- case '>': // end the tag, which causes end of this subprocess as well as formulation of the found data
- go = false;
- // passthrough on purpose
- case ' ':
- case '\t':
- case '\n': // white space indicates change in internal tag state, ex between name and between attributes
- if((state&VALUE)==VALUE) {
- value = baos.toString(); // we're in VALUE state, add characters to Value
- } else if(name==null) {
- name = baos.toString(); // we're in Name state (default) add characters to Name
- }
- baos.reset(); // we've assigned chars, reset buffer
- if(name!=null) { // Name is not null, there's a tag in the offing here...
- Tag t = new Tag(prefix,name,value);
- if(tag==null) { // Set as the tag to return, if not exists
- tag = t;
- } else { // if we already have a Tag, then we'll treat this one as an attribute
- tag.add(t);
- }
- }
- prefix=name=value=null; // reset these values in case we loop for attributes.
- break;
- case '\'': // is the character one of two kinds of quote?
- case '"':
- if(last!='\\') {
- quote=c;
- break;
- }
- // Fallthrough ok
- default:
- baos.write(c); // write any unprocessed bytes into buffer
-
- }
- }
- last = c;
- }
- int type = state&(DOC_TYPE|COMMENT|END_TAG|START_TAG); // get just the Tag states and turn into Type for Tag
- if(type==0) {
- type=START_TAG;
- }
- tag.state|=type; // add the appropriate Tag States
- return tag;
- }