cadi/core/src/main/java/org/onap/aaf/cadi/wsse/XReader.java

   1 /**
   2  * ============LICENSE_START====================================================
   3  * org.onap.aaf
   4  * ===========================================================================
   5  * Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
   6  * ===========================================================================
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  * ============LICENSE_END====================================================
  19  *
  20  */
  21
  22 package org.onap.aaf.cadi.wsse;
  23
  24 import java.io.ByteArrayOutputStream;
  25 import java.io.IOException;
  26 import java.io.InputStream;
  27 import java.util.ArrayList;
  28 import java.util.HashMap;
  29 import java.util.List;
  30 import java.util.Map;
  31 import java.util.Stack;
  32
  33 import javax.xml.stream.XMLStreamException;
  34
  35 /**
  36  * XReader
  37  * This class works similarly as StAX, except StAX has more behavior than is needed.  That would be ok, but
  38  * StAX also was Buffering in their code in such as way as to read most if not all the incoming stream into memory,
  39  * defeating the purpose of pre-reading only the Header
  40  *
  41  * This Reader does no back-tracking, but is able to create events based on syntax and given state only, leaving the
  42  * Read-ahead mode of the InputStream up to the other classes.
  43  *
  44  * At this time, we only implement the important events, though if this is good enough, it could be expanded, perhaps to
  45  * replace the original XMLReader from StAX.
  46  *
  47  * @author Jonathan
  48  *
  49  */
  50 // @SuppressWarnings("restriction")
  51 public class XReader {
  52     private XEvent curr,another;
  53     private InputStream is;
  54     private ByteArrayOutputStream baos;
  55     private int state, count, last;
  56
  57     private Stack<Map<String,String>> nsses;
  58
  59     public XReader(InputStream is) {
  60         this.is = is;
  61         curr = another = null;
  62         baos = new ByteArrayOutputStream();
  63         state = BEGIN_DOC;
  64         count = 0;
  65         nsses = new Stack<Map<String,String>>();
  66     }
  67
  68     public boolean hasNext() throws XMLStreamException {
  69         if (curr==null) {
  70             curr = parse();
  71         }
  72         return curr!=null;
  73     }
  74
  75     public XEvent nextEvent() {
  76         XEvent xe = curr;
  77         curr = null;
  78         return xe;
  79     }
  80
  81     //
  82     // State Flags
  83     //
  84     // Note: The State of parsing XML can be complicated.  There are too many to cleanly keep in "booleans".  Additionally,
  85     // there are certain checks that can be better made with Bitwise operations within switches
  86     // Keeping track of state this way also helps us to accomplish logic without storing any back characters except one
  87     private final static int BEGIN_DOC=  0x000001;
  88     private final static int DOC_TYPE=   0x000002;
  89     private final static int QUESTION_F= 0x000004;
  90     private final static int QUESTION =  0x000008;
  91     private final static int START_TAG = 0x000010;
  92     private final static int END_TAG =      0x000020;
  93     private final static int VALUE=         0x000040;
  94     private final static int COMMENT =   0x001000;
  95     private final static int COMMENT_E = 0x002000;
  96     private final static int COMMENT_D1 =0x010000;
  97     private final static int COMMENT_D2 =0x020000;
  98     private final static int COMMENT_D3 =0x040000;
  99     private final static int COMMENT_D4 =0x080000;
 100     // useful combined Comment states
 101     private final static int IN_COMMENT=COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2;
 102     private final static int COMPLETE_COMMENT = COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3|COMMENT_D4;
 103
 104
 105     private XEvent parse() throws XMLStreamException {
 106         Map<String,String> nss = nsses.isEmpty()?null:nsses.peek();
 107
 108         XEvent rv;
 109         if ((rv=another)!=null) { // "another" is a tag that may have needed to be created, but not
 110                                  // immediately returned.  Save for next parse.  If necessary, this could be turned into
 111                                  // a FIFO storage, but a single reference is enough for now.
 112             another = null;      // "rv" is now set for the Event, and will be returned.  Set to Null.
 113         } else {
 114             boolean go = true;
 115             int c=0;
 116
 117             try {
 118                 while (go && (c=is.read())>=0) {
 119                     ++count;
 120                     switch(c) {
 121                         case '<': // Tag is opening
 122                             state|=~BEGIN_DOC; // remove BEGIN_DOC flag, this is possibly an XML Doc
 123                             XEvent cxe = null;
 124                             if (baos.size()>0) { // If there are any characters between tags, we send as Character Event
 125                                 String chars = baos.toString().trim();  // Trim out WhiteSpace before and after
 126                                 if (chars.length()>0) { // don't send if Characters were only whitespace
 127                                     cxe = new XEvent.Characters(chars);
 128                                     baos.reset();
 129                                     go = false;
 130                                 }
 131                             }
 132                             last = c;  // make sure "last" character is set for use in "ParseTag"
 133                             Tag t = parseTag(); // call subroutine to process the tag as a unit
 134                             String ns;
 135                             switch(t.state&(START_TAG|END_TAG)) {
 136                                 case START_TAG:
 137                                         nss = getNss(nss,t);             // Only Start Tags might have NS Attributes
 138                                                                         // Get any NameSpace elements from tag.  If there are, nss will become
 139                                                                         // a new Map with all the previous NSs plus the new.  This provides
 140                                                                         // scoping behavior when used with the Stack
 141                                     // drop through on purpose
 142                                 case END_TAG:
 143                                     ns = t.prefix==null||nss==null?"":nss.get(t.prefix); // Get the namespace from prefix (if exists)
 144                                     break;
 145                                 default:
 146                                     ns = "";
 147                             }
 148                             if (ns==null)
 149                                 throw new XMLStreamException("Invalid Namespace Prefix at " + count);
 150                             go = false;
 151                             switch(t.state) { // based on
 152                               case DOC_TYPE:
 153                                   rv = new XEvent.StartDocument();
 154                                   break;
 155                               case COMMENT:
 156                                   rv = new XEvent.Comment(t.value);
 157                                   break;
 158                               case START_TAG:
 159                                   rv = new XEvent.StartElement(ns,t.name);
 160                                   nsses.push(nss);                // Change potential scope for Namespace
 161                                   break;
 162                               case END_TAG:
 163                                   rv = new XEvent.EndElement(ns,t.name);
 164                                   nss = nsses.pop();            // End potential scope for Namespace
 165                                   break;
 166                               case START_TAG|END_TAG:            // This tag is both start/end  aka <myTag/>
 167                                   rv = new XEvent.StartElement(ns,t.name);
 168                                   if (last=='/')another = new XEvent.EndElement(ns,t.name);
 169                             }
 170                             if (cxe!=null) {     // if there is a Character Event, it actually should go first.  ow.
 171                                 another = rv;   // Make current Event the "another" or next event, and
 172                                 rv = cxe;        // send Character Event now
 173                             }
 174                             break;
 175                         case ' ':
 176                         case '\t':
 177                         case '\n':
 178                             if ((state&BEGIN_DOC)==BEGIN_DOC) { // if Whitespace before doc, just ignore
 179                                 break;
 180                             }
 181                             // fallthrough on purpose
 182                         default:
 183                             if ((state&BEGIN_DOC)==BEGIN_DOC) { // if there is any data at the start other than XML Tag, it's not XML
 184                                 throw new XMLStreamException("Parse Error: This is not an XML Doc");
 185                             }
 186                             baos.write(c); // save off Characters
 187                     }
 188                     last = c; // Some processing needs to know what the last character was, aka Escaped characters... ex \"
 189                 }
 190             } catch (IOException e) {
 191                 throw new XMLStreamException(e); // all errors parsing will be treated as XMLStreamErrors (like StAX)
 192             }
 193             if (c==-1 && (state&BEGIN_DOC)==BEGIN_DOC) {                // Normally, end of stream is ok, however, we need to know if the
 194                 throw new XMLStreamException("Premature End of File"); // document isn't an XML document, so we throw exception if it
 195             }                                                           // hasn't yet been determined to be an XML Doc
 196         }
 197         return rv;
 198     }
 199
 200     /**
 201      * parseTag
 202      *
 203      * Parsing a Tag is somewhat complicated, so it's helpful to separate this process from the
 204      * higher level Parsing effort
 205      * @return
 206      * @throws IOException
 207      * @throws XMLStreamException
 208      */
 209     private Tag parseTag() throws IOException, XMLStreamException {
 210         Tag tag = null;
 211         boolean go = true;
 212         state = 0;
 213         int c, quote=0; // If "quote" is 0, then we're not in a quote.  We set ' (in pretag) or " in attribs accordingly to denote quoted
 214         String prefix=null,name=null,value=null;
 215         baos.reset();
 216
 217         while (go && (c=is.read())>=0) {
 218             ++count;
 219             if (quote!=0) { // If we're in a quote, we only end if we hit another quote of the same time, not preceded by \
 220                 if (c==quote && last!='\\') {
 221                     quote=0;
 222                 } else {
 223                     baos.write(c);
 224                 }
 225             } else if ((state&COMMENT)==COMMENT) { // similar to Quote is being in a comment
 226                 switch(c) {
 227                     case '-':
 228                         switch(state) { // XML has a complicated Quote set... <!-- --> ... we keep track if each has been met with flags.
 229                             case COMMENT|COMMENT_E:
 230                                 state|=COMMENT_D1;
 231                                 break;
 232                             case COMMENT|COMMENT_E|COMMENT_D1:
 233                                 state|=COMMENT_D2;
 234                                 baos.reset();                // clear out "!--", it's a Comment
 235                                 break;
 236                             case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2:
 237                                 state|=COMMENT_D3;
 238                                 baos.write(c);
 239                                 break;
 240                             case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3:
 241                                 state|=COMMENT_D4;
 242                                 baos.write(c);
 243                                 break;
 244                         }
 245                         break;
 246                     case '>': // Tag indicator has been found, do we have all the comment characters in line?
 247                         if ((state&COMPLETE_COMMENT)==COMPLETE_COMMENT) {
 248                             byte ba[] = baos.toByteArray();
 249                             tag = new Tag(null,null, new String(ba,0,ba.length-2));
 250                             baos.reset();
 251                             go = false;
 252                             break;
 253                         }
 254                         // fall through on purpose
 255                     default:
 256                         state&=~(COMMENT_D3|COMMENT_D4);
 257                         if ((state&IN_COMMENT)!=IN_COMMENT) state&=~IN_COMMENT; // false alarm, it's not actually a comment
 258                         baos.write(c);
 259                 }
 260             } else { // Normal Tag Processing loop
 261                 switch(c) {
 262                     case '?':
 263                         switch(state & (QUESTION_F|QUESTION)) {  // Validate the state of Doc tag... <?xml ... ?>
 264                             case QUESTION_F:
 265                                 state |= DOC_TYPE;
 266                                 state &= ~QUESTION_F;
 267                                 break;
 268                             case 0:
 269                                 state |=QUESTION_F;
 270                                 break;
 271                             default:
 272                                 throw new IOException("Bad character [?] at " + count);
 273                         }
 274                         break;
 275                     case '!':
 276                         if (last=='<') {
 277                             state|=COMMENT|COMMENT_E; // likely a comment, continue processing in Comment Loop
 278                         }
 279                         baos.write(c);
 280                         break;
 281                     case '/':
 282                         state|=(last=='<'?END_TAG:(END_TAG|START_TAG));  // end tag indicator </xxx>, ,or both <xxx/>
 283                         break;
 284                     case ':':
 285                         prefix=baos.toString(); // prefix indicator
 286                         baos.reset();
 287                         break;
 288                     case '=':                    // used in Attributes
 289                         name=baos.toString();
 290                         baos.reset();
 291                         state|=VALUE;
 292                         break;
 293                     case '>': // end the tag, which causes end of this subprocess as well as formulation of the found data
 294                         go = false;
 295                         // passthrough on purpose
 296                     case ' ':
 297                     case '\t':
 298                     case '\n': // white space indicates change in internal tag state, ex between name and between attributes
 299                         if ((state&VALUE)==VALUE) {
 300                             value = baos.toString();    // we're in VALUE state, add characters to Value
 301                         } else if (name==null) {
 302                             name = baos.toString();        // we're in Name state (default) add characters to Name
 303                         }
 304                         baos.reset();                    // we've assigned chars, reset buffer
 305                         if (name!=null) {                // Name is not null, there's a tag in the offing here...
 306                             Tag t = new Tag(prefix,name,value);
 307                             if (tag==null) {                // Set as the tag to return, if not exists
 308                                 tag = t;
 309                             } else {                    // if we already have a Tag, then we'll treat this one as an attribute
 310                                 tag.add(t);
 311                             }
 312                         }
 313                         prefix=name=value=null;            // reset these values in case we loop for attributes.
 314                         break;
 315                     case '\'':                            // is the character one of two kinds of quote?
 316                     case '"':
 317                         if (last!='\\') {
 318                             quote=c;
 319                             break;
 320                         }
 321                         // Fallthrough ok
 322                     default:
 323                         baos.write(c);                    // write any unprocessed bytes into buffer
 324
 325                 }
 326             }
 327             last = c;
 328         }
 329         int type = state&(DOC_TYPE|COMMENT|END_TAG|START_TAG); // get just the Tag states and turn into Type for Tag
 330         if (type==0) {
 331             type=START_TAG;
 332         }
 333         if (tag!=null) {
 334             tag.state|=type;    // add the appropriate Tag States
 335         }
 336         return tag;
 337     }
 338
 339     /**
 340      * getNSS
 341      *
 342      * If the tag contains some Namespace attributes, create a new nss from the passed in one, copy all into it, then add
 343      * This provides Scoping behavior
 344      *
 345      * if Nss is null in the first place, create an new nss, so we don't have to deal with null Maps.
 346      *
 347      * @param nss
 348      * @param t
 349      * @return
 350      */
 351     private Map<String, String> getNss(Map<String, String> nss, Tag t) {
 352         Map<String,String> newnss = null;
 353         if (t.attribs!=null) {
 354             for (Tag tag : t.attribs) {
 355                 if ("xmlns".equals(tag.prefix)) {
 356                     if (newnss==null) {
 357                         newnss = new HashMap<>();
 358                         if (nss!=null)newnss.putAll(nss);
 359                     }
 360                     newnss.put(tag.name, tag.value);
 361                 }
 362             }
 363         }
 364         //return newnss==null?(nss==null?new HashMap<String,String>():nss):newnss;
 365         if (newnss==null) {
 366             if (nss==null) {
 367                 newnss = new HashMap<>();
 368             } else {
 369                 newnss = nss;
 370             }
 371         }
 372         return newnss;
 373     }
 374
 375     /**
 376      * The result of the parseTag method
 377      *
 378      * Data is split up into prefix, name and value portions. "Tags" with Values that are inside a Tag are known in XLM
 379      * as Attributes.
 380      *
 381      * @author Jonathan
 382      *
 383      */
 384     public class Tag {
 385         public int state;
 386         public String prefix,name,value;
 387         public List<Tag> attribs;
 388
 389         public Tag(String prefix, String name, String value) {
 390             this.prefix = prefix;
 391             this.name = name;
 392             this.value = value;
 393             attribs = null;
 394         }
 395
 396         /**
 397          * add an attribute
 398          * Not all tags need attributes... lazy instantiate to save time and memory
 399          * @param tag
 400          */
 401         public void add(Tag attrib) {
 402             if (attribs == null) {
 403                 attribs = new ArrayList<>();
 404             }
 405             attribs.add(attrib);
 406         }
 407
 408         public String toString() {
 409             StringBuffer sb = new StringBuffer();
 410             if (prefix!=null) {
 411                 sb.append(prefix);
 412                 sb.append(':');
 413             }
 414             sb.append(name==null?"!!ERROR!!":name);
 415
 416             char quote = ((state&DOC_TYPE)==DOC_TYPE)?'\'':'"';
 417             if (value!=null) {
 418                 sb.append('=');
 419                 sb.append(quote);
 420                 sb.append(value);
 421                 sb.append(quote);
 422             }
 423             return sb.toString();
 424         }
 425     }
 426
 427 }