core/src/main/java/com/att/cadi/wsse/XReader.java

   1 /*******************************************************************************\r
   2  * ============LICENSE_START====================================================\r
   3  * * org.onap.aai\r
   4  * * ===========================================================================\r
   5  * * Copyright © 2017 AT&T Intellectual Property. All rights reserved.\r
   6  * * Copyright © 2017 Amdocs\r
   7  * * ===========================================================================\r
   8  * * Licensed under the Apache License, Version 2.0 (the "License");\r
   9  * * you may not use this file except in compliance with the License.\r
  10  * * You may obtain a copy of the License at\r
  11  * * \r
  12  *  *      http://www.apache.org/licenses/LICENSE-2.0\r
  13  * * \r
  14  *  * Unless required by applicable law or agreed to in writing, software\r
  15  * * distributed under the License is distributed on an "AS IS" BASIS,\r
  16  * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
  17  * * See the License for the specific language governing permissions and\r
  18  * * limitations under the License.\r
  19  * * ============LICENSE_END====================================================\r
  20  * *\r
  21  * * ECOMP is a trademark and service mark of AT&T Intellectual Property.\r
  22  * *\r
  23  ******************************************************************************/\r
  24 package com.att.cadi.wsse;\r
  25 \r
  26 import java.io.ByteArrayOutputStream;\r
  27 import java.io.IOException;\r
  28 import java.io.InputStream;\r
  29 import java.util.ArrayList;\r
  30 import java.util.HashMap;\r
  31 import java.util.List;\r
  32 import java.util.Map;\r
  33 import java.util.Stack;\r
  34 \r
  35 import javax.xml.stream.XMLStreamException;\r
  36 \r
  37 /**\r
  38  * XReader\r
  39  * This class works similarly as StAX, except StAX has more behavior than is needed.  That would be ok, but \r
  40  * StAX also was Buffering in their code in such as way as to read most if not all the incoming stream into memory,\r
  41  * defeating the purpose of pre-reading only the Header\r
  42  * \r
  43  * This Reader does no back-tracking, but is able to create events based on syntax and given state only, leaving the\r
  44  * Read-ahead mode of the InputStream up to the other classes.\r
  45  * \r
  46  * At this time, we only implement the important events, though if this is good enough, it could be expanded, perhaps to \r
  47  * replace the original XMLReader from StAX.\r
  48  * \r
  49  *\r
  50  */\r
  51 // @SuppressWarnings("restriction")\r
  52 public class XReader {\r
  53         private XEvent curr,another;\r
  54         private InputStream is;\r
  55         private ByteArrayOutputStream baos;\r
  56         private int state, count, last;\r
  57         \r
  58         private Stack<Map<String,String>> nsses;\r
  59         \r
  60         public XReader(InputStream is) {\r
  61                 this.is = is;\r
  62                 curr = another = null;\r
  63                 baos = new ByteArrayOutputStream();\r
  64                 state = BEGIN_DOC; \r
  65                 count = 0;\r
  66                 nsses = new Stack<Map<String,String>>();\r
  67         }\r
  68         \r
  69         public boolean hasNext() throws XMLStreamException {\r
  70                 if(curr==null) {\r
  71                         curr = parse();\r
  72                 }\r
  73                 return curr!=null;\r
  74         }\r
  75 \r
  76         public XEvent nextEvent() {\r
  77                 XEvent xe = curr;\r
  78                 curr = null;\r
  79                 return xe;\r
  80         }\r
  81 \r
  82         // \r
  83         // State Flags\r
  84         //\r
  85         // Note: The State of parsing XML can be complicated.  There are too many to cleanly keep in "booleans".  Additionally,\r
  86         // there are certain checks that can be better made with Bitwise operations within switches\r
  87         // Keeping track of state this way also helps us to accomplish logic without storing any back characters except one\r
  88         private final static int BEGIN_DOC=  0x000001;\r
  89         private final static int DOC_TYPE=   0x000002;\r
  90         private final static int QUESTION_F= 0x000004;\r
  91         private final static int QUESTION =  0x000008;\r
  92         private final static int START_TAG = 0x000010;\r
  93         private final static int END_TAG =       0x000020;\r
  94         private final static int VALUE=          0x000040;\r
  95         private final static int COMMENT =   0x001000;\r
  96         private final static int COMMENT_E = 0x002000;\r
  97         private final static int COMMENT_D1 =0x010000;\r
  98         private final static int COMMENT_D2 =0x020000;\r
  99         private final static int COMMENT_D3 =0x040000;\r
 100         private final static int COMMENT_D4 =0x080000;\r
 101         // useful combined Comment states\r
 102         private final static int IN_COMMENT=COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2;\r
 103         private final static int COMPLETE_COMMENT = COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3|COMMENT_D4;\r
 104         \r
 105         \r
 106         private XEvent parse() throws XMLStreamException {\r
 107                 Map<String,String> nss = nsses.isEmpty()?null:nsses.peek();\r
 108 \r
 109                 XEvent rv;\r
 110                 if((rv=another)!=null) { // "another" is a tag that may have needed to be created, but not \r
 111                                                                  // immediately returned.  Save for next parse.  If necessary, this could be turned into\r
 112                                                                  // a FIFO storage, but a single reference is enough for now.\r
 113                         another = null;      // "rv" is now set for the Event, and will be returned.  Set to Null.\r
 114                 } else {\r
 115                         boolean go = true;\r
 116                         int c=0;\r
 117                         \r
 118                         try {\r
 119                                 while(go && (c=is.read())>=0) {\r
 120                                         ++count;\r
 121                                         switch(c) {\r
 122                                                 case '<': // Tag is opening\r
 123                                                         state|=~BEGIN_DOC; // remove BEGIN_DOC flag, this is possibly an XML Doc\r
 124                                                         XEvent cxe = null;\r
 125                                                         if(baos.size()>0) { // If there are any characters between tags, we send as Character Event\r
 126                                                                 String chars = baos.toString().trim();  // Trim out WhiteSpace before and after\r
 127                                                                 if(chars.length()>0) { // don't send if Characters were only whitespace\r
 128                                                                         cxe = new XEvent.Characters(chars);\r
 129                                                                         baos.reset();\r
 130                                                                         go = false;\r
 131                                                                 }\r
 132                                                         }\r
 133                                                         last = c;  // make sure "last" character is set for use in "ParseTag"\r
 134                                                         Tag t = parseTag(); // call subroutine to process the tag as a unit\r
 135                                                         String ns;\r
 136                                                         switch(t.state&(START_TAG|END_TAG)) {\r
 137                                                                 case START_TAG:\r
 138                                                                                 nss = getNss(nss,t);                    // Only Start Tags might have NS Attributes   \r
 139                                                                                                                                                 // Get any NameSpace elements from tag.  If there are, nss will become \r
 140                                                                                                                                                 // a new Map with all the previous NSs plus the new.  This provides \r
 141                                                                                                                                                 // scoping behavior when used with the Stack\r
 142                                                                         // drop through on purpose\r
 143                                                                 case END_TAG:\r
 144                                                                         ns = t.prefix==null?"":nss.get(t.prefix); // Get the namespace from prefix (if exists)\r
 145                                                                         break;\r
 146                                                                 default:\r
 147                                                                         ns = "";\r
 148                                                         }\r
 149                                                         if(ns==null)\r
 150                                                                 throw new XMLStreamException("Invalid Namespace Prefix at " + count);\r
 151                                                         go = false;\r
 152                                                         switch(t.state) { // based on \r
 153                                                           case DOC_TYPE: \r
 154                                                                   rv = new XEvent.StartDocument();\r
 155                                                                   break;\r
 156                                                           case COMMENT:\r
 157                                                                   rv = new XEvent.Comment(t.value);\r
 158                                                                   break;\r
 159                                                           case START_TAG:\r
 160                                                                   rv = new XEvent.StartElement(ns,t.name);\r
 161                                                                   nsses.push(nss);                              // Change potential scope for Namespace\r
 162                                                                   break;\r
 163                                                           case END_TAG:\r
 164                                                                   rv = new XEvent.EndElement(ns,t.name);\r
 165                                                                   nss = nsses.pop();                    // End potential scope for Namespace\r
 166                                                                   break;\r
 167                                                           case START_TAG|END_TAG:                       // This tag is both start/end  aka <myTag/>\r
 168                                                                   rv = new XEvent.StartElement(ns,t.name);\r
 169                                                                   if(last=='/')another = new XEvent.EndElement(ns,t.name);\r
 170                                                         }\r
 171                                                         if(cxe!=null) {     // if there is a Character Event, it actually should go first.  ow.\r
 172                                                                 another = rv;   // Make current Event the "another" or next event, and \r
 173                                                                 rv = cxe;               // send Character Event now\r
 174                                                         }\r
 175                                                         break;\r
 176                                                 case ' ':\r
 177                                                 case '\t':\r
 178                                                 case '\n':\r
 179                                                         if((state&BEGIN_DOC)==BEGIN_DOC) { // if Whitespace before doc, just ignore \r
 180                                                                 break;\r
 181                                                         }\r
 182                                                         // fallthrough on purpose\r
 183                                                 default:\r
 184                                                         if((state&BEGIN_DOC)==BEGIN_DOC) { // if there is any data at the start other than XML Tag, it's not XML\r
 185                                                                 throw new XMLStreamException("Parse Error: This is not an XML Doc");\r
 186                                                         }\r
 187                                                         baos.write(c); // save off Characters\r
 188                                         }\r
 189                                         last = c; // Some processing needs to know what the last character was, aka Escaped characters... ex \"\r
 190                                 }\r
 191                         } catch (IOException e) {\r
 192                                 throw new XMLStreamException(e); // all errors parsing will be treated as XMLStreamErrors (like StAX)\r
 193                         }\r
 194                         if(c==-1 && (state&BEGIN_DOC)==BEGIN_DOC) {                        // Normally, end of stream is ok, however, we need to know if the \r
 195                                 throw new XMLStreamException("Premature End of File"); // document isn't an XML document, so we throw exception if it \r
 196                         }                                                                                                                  // hasn't yet been determined to be an XML Doc\r
 197                 }\r
 198                 return rv;\r
 199         }\r
 200         \r
 201         /**\r
 202          * parseTag\r
 203          * \r
 204          * Parsing a Tag is somewhat complicated, so it's helpful to separate this process from the \r
 205          * higher level Parsing effort\r
 206          * @return\r
 207          * @throws IOException\r
 208          * @throws XMLStreamException\r
 209          */\r
 210         private Tag parseTag() throws IOException, XMLStreamException {\r
 211                 Tag tag = null;\r
 212                 boolean go = true;\r
 213                 state = 0;\r
 214                 int c, quote=0; // If "quote" is 0, then we're not in a quote.  We set ' (in pretag) or " in attribs accordingly to denote quoted\r
 215                 String prefix=null,name=null,value=null;\r
 216                 baos.reset();\r
 217                 \r
 218                 while(go && (c=is.read())>=0) {\r
 219                         ++count;\r
 220                         if(quote!=0) { // If we're in a quote, we only end if we hit another quote of the same time, not preceded by \\r
 221                                 if(c==quote && last!='\\') {\r
 222                                         quote=0;\r
 223                                 } else {\r
 224                                         baos.write(c);\r
 225                                 }\r
 226                         } else if((state&COMMENT)==COMMENT) { // similar to Quote is being in a comment\r
 227                                 switch(c) {\r
 228                                         case '-':\r
 229                                                 switch(state) { // XML has a complicated Quote set... <!-- --> ... we keep track if each has been met with flags. \r
 230                                                         case COMMENT|COMMENT_E:\r
 231                                                                 state|=COMMENT_D1;\r
 232                                                                 break;\r
 233                                                         case COMMENT|COMMENT_E|COMMENT_D1:\r
 234                                                                 state|=COMMENT_D2;\r
 235                                                                 baos.reset();                           // clear out "!--", it's a Comment\r
 236                                                                 break;\r
 237                                                         case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2:\r
 238                                                                 state|=COMMENT_D3;\r
 239                                                                 baos.write(c);\r
 240                                                                 break;\r
 241                                                         case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3:\r
 242                                                                 state|=COMMENT_D4;\r
 243                                                                 baos.write(c);\r
 244                                                                 break;\r
 245                                                 }\r
 246                                                 break;\r
 247                                         case '>': // Tag indicator has been found, do we have all the comment characters in line?\r
 248                                                 if((state&COMPLETE_COMMENT)==COMPLETE_COMMENT) {\r
 249                                                         byte ba[] = baos.toByteArray();\r
 250                                                         tag = new Tag(null,null, new String(ba,0,ba.length-2));\r
 251                                                         baos.reset();\r
 252                                                         go = false;\r
 253                                                         break;\r
 254                                                 }\r
 255                                                 // fall through on purpose\r
 256                                         default:\r
 257                                                 state&=~(COMMENT_D3|COMMENT_D4);\r
 258                                                 if((state&IN_COMMENT)!=IN_COMMENT) state&=~IN_COMMENT; // false alarm, it's not actually a comment\r
 259                                                 baos.write(c);\r
 260                                 }\r
 261                         } else { // Normal Tag Processing loop\r
 262                                 switch(c) {\r
 263                                         case '?': \r
 264                                                 switch(state & (QUESTION_F|QUESTION)) {  // Validate the state of Doc tag... <?xml ... ?>\r
 265                                                         case QUESTION_F:\r
 266                                                                 state |= DOC_TYPE;\r
 267                                                                 state &= ~QUESTION_F;\r
 268                                                                 break;\r
 269                                                         case 0:\r
 270                                                                 state |=QUESTION_F;\r
 271                                                                 break;\r
 272                                                         default:\r
 273                                                                 throw new IOException("Bad character [?] at " + count);\r
 274                                                 }\r
 275                                                 break;\r
 276                                         case '!':\r
 277                                                 if(last=='<') { \r
 278                                                         state|=COMMENT|COMMENT_E; // likely a comment, continue processing in Comment Loop\r
 279                                                 }\r
 280                                                 baos.write(c);\r
 281                                                 break;\r
 282                                         case '/':\r
 283                                                 state|=(last=='<'?END_TAG:(END_TAG|START_TAG));  // end tag indicator </xxx>, ,or both <xxx/>\r
 284                                                 break;\r
 285                                         case ':':\r
 286                                                 prefix=baos.toString(); // prefix indicator\r
 287                                                 baos.reset();\r
 288                                                 break;\r
 289                                         case '=':                                       // used in Attributes\r
 290                                                 name=baos.toString();\r
 291                                                 baos.reset();\r
 292                                                 state|=VALUE;\r
 293                                                 break;\r
 294                                         case '>': // end the tag, which causes end of this subprocess as well as formulation of the found data\r
 295                                                 go = false;\r
 296                                                 // passthrough on purpose\r
 297                                         case ' ':\r
 298                                         case '\t':\r
 299                                         case '\n': // white space indicates change in internal tag state, ex between name and between attributes\r
 300                                                 if((state&VALUE)==VALUE) {\r
 301                                                         value = baos.toString();        // we're in VALUE state, add characters to Value\r
 302                                                 } else if(name==null) {\r
 303                                                         name = baos.toString();         // we're in Name state (default) add characters to Name\r
 304                                                 }\r
 305                                                 baos.reset();                                   // we've assigned chars, reset buffer\r
 306                                                 if(name!=null) {                                // Name is not null, there's a tag in the offing here...\r
 307                                                         Tag t = new Tag(prefix,name,value);\r
 308                                                         if(tag==null) {                         // Set as the tag to return, if not exists\r
 309                                                                 tag = t;\r
 310                                                         } else {                                        // if we already have a Tag, then we'll treat this one as an attribute\r
 311                                                                 tag.add(t);\r
 312                                                         }\r
 313                                                 }\r
 314                                                 prefix=name=value=null;                 // reset these values in case we loop for attributes.\r
 315                                                 break;\r
 316                                         case '\'':                                                      // is the character one of two kinds of quote?\r
 317                                         case '"':\r
 318                                                 if(last!='\\') {\r
 319                                                         quote=c;\r
 320                                                         break;\r
 321                                                 }\r
 322                                                 // Fallthrough ok\r
 323                                         default:\r
 324                                                 baos.write(c);                                  // write any unprocessed bytes into buffer\r
 325                                                 \r
 326                                 }\r
 327                         }\r
 328                         last = c;\r
 329                 }\r
 330                 int type = state&(DOC_TYPE|COMMENT|END_TAG|START_TAG); // get just the Tag states and turn into Type for Tag\r
 331                 if(type==0) {\r
 332                         type=START_TAG;\r
 333                 }\r
 334                 tag.state|=type;        // add the appropriate Tag States\r
 335                 return tag;\r
 336         }\r
 337 \r
 338         /**\r
 339          * getNSS\r
 340          * \r
 341          * If the tag contains some Namespace attributes, create a new nss from the passed in one, copy all into it, then add\r
 342          * This provides Scoping behavior\r
 343          * \r
 344          * if Nss is null in the first place, create an new nss, so we don't have to deal with null Maps.\r
 345          * \r
 346          * @param nss\r
 347          * @param t\r
 348          * @return\r
 349          */\r
 350         private Map<String, String> getNss(Map<String, String> nss, Tag t) {\r
 351                 Map<String,String> newnss = null;\r
 352                 if(t.attribs!=null) {\r
 353                         for(Tag tag : t.attribs) {\r
 354                                 if("xmlns".equals(tag.prefix)) {\r
 355                                         if(newnss==null) {\r
 356                                                 newnss = new HashMap<String,String>();\r
 357                                                 if(nss!=null)newnss.putAll(nss);\r
 358                                         }\r
 359                                         newnss.put(tag.name, tag.value);\r
 360                                 }\r
 361                         }\r
 362                 }\r
 363                 return newnss==null?(nss==null?new HashMap<String,String>():nss):newnss;\r
 364         }\r
 365 \r
 366         /**\r
 367          * The result of the parseTag method\r
 368          * \r
 369          * Data is split up into prefix, name and value portions. "Tags" with Values that are inside a Tag are known in XLM\r
 370          * as Attributes.  \r
 371          * \r
 372          *\r
 373          */\r
 374         public class Tag {\r
 375                 public int state;\r
 376                 public String prefix,name,value;\r
 377                 public List<Tag> attribs;\r
 378 \r
 379                 public Tag(String prefix, String name, String value) {\r
 380                         this.prefix = prefix;\r
 381                         this.name = name;\r
 382                         this.value = value;\r
 383                         attribs = null;  \r
 384                 }\r
 385 \r
 386                 /**\r
 387                  * add an attribute\r
 388                  * Not all tags need attributes... lazy instantiate to save time and memory\r
 389                  * @param tag\r
 390                  */\r
 391                 public void add(Tag attrib) {\r
 392                         if(attribs == null) {\r
 393                                 attribs = new ArrayList<Tag>();\r
 394                         }\r
 395                         attribs.add(attrib);\r
 396                 }\r
 397                 \r
 398                 public String toString() {\r
 399                         StringBuffer sb = new StringBuffer();\r
 400                         if(prefix!=null) {\r
 401                                 sb.append(prefix);\r
 402                                 sb.append(':');\r
 403                         }\r
 404                         sb.append(name==null?"!!ERROR!!":name);\r
 405 \r
 406                         char quote = ((state&DOC_TYPE)==DOC_TYPE)?'\'':'"';\r
 407                         if(value!=null) {\r
 408                                 sb.append('=');\r
 409                                 sb.append(quote);\r
 410                                 sb.append(value);\r
 411                                 sb.append(quote);\r
 412                         }\r
 413                         return sb.toString();\r
 414                 }\r
 415         }\r
 416 \r
 417 }\r