1 /*******************************************************************************
\r
2 * ============LICENSE_START====================================================
\r
4 * * ===========================================================================
\r
5 * * Copyright © 2017 AT&T Intellectual Property. All rights reserved.
\r
6 * * Copyright © 2017 Amdocs
\r
7 * * ===========================================================================
\r
8 * * Licensed under the Apache License, Version 2.0 (the "License");
\r
9 * * you may not use this file except in compliance with the License.
\r
10 * * You may obtain a copy of the License at
\r
12 * * http://www.apache.org/licenses/LICENSE-2.0
\r
14 * * Unless required by applicable law or agreed to in writing, software
\r
15 * * distributed under the License is distributed on an "AS IS" BASIS,
\r
16 * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
17 * * See the License for the specific language governing permissions and
\r
18 * * limitations under the License.
\r
19 * * ============LICENSE_END====================================================
\r
21 * * ECOMP is a trademark and service mark of AT&T Intellectual Property.
\r
23 ******************************************************************************/
\r
24 package com.att.cadi.wsse;
\r
26 import java.io.ByteArrayOutputStream;
\r
27 import java.io.IOException;
\r
28 import java.io.InputStream;
\r
29 import java.util.ArrayList;
\r
30 import java.util.HashMap;
\r
31 import java.util.List;
\r
32 import java.util.Map;
\r
33 import java.util.Stack;
\r
35 import javax.xml.stream.XMLStreamException;
\r
39 * This class works similarly to StAX, except StAX has more behavior than is needed. That would be ok, but
\r
40 * StAX also buffers in its code in such a way as to read most, if not all, of the incoming stream into memory,
\r
41 * defeating the purpose of pre-reading only the Header
\r
43 * This Reader does no back-tracking, but is able to create events based on syntax and given state only, leaving the
\r
44 * Read-ahead mode of the InputStream up to the other classes.
\r
46 * At this time, we only implement the important events, though if this is good enough, it could be expanded, perhaps to
\r
47 * replace the original XMLReader from StAX.
\r
51 // @SuppressWarnings("restriction")
\r
52 public class XReader {
\r
53 private XEvent curr,another;
\r
54 private InputStream is;
\r
55 private ByteArrayOutputStream baos;
\r
56 private int state, count, last;
\r
58 private Stack<Map<String,String>> nsses;
\r
60 public XReader(InputStream is) {
\r
62 curr = another = null;
\r
63 baos = new ByteArrayOutputStream();
\r
66 nsses = new Stack<Map<String,String>>();
\r
69 public boolean hasNext() throws XMLStreamException {
\r
76 public XEvent nextEvent() {
\r
85 // Note: The State of parsing XML can be complicated. There are too many to cleanly keep in "booleans". Additionally,
\r
86 // there are certain checks that can be better made with Bitwise operations within switches
\r
87 // Keeping track of state this way also helps us to accomplish logic without storing any back characters except one
\r
88 private final static int BEGIN_DOC= 0x000001;
\r
89 private final static int DOC_TYPE= 0x000002;
\r
90 private final static int QUESTION_F= 0x000004;
\r
91 private final static int QUESTION = 0x000008;
\r
92 private final static int START_TAG = 0x000010;
\r
93 private final static int END_TAG = 0x000020;
\r
94 private final static int VALUE= 0x000040;
\r
95 private final static int COMMENT = 0x001000;
\r
96 private final static int COMMENT_E = 0x002000;
\r
97 private final static int COMMENT_D1 =0x010000;
\r
98 private final static int COMMENT_D2 =0x020000;
\r
99 private final static int COMMENT_D3 =0x040000;
\r
100 private final static int COMMENT_D4 =0x080000;
\r
101 // useful combined Comment states
\r
102 private final static int IN_COMMENT=COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2;
\r
103 private final static int COMPLETE_COMMENT = COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3|COMMENT_D4;
\r
106 private XEvent parse() throws XMLStreamException {
\r
107 Map<String,String> nss = nsses.isEmpty()?null:nsses.peek();
\r
110 if((rv=another)!=null) { // "another" is a tag that may have needed to be created, but not
\r
111 // immediately returned. Save for next parse. If necessary, this could be turned into
\r
112 // a FIFO storage, but a single reference is enough for now.
\r
113 another = null; // "rv" is now set for the Event, and will be returned. Set to Null.
\r
119 while(go && (c=is.read())>=0) {
\r
122 case '<': // Tag is opening
\r
123 state|=~BEGIN_DOC; // remove BEGIN_DOC flag, this is possibly an XML Doc
\r
125 if(baos.size()>0) { // If there are any characters between tags, we send as Character Event
\r
126 String chars = baos.toString().trim(); // Trim out WhiteSpace before and after
\r
127 if(chars.length()>0) { // don't send if Characters were only whitespace
\r
128 cxe = new XEvent.Characters(chars);
\r
133 last = c; // make sure "last" character is set for use in "ParseTag"
\r
134 Tag t = parseTag(); // call subroutine to process the tag as a unit
\r
136 switch(t.state&(START_TAG|END_TAG)) {
\r
138 nss = getNss(nss,t); // Only Start Tags might have NS Attributes
\r
139 // Get any NameSpace elements from tag. If there are, nss will become
\r
140 // a new Map with all the previous NSs plus the new. This provides
\r
141 // scoping behavior when used with the Stack
\r
142 // drop through on purpose
\r
144 ns = t.prefix==null?"":nss.get(t.prefix); // Get the namespace from prefix (if exists)
\r
150 throw new XMLStreamException("Invalid Namespace Prefix at " + count);
\r
152 switch(t.state) { // based on
\r
154 rv = new XEvent.StartDocument();
\r
157 rv = new XEvent.Comment(t.value);
\r
160 rv = new XEvent.StartElement(ns,t.name);
\r
161 nsses.push(nss); // Change potential scope for Namespace
\r
164 rv = new XEvent.EndElement(ns,t.name);
\r
165 nss = nsses.pop(); // End potential scope for Namespace
\r
167 case START_TAG|END_TAG: // This tag is both start/end aka <myTag/>
\r
168 rv = new XEvent.StartElement(ns,t.name);
\r
169 if(last=='/')another = new XEvent.EndElement(ns,t.name);
\r
171 if(cxe!=null) { // if there is a Character Event, it actually should go first. ow.
\r
172 another = rv; // Make current Event the "another" or next event, and
\r
173 rv = cxe; // send Character Event now
\r
179 if((state&BEGIN_DOC)==BEGIN_DOC) { // if Whitespace before doc, just ignore
\r
182 // fallthrough on purpose
\r
184 if((state&BEGIN_DOC)==BEGIN_DOC) { // if there is any data at the start other than XML Tag, it's not XML
\r
185 throw new XMLStreamException("Parse Error: This is not an XML Doc");
\r
187 baos.write(c); // save off Characters
\r
189 last = c; // Some processing needs to know what the last character was, aka Escaped characters... ex \"
\r
191 } catch (IOException e) {
\r
192 throw new XMLStreamException(e); // all errors parsing will be treated as XMLStreamErrors (like StAX)
\r
194 if(c==-1 && (state&BEGIN_DOC)==BEGIN_DOC) { // Normally, end of stream is ok, however, we need to know if the
\r
195 throw new XMLStreamException("Premature End of File"); // document isn't an XML document, so we throw exception if it
\r
196 } // hasn't yet been determined to be an XML Doc
\r
204 * Parsing a Tag is somewhat complicated, so it's helpful to separate this process from the
\r
205 * higher level Parsing effort
\r
207 * @throws IOException
\r
208 * @throws XMLStreamException
\r
210 private Tag parseTag() throws IOException, XMLStreamException {
\r
214 int c, quote=0; // If "quote" is 0, then we're not in a quote. We set ' (in pretag) or " in attribs accordingly to denote quoted
\r
215 String prefix=null,name=null,value=null;
\r
218 while(go && (c=is.read())>=0) {
\r
220 if(quote!=0) { // If we're in a quote, we only end if we hit another quote of the same type, not preceded by \
\r
221 if(c==quote && last!='\\') {
\r
226 } else if((state&COMMENT)==COMMENT) { // similar to Quote is being in a comment
\r
229 switch(state) { // XML has a complicated Quote set... <!-- --> ... we keep track if each has been met with flags.
\r
230 case COMMENT|COMMENT_E:
\r
233 case COMMENT|COMMENT_E|COMMENT_D1:
\r
235 baos.reset(); // clear out "!--", it's a Comment
\r
237 case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2:
\r
241 case COMMENT|COMMENT_E|COMMENT_D1|COMMENT_D2|COMMENT_D3:
\r
247 case '>': // Tag indicator has been found, do we have all the comment characters in line?
\r
248 if((state&COMPLETE_COMMENT)==COMPLETE_COMMENT) {
\r
249 byte ba[] = baos.toByteArray();
\r
250 tag = new Tag(null,null, new String(ba,0,ba.length-2));
\r
255 // fall through on purpose
\r
257 state&=~(COMMENT_D3|COMMENT_D4);
\r
258 if((state&IN_COMMENT)!=IN_COMMENT) state&=~IN_COMMENT; // false alarm, it's not actually a comment
\r
261 } else { // Normal Tag Processing loop
\r
264 switch(state & (QUESTION_F|QUESTION)) { // Validate the state of Doc tag... <?xml ... ?>
\r
267 state &= ~QUESTION_F;
\r
270 state |=QUESTION_F;
\r
273 throw new IOException("Bad character [?] at " + count);
\r
278 state|=COMMENT|COMMENT_E; // likely a comment, continue processing in Comment Loop
\r
283 state|=(last=='<'?END_TAG:(END_TAG|START_TAG)); // end tag indicator </xxx>, ,or both <xxx/>
\r
286 prefix=baos.toString(); // prefix indicator
\r
289 case '=': // used in Attributes
\r
290 name=baos.toString();
\r
294 case '>': // end the tag, which causes end of this subprocess as well as formulation of the found data
\r
296 // passthrough on purpose
\r
299 case '\n': // white space indicates change in internal tag state, ex between name and between attributes
\r
300 if((state&VALUE)==VALUE) {
\r
301 value = baos.toString(); // we're in VALUE state, add characters to Value
\r
302 } else if(name==null) {
\r
303 name = baos.toString(); // we're in Name state (default) add characters to Name
\r
305 baos.reset(); // we've assigned chars, reset buffer
\r
306 if(name!=null) { // Name is not null, there's a tag in the offing here...
\r
307 Tag t = new Tag(prefix,name,value);
\r
308 if(tag==null) { // Set as the tag to return, if not exists
\r
310 } else { // if we already have a Tag, then we'll treat this one as an attribute
\r
314 prefix=name=value=null; // reset these values in case we loop for attributes.
\r
316 case '\'': // is the character one of two kinds of quote?
\r
324 baos.write(c); // write any unprocessed bytes into buffer
\r
330 int type = state&(DOC_TYPE|COMMENT|END_TAG|START_TAG); // get just the Tag states and turn into Type for Tag
\r
334 tag.state|=type; // add the appropriate Tag States
\r
341 * If the tag contains some Namespace attributes, create a new nss from the passed in one, copy all into it, then add
\r
342 * This provides Scoping behavior
\r
344 * If nss is null in the first place, create a new nss, so we don't have to deal with null Maps.
\r
350 private Map<String, String> getNss(Map<String, String> nss, Tag t) {
\r
351 Map<String,String> newnss = null;
\r
352 if(t.attribs!=null) {
\r
353 for(Tag tag : t.attribs) {
\r
354 if("xmlns".equals(tag.prefix)) {
\r
356 newnss = new HashMap<String,String>();
\r
357 if(nss!=null)newnss.putAll(nss);
\r
359 newnss.put(tag.name, tag.value);
\r
363 return newnss==null?(nss==null?new HashMap<String,String>():nss):newnss;
\r
367 * The result of the parseTag method
\r
369 * Data is split up into prefix, name and value portions. "Tags" with Values that are inside a Tag are known in XML
\r
376 public String prefix,name,value;
\r
377 public List<Tag> attribs;
\r
379 public Tag(String prefix, String name, String value) {
\r
380 this.prefix = prefix;
\r
382 this.value = value;
\r
388 * Not all tags need attributes... lazy instantiate to save time and memory
\r
391 public void add(Tag attrib) {
\r
392 if(attribs == null) {
\r
393 attribs = new ArrayList<Tag>();
\r
395 attribs.add(attrib);
\r
398 public String toString() {
\r
399 StringBuffer sb = new StringBuffer();
\r
404 sb.append(name==null?"!!ERROR!!":name);
\r
406 char quote = ((state&DOC_TYPE)==DOC_TYPE)?'\'':'"';
\r
413 return sb.toString();
\r