3007ea953b98deb5214dc5802f8bda7269b72691
[aaf/authz.git] / misc / rosetta / src / main / java / org / onap / aaf / misc / rosetta / XmlEscape.java
1 /**
2  * ============LICENSE_START====================================================
3  * org.onap.aaf
4  * ===========================================================================
5  * Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
6  * ===========================================================================
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  * 
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  * 
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  * ============LICENSE_END====================================================
19  *
20  */
21
22 package org.onap.aaf.misc.rosetta;
23
24 import java.io.IOException;
25 import java.io.Reader;
26 import java.util.Map.Entry;
27 import java.util.TreeMap;
28
29 public class XmlEscape {
30     private XmlEscape() {}
31     
32     private static final TreeMap<String,Integer> charMap; // see initialization at end
33     private static final TreeMap<Integer,String> intMap; // see initialization at end
34
35     public static void xmlEscape(StringBuilder sb, Reader r) throws ParseException {
36         try {
37             int c;
38             StringBuilder esc = new StringBuilder();
39             for (int cnt = 0;cnt<9 /*max*/; ++cnt) {
40                 if ((c=r.read())<0)throw new ParseException("Invalid Data: Unfinished Escape Sequence");
41                 if (c!=';') { 
42                     esc.append((char)c);
43                 } else { // evaluate
44                     Integer i = charMap.get(esc.toString());
45                     if (i==null) {
46                         // leave in nasty XML format for now.
47                         sb.append('&');
48                         sb.append(esc);
49                         sb.append(';');
50                     } else {
51                         sb.append((char)i.intValue());
52                     }
53                     break;
54                 }
55             }
56             
57             
58         } catch (IOException e) {
59             throw new ParseException(e);
60         }
61     }
62     
63     public static void xmlEscape(StringBuilder sb, int chr) {
64         sb.append('&');
65         sb.append(intMap.get(chr));
66         sb.append(';');
67     }
68     
69     public static String convert(StringBuilder insb) {
70         int idx, ch;
71         StringBuilder sb=null;
72         for (idx=0;idx<insb.length();++idx) {
73             ch = insb.charAt(idx);
74             if (ch>=160 || ch==34 || ch==38 || ch==39 || ch==60 || ch==62) {
75                 sb = new StringBuilder();
76                 sb.append(insb,0,idx);
77                 break;
78             }
79         }
80         
81         if (sb==null)return insb.toString();
82             
83         for (int i=idx;i<insb.length();++i) {
84             ch = insb.charAt(i);
85             if (ch<160) {
86                 switch(ch) {
87                     case 34: sb.append("&quot;"); break;
88                     case 38: sb.append("&amp;"); break;
89                     case 39: sb.append("&apos;"); break;
90                     case 60: sb.append("&lt;"); break;
91                     case 62: sb.append("&gt;"); break;
92                     default:
93                         sb.append((char)ch);
94                 }
95             } else { // use map
96                 String s = intMap.get(ch);
97                 if (s==null)sb.append((char)ch);
98                 else {
99                     sb.append('&');
100                     sb.append(s);
101                     sb.append(';');
102                 }
103             }
104         }
105         return sb.toString();
106     }
107
108     static {
109         charMap = new TreeMap<>();
110         intMap = new TreeMap<>();
111         charMap.put("quot", 34);
112         charMap.put("amp",38);
113         charMap.put("apos",39);
114         charMap.put("lt",60);
115         charMap.put("gt",62);
116         charMap.put("nbsp",160);
117         charMap.put("iexcl",161);
118         charMap.put("cent",162);
119         charMap.put("pound",163);
120         charMap.put("curren",164);
121         charMap.put("yen",165);
122         charMap.put("brvbar",166);
123         charMap.put("sect",167);
124         charMap.put("uml",168);
125         charMap.put("copy",169);
126         charMap.put("ordf",170);
127         charMap.put("laquo",171);
128         charMap.put("not",172);
129         charMap.put("shy",173);
130         charMap.put("reg",174);
131         charMap.put("macr",175);
132         charMap.put("deg",176);
133         charMap.put("plusmn",177);
134         charMap.put("sup2",178);
135         charMap.put("sup3",179);
136         charMap.put("acute",180);
137         charMap.put("micro",181);
138         charMap.put("para",182);
139         charMap.put("middot",183);
140         charMap.put("cedil",184);
141         charMap.put("sup1",185);
142         charMap.put("ordm",186);
143         charMap.put("raquo",187);
144         charMap.put("frac14",188);
145         charMap.put("frac12",189);
146         charMap.put("frac34",190);
147         charMap.put("iquest",191);
148         charMap.put("Agrave",192);
149         charMap.put("Aacute",193);
150         charMap.put("Acirc",194);
151         charMap.put("Atilde",195);
152         charMap.put("Auml",196);
153         charMap.put("Aring",197);
154         charMap.put("AElig",198);
155         charMap.put("Ccedil",199);
156         charMap.put("Egrave",200);
157         charMap.put("Eacute",201);
158         charMap.put("Ecirc",202);
159         charMap.put("Euml",203);
160         charMap.put("Igrave",204);
161         charMap.put("Iacute",205);
162         charMap.put("Icirc",206);
163         charMap.put("Iuml",207);
164         charMap.put("ETH",208);
165         charMap.put("Ntilde",209);
166         charMap.put("Ograve",210);
167         charMap.put("Oacute",211);
168         charMap.put("Ocirc",212);
169         charMap.put("Otilde",213);
170         charMap.put("Ouml",214);
171         charMap.put("times",215);
172         charMap.put("Oslash",216);
173         charMap.put("Ugrave",217);
174         charMap.put("Uacute",218);
175         charMap.put("Ucirc",219);
176         charMap.put("Uuml",220);
177         charMap.put("Yacute",221);
178         charMap.put("THORN",222);
179         charMap.put("szlig",223);
180         charMap.put("agrave",224);
181         charMap.put("aacute",225);
182         charMap.put("acirc",226);
183         charMap.put("atilde",227);
184         charMap.put("auml",228);
185         charMap.put("aring",229);
186         charMap.put("aelig",230);
187         charMap.put("ccedil",231);
188         charMap.put("egrave",232);
189         charMap.put("eacute",233);
190         charMap.put("ecirc",234);
191         charMap.put("euml",235);
192         charMap.put("igrave",236);
193         charMap.put("iacute",237);
194         charMap.put("icirc",238);
195         charMap.put("iuml",239);
196         charMap.put("eth",240);
197         charMap.put("ntilde",241);
198         charMap.put("ograve",242);
199         charMap.put("oacute",243);
200         charMap.put("ocirc",244);
201         charMap.put("otilde",245);
202         charMap.put("ouml",246);
203         charMap.put("divide",247);
204         charMap.put("oslash",248);
205         charMap.put("ugrave",249);
206         charMap.put("uacute",250);
207         charMap.put("ucirc",251);
208         charMap.put("uuml",252);
209         charMap.put("yacute",253);
210         charMap.put("thorn",254);
211         charMap.put("yuml",255);
212         charMap.put("OElig",338);
213         charMap.put("oelig",339);
214         charMap.put("Scaron",352);
215         charMap.put("scaron",353);
216         charMap.put("Yuml",376);
217         charMap.put("fnof",402);
218         charMap.put("circ",710);
219         charMap.put("tilde",732);
220         charMap.put("Alpha",913);
221         charMap.put("Beta",914);
222         charMap.put("Gamma",915);
223         charMap.put("Delta",916);
224         charMap.put("Epsilon",917);
225         charMap.put("Zeta",918);
226         charMap.put("Eta",919);
227         charMap.put("Theta",920);
228         charMap.put("Iota",921);
229         charMap.put("Kappa",922);
230         charMap.put("Lambda",923);
231         charMap.put("Mu",924);
232         charMap.put("Nu",925);
233         charMap.put("Xi",926);
234         charMap.put("Omicron",927);
235         charMap.put("Pi",928);
236         charMap.put("Rho",929);
237         charMap.put("Sigma",931);
238         charMap.put("Tau",932);
239         charMap.put("Upsilon",933);
240         charMap.put("Phi",934);
241         charMap.put("Chi",935);
242         charMap.put("Psi",936);
243         charMap.put("Omega",937);
244         charMap.put("alpha",945);
245         charMap.put("beta",946);
246         charMap.put("gamma",947);
247         charMap.put("delta",948);
248         charMap.put("epsilon",949);
249         charMap.put("zeta",950);
250         charMap.put("eta",951);
251         charMap.put("theta",952);
252         charMap.put("iota",953);
253         charMap.put("kappa",954);
254         charMap.put("lambda",955);
255         charMap.put("mu",956);
256         charMap.put("nu",957);
257         charMap.put("xi",958);
258         charMap.put("omicron",959);
259         charMap.put("pi",960);
260         charMap.put("rho",961);
261         charMap.put("sigmaf",962);
262         charMap.put("sigma",963);
263         charMap.put("tau",964);
264         charMap.put("upsilon",965);
265         charMap.put("phi",966);
266         charMap.put("chi",967);
267         charMap.put("psi",968);
268         charMap.put("omega",969);
269         charMap.put("thetasym",977);
270         charMap.put("upsih",978);
271         charMap.put("piv",982);
272         charMap.put("ensp",8194);
273         charMap.put("emsp",8195);
274         charMap.put("thinsp",8201);
275         charMap.put("zwnj",8204);
276         charMap.put("zwj",8205);
277         charMap.put("lrm",8206);
278         charMap.put("rlm",8207);
279         charMap.put("ndash",8211);
280         charMap.put("mdash",8212);
281         charMap.put("lsquo",8216);
282         charMap.put("rsquo",8217);
283         charMap.put("sbquo",8218);
284         charMap.put("ldquo",8220);
285         charMap.put("rdquo",8221);
286         charMap.put("bdquo",8222);
287         charMap.put("dagger",8224);
288         charMap.put("Dagger",8225);
289         charMap.put("bull",8226);
290         charMap.put("hellip",8230);
291         charMap.put("permil",8240);
292         charMap.put("prime",8242);
293         charMap.put("Prime",8243);
294         charMap.put("lsaquo",8249);
295         charMap.put("rsaquo",8250);
296         charMap.put("oline",8254);
297         charMap.put("frasl",8260);
298         charMap.put("euro",8364);
299         charMap.put("image",8465);
300         charMap.put("weierp",8472);
301         charMap.put("real",8476);
302         charMap.put("trade",8482);
303         charMap.put("alefsym",8501);
304         charMap.put("larr",8592);
305         charMap.put("uarr",8593);
306         charMap.put("rarr",8594);
307         charMap.put("darr",8595);
308         charMap.put("harr",8596);
309         charMap.put("crarr",8629);
310         charMap.put("lArr",8656);
311         charMap.put("uArr",8657);
312         charMap.put("rArr",8658);
313         charMap.put("dArr",8659);
314         charMap.put("hArr",8660);
315         charMap.put("forall",8704);
316         charMap.put("part",8706);
317         charMap.put("exist",8707);
318         charMap.put("empty",8709);
319         charMap.put("nabla",8711);
320         charMap.put("isin",8712);
321         charMap.put("notin",8713);
322         charMap.put("ni",8715);
323         charMap.put("prod",8719);
324         charMap.put("sum",8721);
325         charMap.put("minus",8722);
326         charMap.put("lowast",8727);
327         charMap.put("radic",8730);
328         charMap.put("prop",8733);
329         charMap.put("infin",8734);
330         charMap.put("ang",8736);
331         charMap.put("and",8743);
332         charMap.put("or",8744);
333         charMap.put("cap",8745);
334         charMap.put("cup",8746);
335         charMap.put("int",8747);
336         charMap.put("there4",8756);
337         charMap.put("sim",8764);
338         charMap.put("cong",8773);
339         charMap.put("asymp",8776);
340         charMap.put("ne",8800);
341         charMap.put("equiv",8801);
342         charMap.put("le",8804);
343         charMap.put("ge",8805);
344         charMap.put("sub",8834);
345         charMap.put("sup",8835);
346         charMap.put("nsub",8836);
347         charMap.put("sube",8838);
348         charMap.put("supe",8839);
349         charMap.put("oplus",8853);
350         charMap.put("otimes",8855);
351         charMap.put("perp",8869);
352         charMap.put("sdot",8901);
353         charMap.put("lceil",8968);
354         charMap.put("rceil",8969);
355         charMap.put("lfloor",8970);
356         charMap.put("rfloor",8971);
357         charMap.put("lang",9001);
358         charMap.put("rang",9002);
359         charMap.put("loz",9674);
360         charMap.put("spades",9824);
361         charMap.put("clubs",9827);
362         charMap.put("hearts",9829);
363         charMap.put("diams",9830);
364         
365         for ( Entry<String, Integer> es: charMap.entrySet()) {
366             if (es.getValue()>=160); // save small space... note that no longer has amp, etc.
367             intMap.put(es.getValue(), es.getKey());
368         }
369     }
370
371 }