2 * ============LICENSE_START=======================================================
4 * ================================================================================
5 * Copyright (C) 2017 AT&T Intellectual Property. All rights reserved.
6 * ================================================================================
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * ============LICENSE_END=========================================================
21 package org.openecomp.sdc.common.util;
import org.apache.commons.lang3.StringEscapeUtils;

import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
32 public class HtmlCleaner {
34 private static Set<String> htmlTags = new HashSet<>();
36 private static String patternHtmlFullTagStr = "</?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[\\^'\">\\s]+))?)+\\s*|\\s*)/?>";
38 private static String patternHtmlTagOnlyStr = "</?(\\w+)[^>]*/?>";
40 private static Pattern onlyTagPattern = Pattern.compile(patternHtmlTagOnlyStr);
42 private static Pattern fullTagPattern = Pattern.compile(patternHtmlFullTagStr);
45 Tag[] allTags = HTML.getAllTags();
46 for (Tag tag : allTags) {
47 htmlTags.add(tag.toString().toLowerCase());
51 public static String stripHtml(String input) {
53 return stripHtml(input, false);
57 public static String stripHtml(String input, boolean toEscape) {
59 if (input == null || input.isEmpty()) {
63 Matcher matcher = onlyTagPattern.matcher(input);
65 Set<String> tagsToRemove = new HashSet<>();
67 while (matcher.find()) {
69 int start = matcher.start();
70 int end = matcher.end();
72 String matchTag = input.substring(start, end);
74 int groupCount = matcher.groupCount();
77 String tag = matcher.group(1);
78 if (tag != null && htmlTags.contains(tag.toLowerCase())) {
79 if (!tagsToRemove.contains(matchTag)) {
80 tagsToRemove.add(matchTag);
86 String stripHtmlStr = removeTagsFromString(tagsToRemove, input);
88 if (stripHtmlStr != null) {
90 stripHtmlStr = StringEscapeUtils.escapeHtml4(stripHtmlStr);
98 private static String removeTagsFromString(Set<String> tagsToRemove, String input) {
100 String stripStr = input;
101 if (input == null || tagsToRemove.isEmpty()) {
105 for (String tag : tagsToRemove) {
106 stripStr = stripStr.replaceAll(tag, "");