top of page

How to detect whether a String is HTML in Android?

  • agulevski10
  • Oct 21, 2020
  • 1 min read

This is a helper class with a built-in regular expression that detects whether a string contains HTML markup.

Java

/**
 * Heuristic check for whether a string contains HTML markup.
 *
 * <p>A string is treated as HTML when it contains at least one of:
 * <ul>
 *   <li>an opening tag followed, anywhere later in the text, by a closing tag
 *       (the two tag names are not required to be the same);</li>
 *   <li>a self-closing tag such as {@code <br/>};</li>
 *   <li>a named or numeric character reference such as {@code &amp;},
 *       {@code &#169;} or {@code &#x1F600;}.</li>
 * </ul>
 *
 * <p>This is a regular-expression heuristic, not an HTML parser.
 */
public final class HTMLHelper {

    /** Tag name: starts with a word character, may continue with '-' or ':' (e.g. {@code my-widget}). */
    private static final String TAG_NAME = "\\w[\\w:\\-]*";

    /**
     * Zero or more attributes plus optional trailing whitespace.
     *
     * <p>Attribute names may contain '-', ':' and '.' so that {@code data-*} and {@code aria-*}
     * are recognised. Quoted values end at the first matching quote: using negated character
     * classes instead of a lazy {@code .*?} leaves only one way to match each value, which
     * avoids exponential backtracking on long unterminated tags. The three capturing groups
     * are kept so group numbering in {@link #htmlPattern} stays as before.
     */
    private static final String ATTRIBUTES =
            "((\\s+[\\w:.\\-]+(\\s*=\\s*(?:\"[^\"]*\"|'[^']*'|[^'\">\\s]+))?)+\\s*|\\s*)";

    /** Regex source for an opening tag, e.g. {@code <div class="x">}. */
    public static final String TAG_START = "<" + TAG_NAME + ATTRIBUTES + ">";

    /** Regex source for a closing tag, e.g. {@code </div>}; whitespace before '>' is allowed. */
    public static final String TAG_END = "</" + TAG_NAME + "\\s*>";

    /** Regex source for a self-closing tag, e.g. {@code <img src="a.png"/>}. */
    public static final String TAG_SELF_CLOSING = "<" + TAG_NAME + ATTRIBUTES + "/>";

    /** Regex source for a named, decimal or hexadecimal character reference. */
    public static final String HTML_ENTITY = "&(?:[a-zA-Z][a-zA-Z0-9]+|#[0-9]+|#[xX][0-9a-fA-F]+);";

    /**
     * Compiled detector: (opening tag ... closing tag) | (self-closing tag) | (entity).
     * {@link Pattern#DOTALL} lets the text between the tags span several lines.
     */
    public static final Pattern htmlPattern = Pattern.compile(
            "(" + TAG_START + ".*" + TAG_END + ")|(" + TAG_SELF_CLOSING + ")|(" + HTML_ENTITY + ")",
            Pattern.DOTALL);

    /** Static utility class; not instantiable. */
    private HTMLHelper() {
    }

    /**
     * Reports whether the given text looks like HTML.
     *
     * @param htmlString the text to inspect; may be {@code null}
     * @return {@code true} if {@link #htmlPattern} matches anywhere in the text;
     *         {@code false} for {@code null} or when nothing matches
     */
    public static boolean isHtml(String htmlString) {
        return htmlString != null && htmlPattern.matcher(htmlString).find();
    }
}

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

/**
 * Heuristic check for whether a string contains HTML markup.
 *
 * <p>A string is treated as HTML when it contains at least one of:
 * <ul>
 *   <li>an opening tag followed, anywhere later in the text, by a closing tag
 *       (the two tag names are not required to be the same);</li>
 *   <li>a self-closing tag such as {@code <br/>};</li>
 *   <li>a named or numeric character reference such as {@code &amp;},
 *       {@code &#169;} or {@code &#x1F600;}.</li>
 * </ul>
 *
 * <p>This is a regular-expression heuristic, not an HTML parser.
 */
public final class HTMLHelper {

    /** Tag name: starts with a word character, may continue with '-' or ':' (e.g. {@code my-widget}). */
    private static final String TAG_NAME = "\\w[\\w:\\-]*";

    /**
     * Zero or more attributes plus optional trailing whitespace.
     *
     * <p>Attribute names may contain '-', ':' and '.' so that {@code data-*} and {@code aria-*}
     * are recognised. Quoted values end at the first matching quote: using negated character
     * classes instead of a lazy {@code .*?} leaves only one way to match each value, which
     * avoids exponential backtracking on long unterminated tags. The three capturing groups
     * are kept so group numbering in {@link #htmlPattern} stays as before.
     */
    private static final String ATTRIBUTES =
            "((\\s+[\\w:.\\-]+(\\s*=\\s*(?:\"[^\"]*\"|'[^']*'|[^'\">\\s]+))?)+\\s*|\\s*)";

    /** Regex source for an opening tag, e.g. {@code <div class="x">}. */
    public static final String TAG_START = "<" + TAG_NAME + ATTRIBUTES + ">";

    /** Regex source for a closing tag, e.g. {@code </div>}; whitespace before '>' is allowed. */
    public static final String TAG_END = "</" + TAG_NAME + "\\s*>";

    /** Regex source for a self-closing tag, e.g. {@code <img src="a.png"/>}. */
    public static final String TAG_SELF_CLOSING = "<" + TAG_NAME + ATTRIBUTES + "/>";

    /** Regex source for a named, decimal or hexadecimal character reference. */
    public static final String HTML_ENTITY = "&(?:[a-zA-Z][a-zA-Z0-9]+|#[0-9]+|#[xX][0-9a-fA-F]+);";

    /**
     * Compiled detector: (opening tag ... closing tag) | (self-closing tag) | (entity).
     * {@link Pattern#DOTALL} lets the text between the tags span several lines.
     */
    public static final Pattern htmlPattern = Pattern.compile(
            "(" + TAG_START + ".*" + TAG_END + ")|(" + TAG_SELF_CLOSING + ")|(" + HTML_ENTITY + ")",
            Pattern.DOTALL);

    /** Static utility class; not instantiable. */
    private HTMLHelper() {
    }

    /**
     * Reports whether the given text looks like HTML.
     *
     * @param htmlString the text to inspect; may be {@code null}
     * @return {@code true} if {@link #htmlPattern} matches anywhere in the text;
     *         {@code false} for {@code null} or when nothing matches
     */
    public static boolean isHtml(String htmlString) {
        return htmlString != null && htmlPattern.matcher(htmlString).find();
    }
}

Comments


bottom of page