7 package com.google.appinventor.components.common;
9 import java.util.HashMap;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
// Regular expression for one entity reference: '&', then a captured name made of
// an optional '#' followed by one or more ASCII letters or digits, then ';'.
// Capture group 1 is the text between '&' and ';'.
24 private static final Pattern HTML_ENTITY_PATTERN = Pattern.compile(
"&(#?[0-9a-zA-Z]+);");
// Table from entity name (without the surrounding '&' and ';') to the character
// it stands for. Keys are case-sensitive strings.
25 private static final Map<String, Character> lookup =
new HashMap<String, Character>();
// Letter entities. Names are case-sensitive: for example "Agrave" (192) and
// "agrave" (224) are separate keys with different values.
// Accented Latin letters and ligatures with code points in 192-255.
29 lookup.put(
"Agrave", (
char) 192);
30 lookup.put(
"agrave", (
char) 224);
31 lookup.put(
"Aacute", (
char) 193);
32 lookup.put(
"aacute", (
char) 225);
33 lookup.put(
"Acirc", (
char) 194);
34 lookup.put(
"acirc", (
char) 226);
35 lookup.put(
"Atilde", (
char) 195);
36 lookup.put(
"atilde", (
char) 227);
37 lookup.put(
"Auml", (
char) 196);
38 lookup.put(
"auml", (
char) 228);
39 lookup.put(
"Aring", (
char) 197);
40 lookup.put(
"aring", (
char) 229);
41 lookup.put(
"AElig", (
char) 198);
42 lookup.put(
"aelig", (
char) 230);
43 lookup.put(
"Ccedil", (
char) 199);
44 lookup.put(
"ccedil", (
char) 231);
45 lookup.put(
"Egrave", (
char) 200);
46 lookup.put(
"egrave", (
char) 232);
47 lookup.put(
"Eacute", (
char) 201);
48 lookup.put(
"eacute", (
char) 233);
49 lookup.put(
"Ecirc", (
char) 202);
50 lookup.put(
"ecirc", (
char) 234);
51 lookup.put(
"Euml", (
char) 203);
52 lookup.put(
"euml", (
char) 235);
53 lookup.put(
"Igrave", (
char) 204);
54 lookup.put(
"igrave", (
char) 236);
55 lookup.put(
"Iacute", (
char) 205);
56 lookup.put(
"iacute", (
char) 237);
57 lookup.put(
"Icirc", (
char) 206);
58 lookup.put(
"icirc", (
char) 238);
59 lookup.put(
"Iuml", (
char) 207);
60 lookup.put(
"iuml", (
char) 239);
61 lookup.put(
"ETH", (
char) 208);
62 lookup.put(
"eth", (
char) 240);
63 lookup.put(
"Ntilde", (
char) 209);
64 lookup.put(
"ntilde", (
char) 241);
65 lookup.put(
"Ograve", (
char) 210);
66 lookup.put(
"ograve", (
char) 242);
67 lookup.put(
"Oacute", (
char) 211);
68 lookup.put(
"oacute", (
char) 243);
69 lookup.put(
"Ocirc", (
char) 212);
70 lookup.put(
"ocirc", (
char) 244);
71 lookup.put(
"Otilde", (
char) 213);
72 lookup.put(
"otilde", (
char) 245);
73 lookup.put(
"Ouml", (
char) 214);
74 lookup.put(
"ouml", (
char) 246);
75 lookup.put(
"Oslash", (
char) 216);
76 lookup.put(
"oslash", (
char) 248);
77 lookup.put(
"Ugrave", (
char) 217);
78 lookup.put(
"ugrave", (
char) 249);
79 lookup.put(
"Uacute", (
char) 218);
80 lookup.put(
"uacute", (
char) 250);
81 lookup.put(
"Ucirc", (
char) 219);
82 lookup.put(
"ucirc", (
char) 251);
83 lookup.put(
"Uuml", (
char) 220);
84 lookup.put(
"uuml", (
char) 252);
85 lookup.put(
"Yacute", (
char) 221);
86 lookup.put(
"yacute", (
char) 253);
87 lookup.put(
"THORN", (
char) 222);
88 lookup.put(
"thorn", (
char) 254);
89 lookup.put(
"szlig", (
char) 223);
90 lookup.put(
"yuml", (
char) 255);
// Latin letters and ligatures whose code points lie above 255.
91 lookup.put(
"Yuml", (
char) 376);
92 lookup.put(
"OElig", (
char) 338);
93 lookup.put(
"oelig", (
char) 339);
94 lookup.put(
"Scaron", (
char) 352);
95 lookup.put(
"scaron", (
char) 353);
// Greek capital letters (913-937; 930 has no entry).
96 lookup.put(
"Alpha", (
char) 913);
97 lookup.put(
"Beta", (
char) 914);
98 lookup.put(
"Gamma", (
char) 915);
99 lookup.put(
"Delta", (
char) 916);
100 lookup.put(
"Epsilon", (
char) 917);
101 lookup.put(
"Zeta", (
char) 918);
102 lookup.put(
"Eta", (
char) 919);
103 lookup.put(
"Theta", (
char) 920);
104 lookup.put(
"Iota", (
char) 921);
105 lookup.put(
"Kappa", (
char) 922);
106 lookup.put(
"Lambda", (
char) 923);
107 lookup.put(
"Mu", (
char) 924);
108 lookup.put(
"Nu", (
char) 925);
109 lookup.put(
"Xi", (
char) 926);
110 lookup.put(
"Omicron", (
char) 927);
111 lookup.put(
"Pi", (
char) 928);
112 lookup.put(
"Rho", (
char) 929);
113 lookup.put(
"Sigma", (
char) 931);
114 lookup.put(
"Tau", (
char) 932);
115 lookup.put(
"Upsilon", (
char) 933);
116 lookup.put(
"Phi", (
char) 934);
117 lookup.put(
"Chi", (
char) 935);
118 lookup.put(
"Psi", (
char) 936);
119 lookup.put(
"Omega", (
char) 937);
// Greek small letters (945-969), including the final-sigma form "sigmaf" (962).
120 lookup.put(
"alpha", (
char) 945);
121 lookup.put(
"beta", (
char) 946);
122 lookup.put(
"gamma", (
char) 947);
123 lookup.put(
"delta", (
char) 948);
124 lookup.put(
"epsilon", (
char) 949);
125 lookup.put(
"zeta", (
char) 950);
126 lookup.put(
"eta", (
char) 951);
127 lookup.put(
"theta", (
char) 952);
128 lookup.put(
"iota", (
char) 953);
129 lookup.put(
"kappa", (
char) 954);
130 lookup.put(
"lambda", (
char) 955);
131 lookup.put(
"mu", (
char) 956);
132 lookup.put(
"nu", (
char) 957);
133 lookup.put(
"xi", (
char) 958);
134 lookup.put(
"omicron", (
char) 959);
135 lookup.put(
"pi", (
char) 960);
136 lookup.put(
"rho", (
char) 961);
137 lookup.put(
"sigmaf", (
char) 962);
138 lookup.put(
"sigma", (
char) 963);
139 lookup.put(
"tau", (
char) 964);
140 lookup.put(
"upsilon", (
char) 965);
141 lookup.put(
"phi", (
char) 966);
142 lookup.put(
"chi", (
char) 967);
143 lookup.put(
"psi", (
char) 968);
144 lookup.put(
"omega", (
char) 969);
// Greek symbol variants (977, 978, 982).
145 lookup.put(
"thetasym", (
char) 977);
146 lookup.put(
"upsih", (
char) 978);
147 lookup.put(
"piv", (
char) 982);
// Symbol and punctuation entities.
// Signs and punctuation with code points 161-191, followed by the
// multiplication (215) and division (247) signs and three entries above 255.
149 lookup.put(
"iexcl", (
char) 161);
150 lookup.put(
"cent", (
char) 162);
151 lookup.put(
"pound", (
char) 163);
152 lookup.put(
"curren", (
char) 164);
153 lookup.put(
"yen", (
char) 165);
154 lookup.put(
"brvbar", (
char) 166);
155 lookup.put(
"sect", (
char) 167);
156 lookup.put(
"uml", (
char) 168);
157 lookup.put(
"copy", (
char) 169);
158 lookup.put(
"ordf", (
char) 170);
159 lookup.put(
"laquo", (
char) 171);
160 lookup.put(
"not", (
char) 172);
161 lookup.put(
"shy", (
char) 173);
162 lookup.put(
"reg", (
char) 174);
163 lookup.put(
"macr", (
char) 175);
164 lookup.put(
"deg", (
char) 176);
165 lookup.put(
"plusmn", (
char) 177);
166 lookup.put(
"sup2", (
char) 178);
167 lookup.put(
"sup3", (
char) 179);
168 lookup.put(
"acute", (
char) 180);
169 lookup.put(
"micro", (
char) 181);
170 lookup.put(
"para", (
char) 182);
171 lookup.put(
"middot", (
char) 183);
172 lookup.put(
"cedil", (
char) 184);
173 lookup.put(
"sup1", (
char) 185);
174 lookup.put(
"ordm", (
char) 186);
175 lookup.put(
"raquo", (
char) 187);
176 lookup.put(
"frac14", (
char) 188);
177 lookup.put(
"frac12", (
char) 189);
178 lookup.put(
"frac34", (
char) 190);
179 lookup.put(
"iquest", (
char) 191);
180 lookup.put(
"times", (
char) 215);
181 lookup.put(
"divide", (
char) 247);
182 lookup.put(
"fnof", (
char) 402);
183 lookup.put(
"circ", (
char) 710);
184 lookup.put(
"tilde", (
char) 732);
// Directional marks, dashes, quotation marks and other punctuation (8206-8260).
// "endash" and "emdash" are additional names for the same characters as
// "ndash" (8211) and "mdash" (8212).
185 lookup.put(
"lrm", (
char) 8206);
186 lookup.put(
"rlm", (
char) 8207);
187 lookup.put(
"ndash", (
char) 8211);
188 lookup.put(
"endash", (
char) 8211);
189 lookup.put(
"mdash", (
char) 8212);
190 lookup.put(
"emdash", (
char) 8212);
191 lookup.put(
"lsquo", (
char) 8216);
192 lookup.put(
"rsquo", (
char) 8217);
193 lookup.put(
"sbquo", (
char) 8218);
194 lookup.put(
"ldquo", (
char) 8220);
195 lookup.put(
"rdquo", (
char) 8221);
196 lookup.put(
"bdquo", (
char) 8222);
197 lookup.put(
"dagger", (
char) 8224);
198 lookup.put(
"Dagger", (
char) 8225);
199 lookup.put(
"bull", (
char) 8226);
200 lookup.put(
"hellip", (
char) 8230);
201 lookup.put(
"permil", (
char) 8240);
202 lookup.put(
"prime", (
char) 8242);
203 lookup.put(
"Prime", (
char) 8243);
204 lookup.put(
"lsaquo", (
char) 8249);
205 lookup.put(
"rsaquo", (
char) 8250);
206 lookup.put(
"oline", (
char) 8254);
207 lookup.put(
"frasl", (
char) 8260);
// Euro sign and letter-like symbols (8364-8501).
208 lookup.put(
"euro", (
char) 8364);
209 lookup.put(
"image", (
char) 8465);
210 lookup.put(
"weierp", (
char) 8472);
211 lookup.put(
"real", (
char) 8476);
212 lookup.put(
"trade", (
char) 8482);
213 lookup.put(
"alefsym", (
char) 8501);
// Arrows (8592-8660). Names differ only in the case of the 'A' between the
// single-stroke (e.g. "larr") and double-stroke (e.g. "lArr") entries.
214 lookup.put(
"larr", (
char) 8592);
215 lookup.put(
"uarr", (
char) 8593);
216 lookup.put(
"rarr", (
char) 8594);
217 lookup.put(
"darr", (
char) 8595);
218 lookup.put(
"harr", (
char) 8596);
219 lookup.put(
"crarr", (
char) 8629);
220 lookup.put(
"lArr", (
char) 8656);
221 lookup.put(
"uArr", (
char) 8657);
222 lookup.put(
"rArr", (
char) 8658);
223 lookup.put(
"dArr", (
char) 8659);
224 lookup.put(
"hArr", (
char) 8660);
// Mathematical operators and relations (8704-8901).
225 lookup.put(
"forall", (
char) 8704);
226 lookup.put(
"part", (
char) 8706);
227 lookup.put(
"exist", (
char) 8707);
228 lookup.put(
"empty", (
char) 8709);
229 lookup.put(
"nabla", (
char) 8711);
230 lookup.put(
"isin", (
char) 8712);
231 lookup.put(
"notin", (
char) 8713);
232 lookup.put(
"ni", (
char) 8715);
233 lookup.put(
"prod", (
char) 8719);
234 lookup.put(
"sum", (
char) 8721);
235 lookup.put(
"minus", (
char) 8722);
236 lookup.put(
"lowast", (
char) 8727);
237 lookup.put(
"radic", (
char) 8730);
238 lookup.put(
"prop", (
char) 8733);
239 lookup.put(
"infin", (
char) 8734);
240 lookup.put(
"ang", (
char) 8736);
241 lookup.put(
"and", (
char) 8743);
242 lookup.put(
"or", (
char) 8744);
243 lookup.put(
"cap", (
char) 8745);
244 lookup.put(
"cup", (
char) 8746);
245 lookup.put(
"int", (
char) 8747);
246 lookup.put(
"there4", (
char) 8756);
247 lookup.put(
"sim", (
char) 8764);
248 lookup.put(
"cong", (
char) 8773);
249 lookup.put(
"asymp", (
char) 8776);
250 lookup.put(
"ne", (
char) 8800);
251 lookup.put(
"equiv", (
char) 8801);
252 lookup.put(
"le", (
char) 8804);
253 lookup.put(
"ge", (
char) 8805);
254 lookup.put(
"sub", (
char) 8834);
255 lookup.put(
"sup", (
char) 8835);
256 lookup.put(
"nsub", (
char) 8836);
257 lookup.put(
"sube", (
char) 8838);
258 lookup.put(
"supe", (
char) 8839);
259 lookup.put(
"oplus", (
char) 8853);
260 lookup.put(
"otimes", (
char) 8855);
261 lookup.put(
"perp", (
char) 8869);
262 lookup.put(
"sdot", (
char) 8901);
// Ceiling/floor and angle brackets (8968-9002).
263 lookup.put(
"lceil", (
char) 8968);
264 lookup.put(
"rceil", (
char) 8969);
265 lookup.put(
"lfloor", (
char) 8970);
266 lookup.put(
"rfloor", (
char) 8971);
267 lookup.put(
"lang", (
char) 9001);
268 lookup.put(
"rang", (
char) 9002);
// Lozenge and playing-card suits (9674-9830).
269 lookup.put(
"loz", (
char) 9674);
270 lookup.put(
"spades", (
char) 9824);
271 lookup.put(
"clubs", (
char) 9827);
272 lookup.put(
"hearts", (
char) 9829);
273 lookup.put(
"diams", (
char) 9830);
// Entities for the ASCII characters that have special meaning in markup:
// '>' (62), '<' (60), '"' (34), '&' (38) and '\'' (39).
// "gt", "lt", "quot" and "amp" are registered in both lower-case and
// upper-case spellings; "apos" has a lower-case entry only.
275 lookup.put(
"gt", (
char) 62);
276 lookup.put(
"GT", (
char) 62);
277 lookup.put(
"lt", (
char) 60);
278 lookup.put(
"LT", (
char) 60);
279 lookup.put(
"quot", (
char) 34);
280 lookup.put(
"QUOT", (
char) 34);
281 lookup.put(
"amp", (
char) 38);
282 lookup.put(
"AMP", (
char) 38);
283 lookup.put(
"apos", (
char) 39);
// Space entities: no-break space (160 = U+00A0), en space (8194 = U+2002),
// em space (8195 = U+2003) and thin space (8201 = U+2009).
285 lookup.put(
"nbsp", (
char) 160);
286 lookup.put(
"ensp", (
char) 8194);
287 lookup.put(
"emsp", (
char) 8195);
288 lookup.put(
"thinsp", (
char) 8201);
// Zero-width non-joiner (8204 = U+200C) and zero-width joiner (8205 = U+200D).
289 lookup.put(
"zwnj", (
char) 8204);
291 lookup.put(
"zwj", (
char) 8205);
// Returns the table entry for entityName; Map.get yields null when the name
// has no entry. The lookup is an exact, case-sensitive string match.
303 return lookup.get(entityName);
// Condition for the trivial case: the text is empty or contains no '&', so
// HTML_ENTITY_PATTERN cannot match anywhere in it.
320 if (htmlText.length() == 0 ||
321 htmlText.indexOf(
'&') == -1) {
// Accumulates the decoded text.
325 StringBuilder output =
new StringBuilder();
// Index in htmlText just past the most recent entity that was replaced;
// everything before it has already been copied into output.
326 int lastMatchEnd = 0;
327 Matcher matcher = HTML_ENTITY_PATTERN.matcher(htmlText);
// Visit every "&name;" candidate in order.
328 while (matcher.find()) {
// Text between '&' and ';' (capture group 1 of the pattern).
330 String entity = matcher.group(1);
// Stays null when the candidate cannot be converted.
332 Character convertedEntity =
null;
// Hexadecimal numeric reference, e.g. "#x41".
// NOTE(review): startsWith("#x") is case-sensitive, so "#X41" is not handled
// here; it reaches the "#" branch, where Integer.parseInt("X41") throws.
333 if (entity.startsWith(
"#x")) {
// Hex digits after the "#x" prefix.
335 String hhhh = entity.substring(2);
// NOTE(review): prints to standard output for every hex reference; this looks
// like leftover debugging output and is a candidate for removal.
337 System.out.println(
"hex number is " + hhhh);
338 int code = Integer.parseInt(hhhh, 16);
// NOTE(review): the (char) cast keeps only the low 16 bits of code, so values
// above 0xFFFF are truncated to a different character.
339 convertedEntity = Character.valueOf((
char) code);
340 }
// Integer.parseInt rejected the digits (not valid hex, or out of int range).
catch (NumberFormatException e) {
343 }
// Decimal numeric reference, e.g. "#65".
else if (entity.startsWith(
"#")) {
// Decimal digits after the "#" prefix.
345 String nnnn = entity.substring(1);
347 int code = Integer.parseInt(nnnn);
// NOTE(review): same 16-bit truncation as in the hexadecimal branch.
348 convertedEntity = Character.valueOf((
char) code);
349 }
// Integer.parseInt rejected the text (non-decimal characters, or out of int range).
catch (NumberFormatException e) {
// Named entity: exact, case-sensitive lookup in the table; null if unknown.
353 convertedEntity = lookup.get(entity);
// Only a successful conversion changes the output: copy the untouched text
// that precedes the entity, append the decoded character, and move
// lastMatchEnd past the entity. When convertedEntity is null, lastMatchEnd is
// not advanced, so the entity text is copied unchanged by a later append.
356 if (convertedEntity !=
null) {
357 output.append(htmlText.substring(lastMatchEnd, matcher.start()));
358 output.append(convertedEntity);
359 lastMatchEnd = matcher.end();
// Copy whatever follows the last replaced entity.
362 if (lastMatchEnd < htmlText.length()) {
363 output.append(htmlText.substring(lastMatchEnd));
365 return output.toString();