001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020 package org.apache.myfaces.tobago.util;
021
022 import java.io.IOException;
023 import java.io.Writer;
024
025 /**
026 * User: weber
027 * Date: Jun 28, 2005
028 * Time: 2:07:29 PM
029 */
030 public final class HtmlWriterUtil {
031
032 private static final char[][] CHARS_TO_ESCAPE;
033
034 static {
035 // init lookup table
036 CHARS_TO_ESCAPE = new char[0xA0][];
037 CHARS_TO_ESCAPE['"'] = """.toCharArray();
038 CHARS_TO_ESCAPE['&'] = "&".toCharArray();
039 CHARS_TO_ESCAPE['<'] = "<".toCharArray();
040 CHARS_TO_ESCAPE['>'] = ">".toCharArray();
041 }
042
043 private final Writer out;
044
045 private final ResponseWriterBuffer buffer;
046
047 private final boolean utf8;
048
049 public HtmlWriterUtil(final Writer out, final String characterEncoding) {
050 this.out = out;
051 utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
052 buffer = new ResponseWriterBuffer(out);
053 }
054
055 public void writeAttributeValue(final String text)
056 throws IOException {
057 writeEncodedValue(text.toCharArray(), 0, text.length(), true);
058 }
059
060 public void writeText(final String text) throws IOException {
061 writeEncodedValue(text.toCharArray(), 0, text.length(), false);
062 }
063
064 public void writeText(final char[] text, final int start, final int length)
065 throws IOException {
066 writeEncodedValue(text, start, length, false);
067 }
068
069 private void writeEncodedValue(final char[] text, final int start,
070 final int length, final boolean isAttribute)
071 throws IOException {
072
073 int localIndex = -1;
074
075 final int end = start + length;
076 for (int i = start; i < end; i++) {
077 char ch = text[i];
078 if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
079 localIndex = i;
080 break;
081 }
082 }
083
084 if (localIndex == -1) {
085 // no need to escape
086 out.write(text, start, length);
087 } else {
088 // write until localIndex and then encode the remainder
089 out.write(text, start, localIndex);
090
091 for (int i = localIndex; i < end; i++) {
092 final char ch = text[i];
093
094 // Tilde or less...
095 if (ch < CHARS_TO_ESCAPE.length) {
096 if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
097 // HTML 4.0, section B.7.1: ampersands followed by
098 // an open brace don't get escaped
099 buffer.addToBuffer('&');
100 } else if (CHARS_TO_ESCAPE[ch] != null) {
101 buffer.addToBuffer(CHARS_TO_ESCAPE[ch]);
102 } else {
103 buffer.addToBuffer(ch);
104 }
105 } else if (utf8) {
106 buffer.addToBuffer(ch);
107 } else if (ch <= 0xff) {
108 // ISO-8859-1 entities: encode as needed
109 buffer.flushBuffer();
110
111 out.write('&');
112 char[] chars = ISO8859_1_ENTITIES[ch - 0xA0];
113 out.write(chars, 0, chars.length);
114 out.write(';');
115 } else {
116 buffer.flushBuffer();
117
118 // Double-byte characters to encode.
119 // PENDING: when outputting to an encoding that
120 // supports double-byte characters (UTF-8, for example),
121 // we should not be encoding
122 writeDecRef(ch);
123 }
124 }
125
126 buffer.flushBuffer();
127 }
128 }
129
130
131 /**
132 * Writes a character as a decimal escape. Hex escapes are smaller than
133 * the decimal version, but Netscape didn't support hex escapes until
134 * 4.7.4.
135 */
136 private void writeDecRef(final char ch) throws IOException {
137 if (ch == '\u20ac') {
138 out.write("€");
139 return;
140 }
141 out.write("&#");
142 // Formerly used String.valueOf(). This version tests out
143 // about 40% faster in a microbenchmark (and on systems where GC is
144 // going gonzo, it should be even better)
145 int i = (int) ch;
146 if (i > 10000) {
147 out.write('0' + (i / 10000));
148 i = i % 10000;
149 out.write('0' + (i / 1000));
150 i = i % 1000;
151 out.write('0' + (i / 100));
152 i = i % 100;
153 out.write('0' + (i / 10));
154 i = i % 10;
155 out.write('0' + i);
156 } else if (i > 1000) {
157 out.write('0' + (i / 1000));
158 i = i % 1000;
159 out.write('0' + (i / 100));
160 i = i % 100;
161 out.write('0' + (i / 10));
162 i = i % 10;
163 out.write('0' + i);
164 } else {
165 out.write('0' + (i / 100));
166 i = i % 100;
167 out.write('0' + (i / 10));
168 i = i % 10;
169 out.write('0' + i);
170 }
171
172 out.write(';');
173 }
174
175 public static boolean attributeValueMustEscaped(final String name) {
176 // this is 30% faster then the .equals(name) version
177 // tested with 100 loops over 19871 names
178 // (extracted from logfile over all demo pages)
179
180 try {
181 switch (name.charAt(0)) {
182 case 'i': // 'id'
183 if (name.length() == 2 && name.charAt(1) == 'd') {
184 return false;
185 }
186 break;
187 case 'n': // 'name'
188 if (name.length() == 4 && name.charAt(1) == 'a' && name.charAt(2) == 'm'
189 && name.charAt(3) == 'e') {
190 return false;
191 }
192 break;
193 case 'c': // 'class'
194 if (name.length() == 5 && name.charAt(1) == 'l' && name.charAt(2) == 'a'
195 && name.charAt(3) == 's' && name.charAt(4) == 's') {
196 return false;
197 }
198 break;
199 default:
200 return true;
201 }
202 } catch (NullPointerException e) {
203 // ignore
204 } catch (StringIndexOutOfBoundsException e) {
205 // ignore
206 }
207 return true;
208 }
209
210 //
211 // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
212 //
213 private static final char [][] ISO8859_1_ENTITIES = new char [][]{
214 "nbsp".toCharArray(),
215 "iexcl".toCharArray(),
216 "cent".toCharArray(),
217 "pound".toCharArray(),
218 "curren".toCharArray(),
219 "yen".toCharArray(),
220 "brvbar".toCharArray(),
221 "sect".toCharArray(),
222 "uml".toCharArray(),
223 "copy".toCharArray(),
224 "ordf".toCharArray(),
225 "laquo".toCharArray(),
226 "not".toCharArray(),
227 "shy".toCharArray(),
228 "reg".toCharArray(),
229 "macr".toCharArray(),
230 "deg".toCharArray(),
231 "plusmn".toCharArray(),
232 "sup2".toCharArray(),
233 "sup3".toCharArray(),
234 "acute".toCharArray(),
235 "micro".toCharArray(),
236 "para".toCharArray(),
237 "middot".toCharArray(),
238 "cedil".toCharArray(),
239 "sup1".toCharArray(),
240 "ordm".toCharArray(),
241 "raquo".toCharArray(),
242 "frac14".toCharArray(),
243 "frac12".toCharArray(),
244 "frac34".toCharArray(),
245 "iquest".toCharArray(),
246 "Agrave".toCharArray(),
247 "Aacute".toCharArray(),
248 "Acirc".toCharArray(),
249 "Atilde".toCharArray(),
250 "Auml".toCharArray(),
251 "Aring".toCharArray(),
252 "AElig".toCharArray(),
253 "Ccedil".toCharArray(),
254 "Egrave".toCharArray(),
255 "Eacute".toCharArray(),
256 "Ecirc".toCharArray(),
257 "Euml".toCharArray(),
258 "Igrave".toCharArray(),
259 "Iacute".toCharArray(),
260 "Icirc".toCharArray(),
261 "Iuml".toCharArray(),
262 "ETH".toCharArray(),
263 "Ntilde".toCharArray(),
264 "Ograve".toCharArray(),
265 "Oacute".toCharArray(),
266 "Ocirc".toCharArray(),
267 "Otilde".toCharArray(),
268 "Ouml".toCharArray(),
269 "times".toCharArray(),
270 "Oslash".toCharArray(),
271 "Ugrave".toCharArray(),
272 "Uacute".toCharArray(),
273 "Ucirc".toCharArray(),
274 "Uuml".toCharArray(),
275 "Yacute".toCharArray(),
276 "THORN".toCharArray(),
277 "szlig".toCharArray(),
278 "agrave".toCharArray(),
279 "aacute".toCharArray(),
280 "acirc".toCharArray(),
281 "atilde".toCharArray(),
282 "auml".toCharArray(),
283 "aring".toCharArray(),
284 "aelig".toCharArray(),
285 "ccedil".toCharArray(),
286 "egrave".toCharArray(),
287 "eacute".toCharArray(),
288 "ecirc".toCharArray(),
289 "euml".toCharArray(),
290 "igrave".toCharArray(),
291 "iacute".toCharArray(),
292 "icirc".toCharArray(),
293 "iuml".toCharArray(),
294 "eth".toCharArray(),
295 "ntilde".toCharArray(),
296 "ograve".toCharArray(),
297 "oacute".toCharArray(),
298 "ocirc".toCharArray(),
299 "otilde".toCharArray(),
300 "ouml".toCharArray(),
301 "divide".toCharArray(),
302 "oslash".toCharArray(),
303 "ugrave".toCharArray(),
304 "uacute".toCharArray(),
305 "ucirc".toCharArray(),
306 "uuml".toCharArray(),
307 "yacute".toCharArray(),
308 "thorn".toCharArray(),
309 "yuml".toCharArray()
310 };
311 }