Skip to content

Package: HTMLSerializer

HTMLSerializer

name | instruction | branch | complexity | line | method
HTMLSerializer()
M: 10 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
HTMLSerializer(OutputFormat)
M: 14 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
HTMLSerializer(OutputStream, OutputFormat)
M: 17 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 3 C: 0
0%
M: 1 C: 0
0%
HTMLSerializer(Writer, OutputFormat)
M: 17 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 3 C: 0
0%
M: 1 C: 0
0%
HTMLSerializer(boolean, OutputFormat)
M: 0 C: 10
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 4
100%
M: 0 C: 1
100%
characters(String)
M: 0 C: 7
100%
M: 0 C: 0
100%
M: 0 C: 1
100%
M: 0 C: 3
100%
M: 0 C: 1
100%
characters(char[], int, int)
M: 19 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 7 C: 0
0%
M: 1 C: 0
0%
endElement(String)
M: 6 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
endElement(String, String, String)
M: 13 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 5 C: 0
0%
M: 1 C: 0
0%
endElementIO(String, String, String)
M: 77 C: 64
45%
M: 28 C: 10
26%
M: 17 C: 3
15%
M: 16 C: 18
53%
M: 0 C: 1
100%
escapeURI(String)
M: 13 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 4 C: 0
0%
M: 1 C: 0
0%
getEntityRef(int)
M: 3 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 1 C: 0
0%
M: 1 C: 0
0%
serializeElement(Element)
M: 102 C: 177
63%
M: 26 C: 30
54%
M: 18 C: 11
38%
M: 23 C: 49
68%
M: 0 C: 1
100%
setOutputFormat(OutputFormat)
M: 13 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
setXHTMLNamespace(String)
M: 4 C: 0
0%
M: 0 C: 0
100%
M: 1 C: 0
0%
M: 2 C: 0
0%
M: 1 C: 0
0%
startDocument(String)
M: 120 C: 0
0%
M: 26 C: 0
0%
M: 14 C: 0
0%
M: 32 C: 0
0%
M: 1 C: 0
0%
startElement(String, AttributeList)
M: 234 C: 0
0%
M: 46 C: 0
0%
M: 24 C: 0
0%
M: 60 C: 0
0%
M: 1 C: 0
0%
startElement(String, String, String, Attributes)
M: 369 C: 0
0%
M: 84 C: 0
0%
M: 43 C: 0
0%
M: 91 C: 0
0%
M: 1 C: 0
0%

Coverage

1: /*
2: * Copyright 1999-2004 The Apache Software Foundation.
3: *
4: * Licensed under the Apache License, Version 2.0 (the "License");
5: * you may not use this file except in compliance with the License.
6: * You may obtain a copy of the License at
7: *
8: * http://www.apache.org/licenses/LICENSE-2.0
9: *
10: * Unless required by applicable law or agreed to in writing, software
11: * distributed under the License is distributed on an "AS IS" BASIS,
12: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13: * See the License for the specific language governing permissions and
14: * limitations under the License.
15: */
16:
17:
18: // Sep 14, 2000:
19: // Fixed serializer to report IO exception directly, instead of at
20: // the end of document processing.
21: // Reported by Patrick Higgins <phiggins@transzap.com>
22: // Aug 21, 2000:
23: // Fixed bug in startDocument not calling prepare.
24: // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
25: // Aug 21, 2000:
26: // Added ability to omit DOCTYPE declaration.
27: // Sep 1, 2000:
28: // If no output format is provided the serializer now defaults
29: // to ISO-8859-1 encoding. Reported by Mikael Staldal
30: // <d96-mst@d.kth.se>
31:
32:
33: package it.tidalwave.northernwind.core.impl.patches;
34:
35: import java.util.Collections;
36: import java.util.Enumeration;
37: import java.util.Locale;
38: import java.io.IOException;
39: import java.io.OutputStream;
40: import java.io.Writer;
41: import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
42: import com.sun.org.apache.xml.internal.serialize.BaseMarkupSerializer;
43: import com.sun.org.apache.xml.internal.serialize.ElementState;
44: import com.sun.org.apache.xml.internal.serialize.Method;
45: import com.sun.org.apache.xml.internal.serialize.OutputFormat;
46: import com.sun.org.apache.xml.internal.serialize.Serializer;
47: import org.w3c.dom.Attr;
48: import org.w3c.dom.Element;
49: import org.w3c.dom.NamedNodeMap;
50: import org.w3c.dom.Node;
51: import org.xml.sax.AttributeList;
52: import org.xml.sax.Attributes;
53: import org.xml.sax.SAXException;
54:
55:
56: /**
57: * Implements an HTML/XHTML serializer supporting both DOM and SAX
58: * pretty serializing. HTML/XHTML mode is determined in the
59: * constructor. For usage instructions see {@link Serializer}.
60: * <p>
61: * If an output stream is used, the encoding is taken from the
62: * output format (defaults to <tt>UTF-8</tt>). If a writer is
63: * used, make sure the writer uses the same encoding (if applies)
64: * as specified in the output format.
65: * <p>
66: * The serializer supports both DOM and SAX. DOM serializing is done
67: * by calling {@link #serialize} and SAX serializing is done by firing
68: * SAX events and using the serializer as a document handler.
69: * <p>
70: * If an I/O exception occurs while serializing, the serializer
71: * will not throw an exception directly, but only throw it
72: * at the end of serializing (either DOM or SAX's {@link
73: * org.xml.sax.DocumentHandler#endDocument}).
74: * <p>
75: * For elements that are not specified as whitespace preserving,
76: * the serializer will potentially break long text lines at space
77: * boundaries, indent lines, and serialize elements on separate
78: * lines. Line terminators will be regarded as spaces, and
79: * spaces at beginning of line will be stripped.
80: * <p>
81: * XHTML is slightly different than HTML:
82: * <ul>
83: * <li>Element/attribute names are lower case and case matters
84: * <li>Attributes must specify value, even if empty string
85: * <li>Empty elements must have '/' in empty tag
86: * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
87: * </ul>
88: *
89: * @deprecated This class was deprecated in Xerces 2.6.2. It is
90: * recommended that new applications use JAXP's Transformation API
91: * for XML (TrAX) for serializing HTML. See the Xerces documentation
92: * for more information.
93: * @version $Revision$ $Date$
94: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
95: * @see Serializer
96: */
97: @SuppressWarnings("all")
98: public class HTMLSerializer
99: extends BaseMarkupSerializer
100: {
101:
102:
103: /**
104: * True if serializing in XHTML format; assigned in the protected constructor.
105: */
106: private boolean _xhtml;
107:
108: /** Namespace URI that identifies an element as XHTML when its HTML name is resolved. */
109: public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";
110:
111: // Optional alternate namespace URI treated like XHTMLNamespace; stays null until setXHTMLNamespace() is called.
112: private String fUserXHTMLNamespace = null;
113:
114:
/**
 * Constructs a new HTML/XHTML serializer depending on the value of
 * <tt>xhtml</tt>. The serializer cannot be used without calling
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 *
 * @param xhtml True if XHTML serializing
 * @param format The output format, handed unchanged to the base serializer
 */
protected HTMLSerializer( boolean xhtml, OutputFormat format )
{
    super( format );
    _xhtml = xhtml;
}


/**
 * Constructs a new HTML serializer with the default output format
 * (HTML method, ISO-8859-1 encoding, no indenting). The serializer
 * cannot be used without calling {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} first.
 */
public HTMLSerializer()
{
    this( false, formatOrDefault( null ) );
}


/**
 * Constructs a new HTML serializer. The serializer cannot be used without
 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 * first.
 *
 * @param format The output format to use, null for the default
 */
public HTMLSerializer( OutputFormat format )
{
    this( false, formatOrDefault( format ) );
}


/**
 * Constructs a new serializer that writes to the specified writer
 * using the specified output format. If <tt>format</tt> is null,
 * will use a default output format.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public HTMLSerializer( Writer writer, OutputFormat format )
{
    this( false, formatOrDefault( format ) );
    setOutputCharStream( writer );
}


/**
 * Constructs a new serializer that writes to the specified output
 * stream using the specified output format. If <tt>format</tt>
 * is null, will use a default output format.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public HTMLSerializer( OutputStream output, OutputFormat format )
{
    this( false, formatOrDefault( format ) );
    setOutputByteStream( output );
}


/**
 * Replaces the output format of this serializer.
 *
 * @param format The output format to use, null for the default
 */
public void setOutputFormat( OutputFormat format )
{
    super.setOutputFormat( formatOrDefault( format ) );
}


/**
 * Single definition of the fallback used whenever a caller passes no
 * output format: HTML method, ISO-8859-1 encoding, indenting off.
 * A fresh instance is created on every fallback so serializers never
 * share a format object.
 *
 * @param format The caller-supplied format, may be null
 * @return <tt>format</tt> itself when non-null, otherwise a new default format
 */
private static OutputFormat formatOrDefault( OutputFormat format )
{
    return format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false );
}
186:
187: // Set value for alternate XHTML namespace.
188: public void setXHTMLNamespace(String newNamespace) {
189: fUserXHTMLNamespace = newNamespace;
190: } // setXHTMLNamespace(String)
191:
192: //-----------------------------------------//
193: // SAX content handler serializing methods //
194: //-----------------------------------------//
195:
196:
197: public void startElement( String namespaceURI, String localName,
198: String rawName, Attributes attrs )
199: throws SAXException
200: {
201: int i;
202: boolean preserveSpace;
203: ElementState state;
204: String name;
205: String value;
206: String htmlName;
207: boolean addNSAttr = false;
208:
209: try {
210:• if ( _printer == null )
211:         throw new IllegalStateException(
212:                                  DOMMessageFormatter.formatMessage(
213:                                  DOMMessageFormatter.SERIALIZER_DOMAIN,
214: "NoWriterSupplied", null));
215:
216: state = getElementState();
217:• if ( isDocumentState() ) {
218: // If this is the root element handle it differently.
219: // If the first root element in the document, serialize
220: // the document's DOCTYPE. Space preserving defaults
221: // to that of the output format.
222:• if ( ! _started )
223:• startDocument( (localName == null || localName.length() == 0)
224: ? rawName : localName );
225: } else {
226: // For any other element, if first in parent, then
227: // close parent's opening tag and use the parnet's
228: // space preserving.
229:• if ( state.empty )
230: _printer.printText( '>' );
231: // Indent this element on a new line if the first
232: // content of the parent element or immediately
233: // following an element.
234:• if ( _indenting && ! state.preserveSpace &&
235: ( state.empty || state.afterElement ) )
236: _printer.breakLine();
237: }
238: preserveSpace = state.preserveSpace;
239:
240: // Do not change the current element state yet.
241: // This only happens in endElement().
242:
243: // As per SAX2, the namespace URI is an empty string if the element has no
244: // namespace URI, or namespaces is turned off. The check against null protects
245: // against broken SAX implementations, so I've left it there. - mrglavas
246:• boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
247:
248: // SAX2: rawName (QName) could be empty string if
249: // namespace-prefixes property is false.
250:• if ( rawName == null || rawName.length() == 0) {
251: rawName = localName;
252:• if ( hasNamespaceURI ) {
253: String prefix;
254: prefix = getPrefix( namespaceURI );
255:• if ( prefix != null && prefix.length() != 0 )
256: rawName = prefix + ":" + localName;
257: }
258: addNSAttr = true;
259: }
260:• if ( !hasNamespaceURI )
261: htmlName = rawName;
262: else {
263:• if ( namespaceURI.equals( XHTMLNamespace ) ||
264:• (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
265: htmlName = localName;
266: else
267: htmlName = null;
268: }
269:
270: // XHTML: element names are lower case, DOM will be different
271: _printer.printText( '<' );
272:• if ( _xhtml )
273: _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
274: else
275: _printer.printText( rawName );
276: _printer.indent();
277:
278: // For each attribute serialize it's name and value as one part,
279: // separated with a space so the element can be broken on
280: // multiple lines.
281:• if ( attrs != null ) {
282:• for ( i = 0 ; i < attrs.getLength() ; ++i ) {
283: _printer.printSpace();
284: name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
285: value = attrs.getValue( i );
286:• if ( _xhtml || hasNamespaceURI ) {
287: // XHTML: print empty string for null values.
288:• if ( value == null ) {
289: _printer.printText( name );
290: _printer.printText( "=\"\"" );
291: } else {
292: _printer.printText( name );
293: _printer.printText( "=\"" );
294: printEscaped( value );
295: _printer.printText( '"' );
296: }
297: } else {
298: // HTML: Empty values print as attribute name, no value.
299: // HTML: URI attributes will print unescaped
300:• if ( value == null ) {
301: value = "";
302: }
303:• if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
304: _printer.printText( name );
305:• else if ( HTMLdtd.isURI( rawName, name ) ) {
306: _printer.printText( name );
307: _printer.printText( "=\"" );
308: _printer.printText( escapeURI( value ) );
309: _printer.printText( '"' );
310:• } else if ( HTMLdtd.isBoolean( rawName, name ) )
311: _printer.printText( name );
312: else {
313: _printer.printText( name );
314: _printer.printText( "=\"" );
315: printEscaped( value );
316: _printer.printText( '"' );
317: }
318: }
319: }
320: }
321:• if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
322: preserveSpace = true;
323:
324:• if ( addNSAttr ) {
325: Enumeration keys;
326:
327: // keys = _prefixes.keys();
328: keys = Collections.enumeration(_prefixes.keySet());
329:• while ( keys.hasMoreElements() ) {
330: _printer.printSpace();
331: value = (String) keys.nextElement();
332: name = (String) _prefixes.get( value );
333:• if ( name.length() == 0 ) {
334: _printer.printText( "xmlns=\"" );
335: printEscaped( value );
336: _printer.printText( '"' );
337: } else {
338: _printer.printText( "xmlns:" );
339: _printer.printText( name );
340: _printer.printText( "=\"" );
341: printEscaped( value );
342: _printer.printText( '"' );
343: }
344: }
345: }
346:
347: // Now it's time to enter a new element state
348: // with the tag name and space preserving.
349: // We still do not change the curent element state.
350: state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
351:
352: // Prevents line breaks inside A/TD
353:
354:• if ( HTMLdtd.isPreserveSpace(htmlName)) {
355: state.empty = false;
356: _printer.printText( '>' );
357: }
358:
359: // Handle SCRIPT and STYLE specifically by changing the
360: // state of the current element to CDATA (XHTML) or
361: // unescaped (HTML).
362:• if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
363:• rawName.equalsIgnoreCase( "STYLE" ) ) ) {
364:• if ( _xhtml ) {
365: // XHTML: Print contents as CDATA section
366: state.doCData = true;
367: } else {
368: // HTML: Print contents unescaped
369: state.unescaped = true;
370: }
371: }
372: } catch ( IOException except ) {
373: throw new SAXException( except );
374: }
375: }
376:
377:
378: public void endElement( String namespaceURI, String localName,
379: String rawName )
380: throws SAXException
381: {
382: try {
383: endElementIO( namespaceURI, localName, rawName );
384: } catch ( IOException except ) {
385: throw new SAXException( except );
386: }
387: }
388:
389:
390: public void endElementIO( String namespaceURI, String localName,
391: String rawName )
392: throws IOException
393: {
394: ElementState state;
395: String htmlName;
396:
397: // Works much like content() with additions for closing
398: // an element. Note the different checks for the closed
399: // element's state and the parent element's state.
400: _printer.unindent();
401: state = getElementState();
402:
403:• if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
404: htmlName = state.rawName;
405: else {
406:• if ( state.namespaceURI.equals( XHTMLNamespace ) ||
407:• (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
408: htmlName = state.localName;
409: else
410: htmlName = null;
411: }
412:
413:• if ( _xhtml) {
414:• if ( state.empty ) {
415: _printer.printText( " />" );
416: } else {
417: // Must leave CData section first
418:• if ( state.inCData )
419: _printer.printText( "]]>" );
420: // XHTML: element names are lower case, DOM will be different
421: _printer.printText( "</" );
422: _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
423: _printer.printText( '>' );
424: }
425: } else {
426:• if ( state.empty )
427: _printer.printText( '>' );
428: // This element is not empty and that last content was
429: // another element, so print a line break before that
430: // last element and this element's closing tag.
431: // [keith] Provided this is not an anchor.
432: // HTML: some elements do not print closing tag (e.g. LI)
433:• if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
434:• if ( _indenting && ! state.preserveSpace && state.afterElement )
435: _printer.breakLine();
436: // Must leave CData section first (Illegal in HTML, but still)
437:• if ( state.inCData )
438: _printer.printText( "]]>" );
439: _printer.printText( "</" );
440: _printer.printText( state.rawName );
441: _printer.printText( '>' );
442: }
443: }
444: // Leave the element state and update that of the parent
445: // (if we're not root) to not empty and after element.
446: state = leaveElementState();
447: // Temporary hack to prevent line breaks inside A/TD
448:• if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
449:• ! htmlName.equalsIgnoreCase( "TD" ) ) )
450:
451: state.afterElement = true;
452: state.empty = false;
453:• if ( isDocumentState() )
454: _printer.flush();
455: }
456:
457:
458: //------------------------------------------//
459: // SAX document handler serializing methods //
460: //------------------------------------------//
461:
462:
463: public void characters( char[] chars, int start, int length )
464: throws SAXException
465: {
466: ElementState state;
467:
468: try {
469: // HTML: no CDATA section
470: state = content();
471: state.doCData = false;
472: super.characters( chars, start, length );
473: } catch ( IOException except ) {
474: throw new SAXException( except );
475: }
476: }
477:
478:
479: public void startElement( String tagName, AttributeList attrs )
480: throws SAXException
481: {
482: int i;
483: boolean preserveSpace;
484: ElementState state;
485: String name;
486: String value;
487:
488: try {
489:• if ( _printer == null )
490: throw new IllegalStateException(
491:                                  DOMMessageFormatter.formatMessage(
492:                                  DOMMessageFormatter.SERIALIZER_DOMAIN,
493: "NoWriterSupplied", null));
494:
495:
496: state = getElementState();
497:• if ( isDocumentState() ) {
498: // If this is the root element handle it differently.
499: // If the first root element in the document, serialize
500: // the document's DOCTYPE. Space preserving defaults
501: // to that of the output format.
502:• if ( ! _started )
503: startDocument( tagName );
504: } else {
505: // For any other element, if first in parent, then
506: // close parent's opening tag and use the parnet's
507: // space preserving.
508:• if ( state.empty )
509: _printer.printText( '>' );
510: // Indent this element on a new line if the first
511: // content of the parent element or immediately
512: // following an element.
513:• if ( _indenting && ! state.preserveSpace &&
514: ( state.empty || state.afterElement ) )
515: _printer.breakLine();
516: }
517: preserveSpace = state.preserveSpace;
518:
519: // Do not change the current element state yet.
520: // This only happens in endElement().
521:
522: // XHTML: element names are lower case, DOM will be different
523: _printer.printText( '<' );
524:• if ( _xhtml )
525: _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
526: else
527: _printer.printText( tagName );
528: _printer.indent();
529:
530: // For each attribute serialize it's name and value as one part,
531: // separated with a space so the element can be broken on
532: // multiple lines.
533:• if ( attrs != null ) {
534:• for ( i = 0 ; i < attrs.getLength() ; ++i ) {
535: _printer.printSpace();
536: name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
537: value = attrs.getValue( i );
538:• if ( _xhtml ) {
539: // XHTML: print empty string for null values.
540:• if ( value == null ) {
541: _printer.printText( name );
542: _printer.printText( "=\"\"" );
543: } else {
544: _printer.printText( name );
545: _printer.printText( "=\"" );
546: printEscaped( value );
547: _printer.printText( '"' );
548: }
549: } else {
550: // HTML: Empty values print as attribute name, no value.
551: // HTML: URI attributes will print unescaped
552:• if ( value == null ) {
553: value = "";
554: }
555:• if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
556: _printer.printText( name );
557:• else if ( HTMLdtd.isURI( tagName, name ) ) {
558: _printer.printText( name );
559: _printer.printText( "=\"" );
560: _printer.printText( escapeURI( value ) );
561: _printer.printText( '"' );
562:• } else if ( HTMLdtd.isBoolean( tagName, name ) )
563: _printer.printText( name );
564: else {
565: _printer.printText( name );
566: _printer.printText( "=\"" );
567: printEscaped( value );
568: _printer.printText( '"' );
569: }
570: }
571: }
572: }
573:• if ( HTMLdtd.isPreserveSpace( tagName ) )
574: preserveSpace = true;
575:
576: // Now it's time to enter a new element state
577: // with the tag name and space preserving.
578: // We still do not change the curent element state.
579: state = enterElementState( null, null, tagName, preserveSpace );
580:
581:• if ( HTMLdtd.isPreserveSpace(tagName)) {
582: state.empty = false;
583: _printer.printText( '>' );
584: }
585:
586: // Handle SCRIPT and STYLE specifically by changing the
587: // state of the current element to CDATA (XHTML) or
588: // unescaped (HTML).
589:• if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
590:• tagName.equalsIgnoreCase( "STYLE" ) ) {
591:• if ( _xhtml ) {
592: // XHTML: Print contents as CDATA section
593: state.doCData = true;
594: } else {
595: // HTML: Print contents unescaped
596: state.unescaped = true;
597: }
598: }
599: } catch ( IOException except ) {
600: throw new SAXException( except );
601: }
602: }
603:
604:
605: public void endElement( String tagName )
606: throws SAXException
607: {
608: endElement( null, null, tagName );
609: }
610:
611:
612: //------------------------------------------//
613: // Generic node serializing methods         //
614: //------------------------------------------//
615:
616:
617: /**
618: * Called to serialize the document's DOCTYPE by the root element.
619: * The document type declaration must name the root element,
620: * but the root element is only known when that element is serialized,
621: * and not at the start of the document.
622: * <p>
623: * This method will check if it has not been called before ({@link #_started}),
624: * will serialize the document type declaration, and will serialize all
625: * pre-root comments and PIs that were accumulated in the document
626: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
627: * this is not the first root element of the document.
628: */
629: protected void startDocument( String rootTagName )
630: throws IOException
631: {
632: StringBuffer buffer;
633:
634: // Not supported in HTML/XHTML, but we still have to switch
635: // out of DTD mode.
636: _printer.leaveDTD();
637:• if ( ! _started ) {
638: // If the public and system identifiers were not specified
639: // in the output format, use the appropriate ones for HTML
640: // or XHTML.
641:• if ( _docTypePublicId == null && _docTypeSystemId == null ) {
642:• if ( _xhtml ) {
643: _docTypePublicId = HTMLdtd.XHTMLPublicId;
644: _docTypeSystemId = HTMLdtd.XHTMLSystemId;
645: } else {
646: _docTypePublicId = HTMLdtd.HTMLPublicId;
647: _docTypeSystemId = HTMLdtd.HTMLSystemId;
648: }
649: }
650:
651:• if ( ! _format.getOmitDocumentType() ) {
652: // XHTML: If public identifier and system identifier
653: // specified, print them, else print just system identifier
654: // HTML: If public identifier specified, print it with
655: // system identifier, if specified.
656: // XHTML requires that all element names are lower case, so the
657: // root on the DOCTYPE must be 'html'. - mrglavas
658:• if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) {
659:• if (_xhtml) {
660: _printer.printText( "<!DOCTYPE html PUBLIC " );
661: }
662: else {
663: _printer.printText( "<!DOCTYPE HTML PUBLIC " );
664: }
665: printDoctypeURL( _docTypePublicId );
666:• if ( _docTypeSystemId != null ) {
667:• if ( _indenting ) {
668: _printer.breakLine();
669: _printer.printText( " " );
670: } else
671: _printer.printText( ' ' );
672: printDoctypeURL( _docTypeSystemId );
673: }
674: _printer.printText( '>' );
675: _printer.breakLine();
676:• } else if ( _docTypeSystemId != null ) {
677:• if (_xhtml) {
678: _printer.printText( "<!DOCTYPE html SYSTEM " );
679: }
680: else {
681: _printer.printText( "<!DOCTYPE HTML SYSTEM " );
682: }
683: printDoctypeURL( _docTypeSystemId );
684: _printer.printText( '>' );
685: _printer.breakLine();
686: }
687: }
688: }
689:
690: _started = true;
691: // Always serialize these, even if not te first root element.
692: serializePreRoot();
693: }
694:
695:
696: /**
697: * Called to serialize a DOM element. Equivalent to calling {@link
698: * #startElement}, {@link #endElement} and serializing everything
699: * inbetween, but better optimized.
700: */
701: protected void serializeElement( Element elem )
702: throws IOException
703: {
704: Attr attr;
705: NamedNodeMap attrMap;
706: int i;
707: Node child;
708: ElementState state;
709: boolean preserveSpace;
710: String name;
711: String value;
712: String tagName;
713:
714: tagName = elem.getTagName();
715: state = getElementState();
716:• if ( isDocumentState() ) {
717: // If this is the root element handle it differently.
718: // If the first root element in the document, serialize
719: // the document's DOCTYPE. Space preserving defaults
720: // to that of the output format.
721:• if ( ! _started )
722: startDocument( tagName );
723: } else {
724: // For any other element, if first in parent, then
725: // close parent's opening tag and use the parnet's
726: // space preserving.
727:• if ( state.empty )
728: _printer.printText( '>' );
729: // Indent this element on a new line if the first
730: // content of the parent element or immediately
731: // following an element.
732:• if ( _indenting && ! state.preserveSpace &&
733: ( state.empty || state.afterElement ) )
734: _printer.breakLine();
735: }
736: preserveSpace = state.preserveSpace;
737:
738: // Do not change the current element state yet.
739: // This only happens in endElement().
740:
741: // XHTML: element names are lower case, DOM will be different
742: _printer.printText( '<' );
743:• if ( _xhtml )
744: _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
745: else
746: _printer.printText( tagName );
747: _printer.indent();
748:
749: // Lookup the element's attribute, but only print specified
750: // attributes. (Unspecified attributes are derived from the DTD.
751: // For each attribute print it's name and value as one part,
752: // separated with a space so the element can be broken on
753: // multiple lines.
754: attrMap = elem.getAttributes();
755:• if ( attrMap != null ) {
756:• for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
757: attr = (Attr) attrMap.item( i );
758: name = attr.getName().toLowerCase(Locale.ENGLISH);
759: value = attr.getValue();
760:• if ( attr.getSpecified() ) {
761: _printer.printSpace();
762:• if ( _xhtml ) {
763: // XHTML: print empty string for null values.
764:• if ( value == null ) {
765: _printer.printText( name );
766: _printer.printText( "=\"\"" );
767: } else {
768: _printer.printText( name );
769: _printer.printText( "=\"" );
770: printEscaped( value );
771: _printer.printText( '"' );
772: }
773: } else {
774: // HTML: Empty values print as attribute name, no value.
775: // HTML: URI attributes will print unescaped
776:• if ( value == null ) {
777: value = "";
778: }
779:• if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
780: _printer.printText( name );
781:• else if ( HTMLdtd.isURI( tagName, name ) ) {
782: _printer.printText( name );
783: _printer.printText( "=\"" );
784: _printer.printText( escapeURI( value ) );
785: _printer.printText( '"' );
786:• } else if ( HTMLdtd.isBoolean( tagName, name ) )
787: _printer.printText( name );
788: else {
789: _printer.printText( name );
790: _printer.printText( "=\"" );
791: printEscaped( value );
792: _printer.printText( '"' );
793: }
794: }
795: }
796: }
797: }
798:• if ( HTMLdtd.isPreserveSpace( tagName ) )
799: preserveSpace = true;
800:
801: // If element has children, or if element is not an empty tag,
802: // serialize an opening tag.
803:• if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
804: // Enter an element state, and serialize the children
805: // one by one. Finally, end the element.
806: state = enterElementState( null, null, tagName, preserveSpace );
807:
808:• if ( HTMLdtd.isPreserveSpace(tagName)) {
809: state.empty = false;
810: _printer.printText( '>' );
811: }
812:
813: // Handle SCRIPT and STYLE specifically by changing the
814: // state of the current element to CDATA (XHTML) or
815: // unescaped (HTML).
816:• if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
817:• tagName.equalsIgnoreCase( "STYLE" ) ) {
818:• if ( _xhtml ) {
819: // XHTML: Print contents as CDATA section
820: state.doCData = true;
821: } else {
822: // HTML: Print contents unescaped
823: state.unescaped = true;
824: }
825: }
826: child = elem.getFirstChild();
827:• while ( child != null ) {
828: serializeNode( child );
829: child = child.getNextSibling();
830: }
831: endElementIO( null, null, tagName );
832: } else {
833: _printer.unindent();
834: // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
835: // HTML: Empty tags are defined as such in DTD no in document.
836:• if ( _xhtml )
837: _printer.printText( " />" );
838: else
839: _printer.printText( '>' );
840: // After element but parent element is no longer empty.
841: state.afterElement = true;
842: state.empty = false;
843:• if ( isDocumentState() )
844: _printer.flush();
845: }
846: }
847:
848:
849:
850: protected void characters( String text )
851: throws IOException
852: {
853: ElementState state;
854:
855: // HTML: no CDATA section
856: state = content();
857: super.characters( text );
858: }
859:
860:
861: protected String getEntityRef( int ch )
862: {
863: return HTMLdtd.fromChar( ch );
864: }
865:
866:
867: protected String escapeURI( String uri )
868: {
869: int index;
870:
871: // XXX Apparently Netscape doesn't like if we escape the URI
872: // using %nn, so we leave it as is, just remove any quotes.
873: index = uri.indexOf( "\"" );
874:• if ( index >= 0 )
875: return uri.substring( 0, index );
876: else
877: return uri;
878: }
879:
880:
881: }
882:
883:
884:
885: