1 | // Copyright (C) 2007 Google Inc. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | package com.google.caja.parser.html; |
16 | |
17 | import com.google.caja.lexer.FilePosition; |
18 | import com.google.caja.lexer.HtmlTokenType; |
19 | import com.google.caja.lexer.Token; |
20 | import com.google.caja.reporting.MessageQueue; |
21 | |
22 | import java.util.List; |
23 | |
24 | import org.w3c.dom.Document; |
25 | import org.w3c.dom.DocumentFragment; |
26 | |
27 | /** |
28 | * Consumes SAX style events (tag name and attributes) from the |
29 | * {@link DomParser} to build a DomTree. |
30 | * |
31 | * <p> |
32 | * Instances of this class are not reusable over multiple parses. |
33 | * |
34 | * <p> |
35 | * The {@link OpenElementStack.Factory Factory} class has implementations of |
36 | * this interface for both |
37 | * {@link OpenElementStack.Factory#createHtml5ElementStack HTML} |
38 | * and a trivial one for all |
39 | * {@link OpenElementStack.Factory#createXmlElementStack XML} including XHTML. |
40 | * |
41 | * @see <a href="http://www.whatwg.org/specs/web-apps/current-work/">HTML5</a> |
42 | * @see <a href="http://www.w3.org/TR/REC-xml/">XML</a> |
43 | * @see <a href="http://james.html5.org/parsetree.html">HTML5 Validator</a> |
44 | * @see <a href="http://html5lib.googlecode.com/svn/trunk/testdata/">Tests</a> |
45 | * @see <a href="http://wiki.whatwg.org/wiki/Parser_tests">More Tests</a> |
46 | * |
47 | * @author mikesamuel@gmail.com |
48 | */ |
49 | public interface OpenElementStack { |
50 | /** The document used to create Nodes. */ |
51 | Document getDocument(); |
52 | |
53 | /** |
54 | * The root element. |
55 | */ |
56 | DocumentFragment getRootElement(); |
57 | |
58 | /** |
59 | * Records the fact that a tag has been seen, updating internal state |
60 | * |
61 | * @param start the token of the beginning of the tag, so {@code "<p"} for a |
62 | * paragraph start, {@code </p} for an end tag. |
63 | * @param end the token of the beginning of the tag, so {@code ">"} for a |
64 | * paragraph start, {@code />} for an unary break tag. |
65 | * @param attrs the attributes for the element. This will be empty |
66 | * for end tags. |
67 | */ |
68 | void processTag(Token<HtmlTokenType> start, Token<HtmlTokenType> end, |
69 | List<AttrStub> attrs) |
70 | throws IllegalDocumentStateException; |
71 | |
72 | /** |
73 | * Adds the given text node to the DOM. |
74 | */ |
75 | void processText(Token<HtmlTokenType> text); |
76 | |
77 | /** |
78 | * Adds the given comment node to the DOM. |
79 | */ |
80 | void processComment(Token<HtmlTokenType> comment); |
81 | |
82 | /** |
83 | * Called before parsing starts. |
84 | * |
85 | * @param isFragment true to parse a fragment, not a full html document. |
86 | */ |
87 | void open(boolean isFragment); |
88 | |
89 | /** |
90 | * Check that the document is in a consistent state, by checking that all |
91 | * elements that need to be closed, have been properly closed. |
92 | * |
93 | * This method may modify the DOM, e.g. by removing ignorable text nodes from |
94 | * the root to ensure a single document element. |
95 | * |
96 | * @param endOfFile position at which parsing ends. |
97 | */ |
98 | void finish(FilePosition endOfFile) |
99 | throws IllegalDocumentStateException; |
100 | |
101 | boolean needsNamespaceFixup(); |
102 | |
103 | /** |
104 | * Returns text with semicolons added to entities that lack them. This may |
105 | * emit {@link com.google.caja.reporting.MessageType#MALFORMED_HTML_ENTITY} |
106 | * messages about missing semicolons. |
107 | * @param textPos the position of rawText in the input. |
108 | */ |
109 | String fixBrokenEntities(String rawText, FilePosition textPos); |
110 | |
111 | /** |
112 | * Constructors. |
113 | */ |
114 | public static final class Factory { |
115 | /** |
116 | * @param doc the document used to create DOM nodes. |
117 | * @param needsDebugData see {@link DomParser#setNeedsDebugData(boolean)} |
118 | * @param mq receives parser warnings. |
119 | */ |
120 | public static OpenElementStack createHtml5ElementStack( |
121 | Document doc, boolean needsDebugData, MessageQueue mq) { |
122 | return new Html5ElementStack(doc, needsDebugData, mq); |
123 | } |
124 | |
125 | /** |
126 | * @param doc the document used to create DOM nodes. |
127 | * @param needsDebugData see {@link DomParser#setNeedsDebugData(boolean)} |
128 | * @param mq receives parser warnings. |
129 | */ |
130 | public static OpenElementStack createXmlElementStack( |
131 | Document doc, boolean needsDebugData, Namespaces ns, MessageQueue mq) { |
132 | return new XmlElementStack(doc, ns, needsDebugData, mq); |
133 | } |
134 | |
135 | private Factory() { /* no zero-argument ctor */ } |
136 | } |
137 | } |
138 | |