blob: 26c48e29773c1d5a086cf6be1d4ca730538b826c [file] [log] [blame]
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package com.google.gitiles.doc.html;
16
17import static com.google.common.base.Preconditions.checkArgument;
18import static com.google.common.base.Preconditions.checkState;
19
20import com.google.common.base.Strings;
21import com.google.common.collect.ImmutableSet;
22import com.google.template.soy.data.SanitizedContent;
23import com.google.template.soy.data.SanitizedContent.ContentKind;
24import com.google.template.soy.data.UnsafeSanitizedContentOrdainer;
25import com.google.template.soy.shared.restricted.EscapingConventions.EscapeHtml;
26import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri;
27import com.google.template.soy.shared.restricted.EscapingConventions.FilterNormalizeUri;
Shawn Pearce743ca742015-02-11 13:05:05 -080028import java.io.IOException;
29import java.util.regex.Pattern;
30
31/**
32 * Builds a document fragment using a restricted subset of HTML.
33 * <p>
34 * Most attributes are rejected ({@code style}, {@code onclick}, ...) by
35 * throwing IllegalArgumentException if the caller attempts to add them to a
36 * pending element.
37 * <p>
38 * Useful but critical attributes like {@code href} on anchors or {@code src} on
39 * img permit only safe subset of URIs, primarily {@code http://},
40 * {@code https://}, and for image src {@code data:image/*;base64,...}.
41 */
42public final class HtmlBuilder {
Han-Wen Nienhuysc0200f62016-05-02 17:34:51 +020043 private static final ImmutableSet<String> ALLOWED_TAGS =
44 ImmutableSet.of(
45 "h1",
46 "h2",
47 "h3",
48 "h4",
49 "h5",
50 "h6",
51 "a",
52 "div",
53 "img",
54 "p",
55 "blockquote",
56 "pre",
57 "ol",
58 "ul",
59 "li",
60 "dl",
61 "dd",
62 "dt",
63 "del",
64 "em",
65 "strong",
66 "code",
67 "br",
68 "hr",
69 "table",
70 "thead",
71 "tbody",
72 "caption",
73 "tr",
74 "th",
75 "td",
76 "iframe",
77 "span");
Shawn Pearce743ca742015-02-11 13:05:05 -080078
Han-Wen Nienhuysc0200f62016-05-02 17:34:51 +020079 private static final ImmutableSet<String> ALLOWED_ATTRIBUTES =
80 ImmutableSet.of("id", "class", "role");
Shawn Pearce743ca742015-02-11 13:05:05 -080081
Han-Wen Nienhuysc0200f62016-05-02 17:34:51 +020082 private static final ImmutableSet<String> SELF_CLOSING_TAGS = ImmutableSet.of("img", "br", "hr");
Shawn Pearce743ca742015-02-11 13:05:05 -080083
84 private static final FilterNormalizeUri URI = FilterNormalizeUri.INSTANCE;
85 private static final FilterImageDataUri IMAGE_DATA = FilterImageDataUri.INSTANCE;
86
Shawn Pearce532b62f2016-06-05 12:20:38 -070087 private static final Pattern GIT_URI =
88 Pattern.compile(
89 "^"
90 +
91 // Reject paths containing /../ or ending in /..
92 "(?![^#?]*/(?:\\.|%2E){2}(?:[/?#]|\\z))"
93 +
94 // Accept git://host/path
95 "git://[^/]+/.+",
96 Pattern.CASE_INSENSITIVE);
97
Shawn Pearceee0b06e2015-02-13 00:13:01 -080098 public static boolean isValidCssDimension(String val) {
99 return val != null && val.matches("(?:[1-9][0-9]*px|100%|[1-9][0-9]?%)");
100 }
101
102 public static boolean isValidHttpUri(String val) {
Han-Wen Nienhuysc0200f62016-05-02 17:34:51 +0200103 return (val.startsWith("https://") || val.startsWith("http://") || val.startsWith("//"))
Shawn Pearceee0b06e2015-02-13 00:13:01 -0800104 && URI.getValueFilter().matcher(val).find();
105 }
106
Shawn Pearce99cdbce2015-02-10 12:05:45 -0800107 /** Check if URL is valid for {@code <img src="data:image/*;base64,...">}. */
108 public static boolean isImageDataUri(String url) {
109 return IMAGE_DATA.getValueFilter().matcher(url).find();
110 }
111
Shawn Pearce532b62f2016-06-05 12:20:38 -0700112 public static boolean isValidGitUri(String val) {
113 return GIT_URI.matcher(val).find();
114 }
115
Shawn Pearce743ca742015-02-11 13:05:05 -0800116 private final StringBuilder htmlBuf;
117 private final Appendable textBuf;
118 private String tag;
119
120 public HtmlBuilder() {
121 htmlBuf = new StringBuilder();
122 textBuf = EscapeHtml.INSTANCE.escape(htmlBuf);
123 }
124
125 /** Begin a new HTML tag. */
126 public HtmlBuilder open(String tagName) {
127 checkArgument(ALLOWED_TAGS.contains(tagName), "invalid HTML tag %s", tagName);
128 finishActiveTag();
129 htmlBuf.append('<').append(tagName);
130 tag = tagName;
131 return this;
132 }
133
134 /** Filter and append an attribute to the last tag. */
135 public HtmlBuilder attribute(String att, String val) {
136 if (Strings.isNullOrEmpty(val)) {
137 return this;
138 } else if ("href".equals(att) && "a".equals(tag)) {
139 val = anchorHref(val);
140 } else if ("src".equals(att) && "img".equals(tag)) {
141 val = imgSrc(val);
Shawn Pearceee0b06e2015-02-13 00:13:01 -0800142 } else if ("src".equals(att) && "iframe".equals(tag)) {
143 if (!isValidHttpUri(val)) {
144 return this;
145 }
146 val = URI.escape(val);
147 } else if (("height".equals(att) || "width".equals(att)) && "iframe".equals(tag)) {
148 val = isValidCssDimension(val) ? val : "250px";
Shawn Pearce743ca742015-02-11 13:05:05 -0800149 } else if ("alt".equals(att) && "img".equals(tag)) {
150 // allow
151 } else if ("title".equals(att) && ("img".equals(tag) || "a".equals(tag))) {
152 // allow
Shawn Pearce25d91962015-06-22 15:35:36 -0700153 } else if ("name".equals(att) && "a".equals(tag)) {
154 // allow
Shawn Pearce12c8fab2016-05-15 16:55:21 -0700155 } else if ("start".equals(att) && "ol".equals(tag)) {
156 // allow
Shawn Pearce743ca742015-02-11 13:05:05 -0800157 } else if (("colspan".equals(att) || "align".equals(att))
158 && ("td".equals(tag) || "th".equals(tag))) {
159 // allow
160 } else {
161 checkState(tag != null, "tag must be pending");
162 checkArgument(ALLOWED_ATTRIBUTES.contains(att), "invalid attribute %s", att);
163 }
164
165 try {
166 htmlBuf.append(' ').append(att).append("=\"");
167 textBuf.append(val);
168 htmlBuf.append('"');
169 return this;
170 } catch (IOException e) {
171 throw new IllegalStateException(e);
172 }
173 }
174
175 private String anchorHref(String val) {
Shawn Pearce532b62f2016-06-05 12:20:38 -0700176 if (URI.getValueFilter().matcher(val).find() || isValidGitUri(val)) {
Shawn Pearce743ca742015-02-11 13:05:05 -0800177 return URI.escape(val);
178 }
179 return URI.getInnocuousOutput();
180 }
181
182 private static String imgSrc(String val) {
Shawn Pearceee0b06e2015-02-13 00:13:01 -0800183 if (isValidHttpUri(val)) {
Shawn Pearce743ca742015-02-11 13:05:05 -0800184 return URI.escape(val);
185 }
Shawn Pearce99cdbce2015-02-10 12:05:45 -0800186 if (isImageDataUri(val)) {
Shawn Pearce743ca742015-02-11 13:05:05 -0800187 return val; // pass through data:image/*;base64,...
188 }
189 return IMAGE_DATA.getInnocuousOutput();
190 }
191
192 private void finishActiveTag() {
193 if (tag != null) {
194 if (SELF_CLOSING_TAGS.contains(tag)) {
195 htmlBuf.append(" />");
196 } else {
197 htmlBuf.append('>');
198 }
199 tag = null;
200 }
201 }
202
203 /** Close an open tag with {@code </tag>} */
204 public HtmlBuilder close(String tag) {
Shawn Pearceb304a052015-02-12 21:39:55 -0800205 checkArgument(
Han-Wen Nienhuysc0200f62016-05-02 17:34:51 +0200206 ALLOWED_TAGS.contains(tag) && !SELF_CLOSING_TAGS.contains(tag), "invalid HTML tag %s", tag);
Shawn Pearce743ca742015-02-11 13:05:05 -0800207
208 finishActiveTag();
209 htmlBuf.append("</").append(tag).append('>');
210 return this;
211 }
212
213 /** Escapes and appends any text as a child of the current element. */
214 public HtmlBuilder appendAndEscape(CharSequence in) {
215 try {
216 finishActiveTag();
217 textBuf.append(in);
218 return this;
219 } catch (IOException e) {
220 throw new IllegalStateException(e);
221 }
222 }
223
Shawn Pearce12c8fab2016-05-15 16:55:21 -0700224 /** Append a space outside of an element. */
225 public HtmlBuilder space() {
226 finishActiveTag();
227 htmlBuf.append(' ');
228 return this;
229 }
230
Shawn Pearce743ca742015-02-11 13:05:05 -0800231 private static final Pattern HTML_ENTITY = Pattern.compile("&[a-z]+;");
232
233 /** Append constant entity reference like {@code &nbsp;}. */
234 public void entity(String entity) {
235 checkArgument(HTML_ENTITY.matcher(entity).matches(), "invalid entity %s", entity);
236 finishActiveTag();
237 htmlBuf.append(entity);
238 }
239
240 /** Bless the current content as HTML. */
241 public SanitizedContent toSoy() {
242 finishActiveTag();
Han-Wen Nienhuysc0200f62016-05-02 17:34:51 +0200243 return UnsafeSanitizedContentOrdainer.ordainAsSafe(htmlBuf.toString(), ContentKind.HTML);
Shawn Pearce743ca742015-02-11 13:05:05 -0800244 }
245}