| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 1 | // Copyright 2015 Google Inc. All Rights Reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package com.google.gitiles.doc.html; |
| 16 | |
| 17 | import static com.google.common.base.Preconditions.checkArgument; |
| 18 | import static com.google.common.base.Preconditions.checkState; |
| 19 | |
| 20 | import com.google.common.base.Strings; |
| 21 | import com.google.common.collect.ImmutableSet; |
| 22 | import com.google.template.soy.data.SanitizedContent; |
| 23 | import com.google.template.soy.data.SanitizedContent.ContentKind; |
| 24 | import com.google.template.soy.data.UnsafeSanitizedContentOrdainer; |
| 25 | import com.google.template.soy.shared.restricted.EscapingConventions.EscapeHtml; |
| 26 | import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri; |
| 27 | import com.google.template.soy.shared.restricted.EscapingConventions.FilterNormalizeUri; |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 28 | import java.io.IOException; |
| 29 | import java.util.regex.Pattern; |
| 30 | |
| 31 | /** |
| 32 | * Builds a document fragment using a restricted subset of HTML. |
| 33 | * <p> |
| 34 | * Most attributes are rejected ({@code style}, {@code onclick}, ...) by |
| 35 | * throwing IllegalArgumentException if the caller attempts to add them to a |
| 36 | * pending element. |
| 37 | * <p> |
| 38 | * Useful but critical attributes like {@code href} on anchors or {@code src} on |
| 39 | * img permit only safe subset of URIs, primarily {@code http://}, |
| 40 | * {@code https://}, and for image src {@code data:image/*;base64,...}. |
| 41 | */ |
| 42 | public final class HtmlBuilder { |
| Han-Wen Nienhuys | c0200f6 | 2016-05-02 17:34:51 +0200 | [diff] [blame] | 43 | private static final ImmutableSet<String> ALLOWED_TAGS = |
| 44 | ImmutableSet.of( |
| 45 | "h1", |
| 46 | "h2", |
| 47 | "h3", |
| 48 | "h4", |
| 49 | "h5", |
| 50 | "h6", |
| 51 | "a", |
| 52 | "div", |
| 53 | "img", |
| 54 | "p", |
| 55 | "blockquote", |
| 56 | "pre", |
| 57 | "ol", |
| 58 | "ul", |
| 59 | "li", |
| 60 | "dl", |
| 61 | "dd", |
| 62 | "dt", |
| 63 | "del", |
| 64 | "em", |
| 65 | "strong", |
| 66 | "code", |
| 67 | "br", |
| 68 | "hr", |
| 69 | "table", |
| 70 | "thead", |
| 71 | "tbody", |
| 72 | "caption", |
| 73 | "tr", |
| 74 | "th", |
| 75 | "td", |
| 76 | "iframe", |
| 77 | "span"); |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 78 | |
| Han-Wen Nienhuys | c0200f6 | 2016-05-02 17:34:51 +0200 | [diff] [blame] | 79 | private static final ImmutableSet<String> ALLOWED_ATTRIBUTES = |
| 80 | ImmutableSet.of("id", "class", "role"); |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 81 | |
| Han-Wen Nienhuys | c0200f6 | 2016-05-02 17:34:51 +0200 | [diff] [blame] | 82 | private static final ImmutableSet<String> SELF_CLOSING_TAGS = ImmutableSet.of("img", "br", "hr"); |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 83 | |
| 84 | private static final FilterNormalizeUri URI = FilterNormalizeUri.INSTANCE; |
| 85 | private static final FilterImageDataUri IMAGE_DATA = FilterImageDataUri.INSTANCE; |
| 86 | |
| Shawn Pearce | 532b62f | 2016-06-05 12:20:38 -0700 | [diff] [blame] | 87 | private static final Pattern GIT_URI = |
| 88 | Pattern.compile( |
| 89 | "^" |
| 90 | + |
| 91 | // Reject paths containing /../ or ending in /.. |
| 92 | "(?![^#?]*/(?:\\.|%2E){2}(?:[/?#]|\\z))" |
| 93 | + |
| 94 | // Accept git://host/path |
| 95 | "git://[^/]+/.+", |
| 96 | Pattern.CASE_INSENSITIVE); |
| 97 | |
| Shawn Pearce | ee0b06e | 2015-02-13 00:13:01 -0800 | [diff] [blame] | 98 | public static boolean isValidCssDimension(String val) { |
| 99 | return val != null && val.matches("(?:[1-9][0-9]*px|100%|[1-9][0-9]?%)"); |
| 100 | } |
| 101 | |
| 102 | public static boolean isValidHttpUri(String val) { |
| Han-Wen Nienhuys | c0200f6 | 2016-05-02 17:34:51 +0200 | [diff] [blame] | 103 | return (val.startsWith("https://") || val.startsWith("http://") || val.startsWith("//")) |
| Shawn Pearce | ee0b06e | 2015-02-13 00:13:01 -0800 | [diff] [blame] | 104 | && URI.getValueFilter().matcher(val).find(); |
| 105 | } |
| 106 | |
| Shawn Pearce | 99cdbce | 2015-02-10 12:05:45 -0800 | [diff] [blame] | 107 | /** Check if URL is valid for {@code <img src="data:image/*;base64,...">}. */ |
| 108 | public static boolean isImageDataUri(String url) { |
| 109 | return IMAGE_DATA.getValueFilter().matcher(url).find(); |
| 110 | } |
| 111 | |
| Shawn Pearce | 532b62f | 2016-06-05 12:20:38 -0700 | [diff] [blame] | 112 | public static boolean isValidGitUri(String val) { |
| 113 | return GIT_URI.matcher(val).find(); |
| 114 | } |
| 115 | |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 116 | private final StringBuilder htmlBuf; |
| 117 | private final Appendable textBuf; |
| 118 | private String tag; |
| 119 | |
| 120 | public HtmlBuilder() { |
| 121 | htmlBuf = new StringBuilder(); |
| 122 | textBuf = EscapeHtml.INSTANCE.escape(htmlBuf); |
| 123 | } |
| 124 | |
| 125 | /** Begin a new HTML tag. */ |
| 126 | public HtmlBuilder open(String tagName) { |
| 127 | checkArgument(ALLOWED_TAGS.contains(tagName), "invalid HTML tag %s", tagName); |
| 128 | finishActiveTag(); |
| 129 | htmlBuf.append('<').append(tagName); |
| 130 | tag = tagName; |
| 131 | return this; |
| 132 | } |
| 133 | |
| 134 | /** Filter and append an attribute to the last tag. */ |
| 135 | public HtmlBuilder attribute(String att, String val) { |
| 136 | if (Strings.isNullOrEmpty(val)) { |
| 137 | return this; |
| 138 | } else if ("href".equals(att) && "a".equals(tag)) { |
| 139 | val = anchorHref(val); |
| 140 | } else if ("src".equals(att) && "img".equals(tag)) { |
| 141 | val = imgSrc(val); |
| Shawn Pearce | ee0b06e | 2015-02-13 00:13:01 -0800 | [diff] [blame] | 142 | } else if ("src".equals(att) && "iframe".equals(tag)) { |
| 143 | if (!isValidHttpUri(val)) { |
| 144 | return this; |
| 145 | } |
| 146 | val = URI.escape(val); |
| 147 | } else if (("height".equals(att) || "width".equals(att)) && "iframe".equals(tag)) { |
| 148 | val = isValidCssDimension(val) ? val : "250px"; |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 149 | } else if ("alt".equals(att) && "img".equals(tag)) { |
| 150 | // allow |
| 151 | } else if ("title".equals(att) && ("img".equals(tag) || "a".equals(tag))) { |
| 152 | // allow |
| Shawn Pearce | 25d9196 | 2015-06-22 15:35:36 -0700 | [diff] [blame] | 153 | } else if ("name".equals(att) && "a".equals(tag)) { |
| 154 | // allow |
| Shawn Pearce | 12c8fab | 2016-05-15 16:55:21 -0700 | [diff] [blame] | 155 | } else if ("start".equals(att) && "ol".equals(tag)) { |
| 156 | // allow |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 157 | } else if (("colspan".equals(att) || "align".equals(att)) |
| 158 | && ("td".equals(tag) || "th".equals(tag))) { |
| 159 | // allow |
| 160 | } else { |
| 161 | checkState(tag != null, "tag must be pending"); |
| 162 | checkArgument(ALLOWED_ATTRIBUTES.contains(att), "invalid attribute %s", att); |
| 163 | } |
| 164 | |
| 165 | try { |
| 166 | htmlBuf.append(' ').append(att).append("=\""); |
| 167 | textBuf.append(val); |
| 168 | htmlBuf.append('"'); |
| 169 | return this; |
| 170 | } catch (IOException e) { |
| 171 | throw new IllegalStateException(e); |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | private String anchorHref(String val) { |
| Shawn Pearce | 532b62f | 2016-06-05 12:20:38 -0700 | [diff] [blame] | 176 | if (URI.getValueFilter().matcher(val).find() || isValidGitUri(val)) { |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 177 | return URI.escape(val); |
| 178 | } |
| 179 | return URI.getInnocuousOutput(); |
| 180 | } |
| 181 | |
| 182 | private static String imgSrc(String val) { |
| Shawn Pearce | ee0b06e | 2015-02-13 00:13:01 -0800 | [diff] [blame] | 183 | if (isValidHttpUri(val)) { |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 184 | return URI.escape(val); |
| 185 | } |
| Shawn Pearce | 99cdbce | 2015-02-10 12:05:45 -0800 | [diff] [blame] | 186 | if (isImageDataUri(val)) { |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 187 | return val; // pass through data:image/*;base64,... |
| 188 | } |
| 189 | return IMAGE_DATA.getInnocuousOutput(); |
| 190 | } |
| 191 | |
| 192 | private void finishActiveTag() { |
| 193 | if (tag != null) { |
| 194 | if (SELF_CLOSING_TAGS.contains(tag)) { |
| 195 | htmlBuf.append(" />"); |
| 196 | } else { |
| 197 | htmlBuf.append('>'); |
| 198 | } |
| 199 | tag = null; |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | /** Close an open tag with {@code </tag>} */ |
| 204 | public HtmlBuilder close(String tag) { |
| Shawn Pearce | b304a05 | 2015-02-12 21:39:55 -0800 | [diff] [blame] | 205 | checkArgument( |
| Han-Wen Nienhuys | c0200f6 | 2016-05-02 17:34:51 +0200 | [diff] [blame] | 206 | ALLOWED_TAGS.contains(tag) && !SELF_CLOSING_TAGS.contains(tag), "invalid HTML tag %s", tag); |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 207 | |
| 208 | finishActiveTag(); |
| 209 | htmlBuf.append("</").append(tag).append('>'); |
| 210 | return this; |
| 211 | } |
| 212 | |
| 213 | /** Escapes and appends any text as a child of the current element. */ |
| 214 | public HtmlBuilder appendAndEscape(CharSequence in) { |
| 215 | try { |
| 216 | finishActiveTag(); |
| 217 | textBuf.append(in); |
| 218 | return this; |
| 219 | } catch (IOException e) { |
| 220 | throw new IllegalStateException(e); |
| 221 | } |
| 222 | } |
| 223 | |
| Shawn Pearce | 12c8fab | 2016-05-15 16:55:21 -0700 | [diff] [blame] | 224 | /** Append a space outside of an element. */ |
| 225 | public HtmlBuilder space() { |
| 226 | finishActiveTag(); |
| 227 | htmlBuf.append(' '); |
| 228 | return this; |
| 229 | } |
| 230 | |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 231 | private static final Pattern HTML_ENTITY = Pattern.compile("&[a-z]+;"); |
| 232 | |
| 233 | /** Append constant entity reference like {@code }. */ |
| 234 | public void entity(String entity) { |
| 235 | checkArgument(HTML_ENTITY.matcher(entity).matches(), "invalid entity %s", entity); |
| 236 | finishActiveTag(); |
| 237 | htmlBuf.append(entity); |
| 238 | } |
| 239 | |
| 240 | /** Bless the current content as HTML. */ |
| 241 | public SanitizedContent toSoy() { |
| 242 | finishActiveTag(); |
| Han-Wen Nienhuys | c0200f6 | 2016-05-02 17:34:51 +0200 | [diff] [blame] | 243 | return UnsafeSanitizedContentOrdainer.ordainAsSafe(htmlBuf.toString(), ContentKind.HTML); |
| Shawn Pearce | 743ca74 | 2015-02-11 13:05:05 -0800 | [diff] [blame] | 244 | } |
| 245 | } |