Markdown: add restricted HtmlBuilder to produce dynamic HTML Construct a very limited subset of HTML in a buffer and convert it to SantizedContent for insertion into a Soy template without additional escaping. Change-Id: I562ca3d2b8cc7a92fa51edf699536aba4c9b68f9
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java new file mode 100644 index 0000000..c80c28c --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
@@ -0,0 +1,175 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.gitiles.doc.html; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableSet; +import com.google.template.soy.data.SanitizedContent; +import com.google.template.soy.data.SanitizedContent.ContentKind; +import com.google.template.soy.data.UnsafeSanitizedContentOrdainer; +import com.google.template.soy.shared.restricted.EscapingConventions.EscapeHtml; +import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri; +import com.google.template.soy.shared.restricted.EscapingConventions.FilterNormalizeUri; + +import java.io.IOException; +import java.util.regex.Pattern; + +/** + * Builds a document fragment using a restricted subset of HTML. + * <p> + * Most attributes are rejected ({@code style}, {@code onclick}, ...) by + * throwing IllegalArgumentException if the caller attempts to add them to a + * pending element. + * <p> + * Useful but critical attributes like {@code href} on anchors or {@code src} on + * img permit only safe subset of URIs, primarily {@code http://}, + * {@code https://}, and for image src {@code data:image/*;base64,...}. + */ +public final class HtmlBuilder { + private static final ImmutableSet<String> ALLOWED_TAGS = ImmutableSet.of( + "h1", "h2", "h3", "h4", "h5", "h6", + "a", "div", "img", "p", "blockquote", "pre", + "ol", "ul", "li", "dl", "dd", "dt", + "del", "em", "strong", "code", "br", "hr", + "table", "thead", "tbody", "caption", "tr", "th", "td" + ); + + private static final ImmutableSet<String> ALLOWED_ATTRIBUTES = ImmutableSet.of( + "id", "class", "role"); + + private static final ImmutableSet<String> SELF_CLOSING_TAGS = ImmutableSet.of( + "img", "br", "hr"); + + private static final FilterNormalizeUri URI = FilterNormalizeUri.INSTANCE; + private static final FilterImageDataUri IMAGE_DATA = FilterImageDataUri.INSTANCE; + + private final StringBuilder htmlBuf; + private final Appendable textBuf; + private String tag; + + public HtmlBuilder() { + htmlBuf = new StringBuilder(); + textBuf = EscapeHtml.INSTANCE.escape(htmlBuf); + } + + /** Begin a new HTML tag. */ + public HtmlBuilder open(String tagName) { + checkArgument(ALLOWED_TAGS.contains(tagName), "invalid HTML tag %s", tagName); + finishActiveTag(); + htmlBuf.append('<').append(tagName); + tag = tagName; + return this; + } + + /** Filter and append an attribute to the last tag. */ + public HtmlBuilder attribute(String att, String val) { + if (Strings.isNullOrEmpty(val)) { + return this; + } else if ("href".equals(att) && "a".equals(tag)) { + val = anchorHref(val); + } else if ("src".equals(att) && "img".equals(tag)) { + val = imgSrc(val); + } else if ("alt".equals(att) && "img".equals(tag)) { + // allow + } else if ("title".equals(att) && ("img".equals(tag) || "a".equals(tag))) { + // allow + } else if (("colspan".equals(att) || "align".equals(att)) + && ("td".equals(tag) || "th".equals(tag))) { + // allow + } else { + checkState(tag != null, "tag must be pending"); + checkArgument(ALLOWED_ATTRIBUTES.contains(att), "invalid attribute %s", att); + } + + try { + htmlBuf.append(' ').append(att).append("=\""); + textBuf.append(val); + htmlBuf.append('"'); + return this; + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + private String anchorHref(String val) { + if (URI.getValueFilter().matcher(val).find()) { + return URI.escape(val); + } + return URI.getInnocuousOutput(); + } + + private static String imgSrc(String val) { + if ((val.startsWith("http:") || val.startsWith("https:")) + && URI.getValueFilter().matcher(val).find()) { + return URI.escape(val); + } + if (IMAGE_DATA.getValueFilter().matcher(val).find()) { + return val; // pass through data:image/*;base64,... + } + return IMAGE_DATA.getInnocuousOutput(); + } + + private void finishActiveTag() { + if (tag != null) { + if (SELF_CLOSING_TAGS.contains(tag)) { + htmlBuf.append(" />"); + } else { + htmlBuf.append('>'); + } + tag = null; + } + } + + /** Close an open tag with {@code </tag>} */ + public HtmlBuilder close(String tag) { + checkArgument(ALLOWED_TAGS.contains(tag) && !SELF_CLOSING_TAGS.equals(tag), + "invalid HTML tag %s", tag); + + finishActiveTag(); + htmlBuf.append("</").append(tag).append('>'); + return this; + } + + /** Escapes and appends any text as a child of the current element. */ + public HtmlBuilder appendAndEscape(CharSequence in) { + try { + finishActiveTag(); + textBuf.append(in); + return this; + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + private static final Pattern HTML_ENTITY = Pattern.compile("&[a-z]+;"); + + /** Append constant entity reference like {@code }. */ + public void entity(String entity) { + checkArgument(HTML_ENTITY.matcher(entity).matches(), "invalid entity %s", entity); + finishActiveTag(); + htmlBuf.append(entity); + } + + /** Bless the current content as HTML. */ + public SanitizedContent toSoy() { + finishActiveTag(); + return UnsafeSanitizedContentOrdainer.ordainAsSafe( + htmlBuf.toString(), + ContentKind.HTML); + } +}