Markdown: add minimal support for AST to HTML

Includes extension support to walk the AST and produce a table of
contents from the contained HeaderNodes. Rendered HTML is blessed
as SanitizedContent for Soy to include inside of templates without
further escaping.

Change-Id: Ib53e091617b39e22a230da9ec8a4aa39c3bf5c26
diff --git a/gitiles-servlet/BUCK b/gitiles-servlet/BUCK index 9fdacae..9e7a1e8 100644 --- a/gitiles-servlet/BUCK +++ b/gitiles-servlet/BUCK
@@ -2,10 +2,12 @@ RSRC = glob(['src/main/resources/**/*']) DEPS = [ '//lib:commons-lang', + '//lib:grappa', '//lib:gson', '//lib:guava', '//lib:joda-time', '//lib:jsr305', + '//lib:pegdown', '//lib:prettify', '//lib/jgit:jgit', '//lib/jgit:jgit-servlet',
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownHelper.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownHelper.java new file mode 100644 index 0000000..6d67505 --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownHelper.java
@@ -0,0 +1,73 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import com.google.common.base.Strings;
+
+import org.pegdown.ast.Node;
+import org.pegdown.ast.TextNode;
+
+/** Static utility methods for markdown link URLs and AST node text. */
+public class MarkdownHelper {
+  /**
+   * Check if anchor URL is like {@code /top.md}.
+   *
+   * @param url URL to test; must not be null.
+   * @return true if {@code url} has at least five characters, begins with
+   *     a single {@code /} and ends with {@code .md}.
+   */
+  public static boolean isAbsolutePathToMarkdown(String url) {
+    if (url.length() < 5) {
+      return false;
+    }
+    boolean singleLeadingSlash = url.charAt(0) == '/' && url.charAt(1) != '/';
+    return singleLeadingSlash && url.endsWith(".md");
+  }
+
+  /**
+   * Combine child nodes as string; this must be escaped for HTML.
+   *
+   * @param node node whose descendant text is collected; may be null.
+   * @return trimmed text of the {@link TextNode}s below {@code node}, or
+   *     null if {@code node} is null, has no children, or has no text.
+   */
+  public static String getInnerText(Node node) {
+    if (node == null) {
+      return null;
+    }
+    if (node.getChildren().isEmpty()) {
+      return null;
+    }
+
+    StringBuilder text = new StringBuilder();
+    appendTextFromChildren(text, node);
+    String trimmed = text.toString().trim();
+    return Strings.emptyToNull(trimmed);
+  }
+
+  /** Append text of TextNodes below node, recursing into other nodes. */
+  private static void appendTextFromChildren(StringBuilder b, Node node) {
+    for (Node child : node.getChildren()) {
+      if (!(child instanceof TextNode)) {
+        appendTextFromChildren(b, child);
+        continue;
+      }
+      b.append(((TextNode) child).getText());
+    }
+  }
+
+  private MarkdownHelper() {
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java new file mode 100644 index 0000000..bad8ab7 --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
@@ -0,0 +1,432 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import static com.google.common.base.Preconditions.checkState;
+import static com.google.gitiles.doc.MarkdownHelper.getInnerText;
+
+import com.google.gitiles.doc.html.HtmlBuilder;
+import com.google.template.soy.data.SanitizedContent;
+import com.google.template.soy.shared.restricted.EscapingConventions;
+
+import org.pegdown.ast.AbbreviationNode;
+import org.pegdown.ast.AutoLinkNode;
+import org.pegdown.ast.BlockQuoteNode;
+import org.pegdown.ast.BulletListNode;
+import org.pegdown.ast.CodeNode;
+import org.pegdown.ast.DefinitionListNode;
+import org.pegdown.ast.DefinitionNode;
+import org.pegdown.ast.DefinitionTermNode;
+import org.pegdown.ast.ExpImageNode;
+import org.pegdown.ast.ExpLinkNode;
+import org.pegdown.ast.HeaderNode;
+import org.pegdown.ast.HtmlBlockNode;
+import org.pegdown.ast.InlineHtmlNode;
+import org.pegdown.ast.ListItemNode;
+import org.pegdown.ast.MailLinkNode;
+import org.pegdown.ast.Node;
+import org.pegdown.ast.OrderedListNode;
+import org.pegdown.ast.ParaNode;
+import org.pegdown.ast.QuotedNode;
+import org.pegdown.ast.RefImageNode;
+import org.pegdown.ast.RefLinkNode;
+import org.pegdown.ast.ReferenceNode;
+import org.pegdown.ast.RootNode;
+import org.pegdown.ast.SimpleNode;
+import org.pegdown.ast.SpecialTextNode;
+import org.pegdown.ast.StrikeNode;
+import org.pegdown.ast.StrongEmphSuperNode;
+import org.pegdown.ast.SuperNode;
+import org.pegdown.ast.TableBodyNode;
+import org.pegdown.ast.TableCaptionNode;
+import org.pegdown.ast.TableCellNode;
+import org.pegdown.ast.TableColumnNode;
+import org.pegdown.ast.TableHeaderNode;
+import org.pegdown.ast.TableNode;
+import org.pegdown.ast.TableRowNode;
+import org.pegdown.ast.TextNode;
+import org.pegdown.ast.VerbatimNode;
+import org.pegdown.ast.WikiLinkNode;
+
+/**
+ * Formats parsed markdown AST into HTML.
+ * <p>
+ * Callers must create a new instance for each RootNode.
+ * <p>
+ * Raw HTML blocks and inline HTML found in the markdown are dropped.
+ */
+public class MarkdownToHtml implements Visitor {
+  private final ReferenceMap references = new ReferenceMap();
+  private final HtmlBuilder html = new HtmlBuilder();
+  private final TocFormatter toc = new TocFormatter(html, 3);
+  private TableState table;
+
+  /**
+   * Render the document AST to sanitized HTML.
+   *
+   * @param node parsed document root; may be null.
+   * @return HTML for Soy to embed, or null if {@code node} is null.
+   */
+  public SanitizedContent toSoyHtml(RootNode node) {
+    if (node == null) {
+      return null;
+    }
+
+    toc.setRoot(node);
+    node.accept(this);
+    return html.toSoy();
+  }
+
+  @Override
+  public void visit(RootNode node) {
+    references.add(node);
+    visitChildren(node);
+  }
+
+  @Override
+  public void visit(TocNode node) {
+    toc.format();
+  }
+
+  @Override
+  public void visit(HeaderNode node) {
+    String tag = "h" + node.getLevel();
+    html.open(tag);
+    if (toc.include(node)) {
+      html.attribute("id", toc.idFromHeader(node));
+    }
+    visitChildren(node);
+    html.close(tag);
+  }
+
+  @Override
+  public void visit(ParaNode node) {
+    wrapChildren("p", node);
+  }
+
+  @Override
+  public void visit(BlockQuoteNode node) {
+    wrapChildren("blockquote", node);
+  }
+
+  @Override
+  public void visit(OrderedListNode node) {
+    wrapChildren("ol", node);
+  }
+
+  @Override
+  public void visit(BulletListNode node) {
+    wrapChildren("ul", node);
+  }
+
+  @Override
+  public void visit(ListItemNode node) {
+    wrapChildren("li", node);
+  }
+
+  @Override
+  public void visit(DefinitionListNode node) {
+    wrapChildren("dl", node);
+  }
+
+  @Override
+  public void visit(DefinitionNode node) {
+    wrapChildren("dd", node);
+  }
+
+  @Override
+  public void visit(DefinitionTermNode node) {
+    wrapChildren("dt", node);
+  }
+
+  @Override
+  public void visit(VerbatimNode node) {
+    html.open("pre").attribute("class", "code");
+    String text = node.getText();
+    while (text.startsWith("\n")) {
+      html.open("br");
+      text = text.substring(1);
+    }
+    html.appendAndEscape(text);
+    html.close("pre");
+  }
+
+  @Override
+  public void visit(CodeNode node) {
+    wrapText("code", node);
+  }
+
+  @Override
+  public void visit(StrikeNode node) {
+    wrapChildren("del", node);
+  }
+
+  @Override
+  public void visit(StrongEmphSuperNode node) {
+    if (node.isClosed()) {
+      wrapChildren(node.isStrong() ? "strong" : "em", node);
+    } else {
+      // Unclosed (or unmatched) sequence is plain text.
+      html.appendAndEscape(node.getChars());
+      visitChildren(node);
+    }
+  }
+
+  @Override
+  public void visit(AutoLinkNode node) {
+    String url = node.getText();
+    html.open("a").attribute("href", url)
+        .appendAndEscape(url)
+        .close("a");
+  }
+
+  @Override
+  public void visit(MailLinkNode node) {
+    String addr = node.getText();
+    html.open("a").attribute("href", "mailto:" + addr)
+        .appendAndEscape(addr)
+        .close("a");
+  }
+
+  @Override
+  public void visit(WikiLinkNode node) {
+    String text = node.getText();
+    String path = text.replace(' ', '-') + ".md";
+    html.open("a").attribute("href", path)
+        .appendAndEscape(text)
+        .close("a");
+  }
+
+  @Override
+  public void visit(ExpLinkNode node) {
+    html.open("a")
+        .attribute("href", node.url)
+        .attribute("title", node.title);
+    visitChildren(node);
+    html.close("a");
+  }
+
+  @Override
+  public void visit(RefLinkNode node) {
+    ReferenceNode ref = references.get(node.referenceKey, getInnerText(node));
+    if (ref != null) {
+      html.open("a")
+          .attribute("href", ref.getUrl())
+          .attribute("title", ref.getTitle());
+      visitChildren(node);
+      html.close("a");
+    } else {
+      // Treat a broken RefLink as plain text.
+      visitChildren(node);
+    }
+  }
+
+  @Override
+  public void visit(ExpImageNode node) {
+    html.open("img")
+        .attribute("src", node.url)
+        .attribute("title", node.title)
+        .attribute("alt", getInnerText(node));
+  }
+
+  @Override
+  public void visit(RefImageNode node) {
+    String alt = getInnerText(node);
+    String url, title = alt;
+    ReferenceNode ref = references.get(node.referenceKey, alt);
+    if (ref != null) {
+      url = ref.getUrl();
+      title = ref.getTitle();
+    } else {
+      // If reference is missing, insert a broken image.
+      url = EscapingConventions.FilterImageDataUri.INSTANCE.getInnocuousOutput();
+    }
+    html.open("img")
+        .attribute("src", url)
+        .attribute("title", title)
+        .attribute("alt", alt);
+  }
+
+  @Override
+  public void visit(TableNode node) {
+    table = new TableState(node);
+    wrapChildren("table", node);
+    table = null;
+  }
+
+  /** Fail if node is visited while no table is being rendered. */
+  private void mustBeInsideTable(Node node) {
+    checkState(table != null, "%s must be in table", node);
+  }
+
+  @Override
+  public void visit(TableHeaderNode node) {
+    mustBeInsideTable(node);
+    table.inHeader = true;
+    wrapChildren("thead", node);
+    table.inHeader = false;
+  }
+
+  @Override
+  public void visit(TableBodyNode node) {
+    wrapChildren("tbody", node);
+  }
+
+  @Override
+  public void visit(TableCaptionNode node) {
+    wrapChildren("caption", node);
+  }
+
+  @Override
+  public void visit(TableRowNode node) {
+    mustBeInsideTable(node);
+    table.startRow();
+    wrapChildren("tr", node);
+  }
+
+  @Override
+  public void visit(TableCellNode node) {
+    mustBeInsideTable(node);
+    String tag = table.inHeader ? "th" : "td";
+    html.open(tag)
+        .attribute("align", table.getAlign());
+    if (node.getColSpan() > 1) {
+      html.attribute("colspan", Integer.toString(node.getColSpan()));
+    }
+    visitChildren(node);
+    html.close(tag);
+    table.done(node);
+  }
+
+  @Override
+  public void visit(TableColumnNode node) {
+    // Not for output; should not be in the Visitor API.
+  }
+
+  @Override
+  public void visit(TextNode node) {
+    html.appendAndEscape(node.getText());
+    // TODO(sop) printWithAbbreviations
+  }
+
+  @Override
+  public void visit(SpecialTextNode node) {
+    html.appendAndEscape(node.getText());
+  }
+
+  @Override
+  public void visit(QuotedNode node) {
+    // Quote marks are emitted as named character references, not as raw
+    // characters. NOTE(review): assumes entity() wants "&name;" - confirm.
+    switch (node.getType()) {
+      case DoubleAngle:
+        html.entity("&laquo;");
+        visitChildren(node);
+        html.entity("&raquo;");
+        break;
+      case Double:
+        html.entity("&ldquo;");
+        visitChildren(node);
+        html.entity("&rdquo;");
+        break;
+      case Single:
+        html.entity("&lsquo;");
+        visitChildren(node);
+        html.entity("&rsquo;");
+        break;
+      default:
+        checkState(false, "unsupported quote %s", node.getType());
+    }
+  }
+
+  @Override
+  public void visit(SimpleNode node) {
+    // Typographic symbols are emitted as named character references.
+    switch (node.getType()) {
+      case Apostrophe:
+        html.entity("&rsquo;");
+        break;
+      case Ellipsis:
+        html.entity("&hellip;");
+        break;
+      case Emdash:
+        html.entity("&mdash;");
+        break;
+      case Endash:
+        html.entity("&ndash;");
+        break;
+      case HRule:
+        html.open("hr");
+        break;
+      case Linebreak:
+        html.open("br");
+        break;
+      case Nbsp:
+        html.entity("&nbsp;");
+        break;
+      default:
+        checkState(false, "unsupported node %s", node.getType());
+    }
+  }
+
+  @Override
+  public void visit(SuperNode node) {
+    visitChildren(node);
+  }
+
+  @Override
+  public void visit(Node node) {
+    checkState(false, "node %s unsupported", node.getClass());
+  }
+
+  @Override
+  public void visit(HtmlBlockNode node) {
+    // Drop all HTML nodes.
+  }
+
+  @Override
+  public void visit(InlineHtmlNode node) {
+    // Drop all HTML nodes.
+  }
+
+  @Override
+  public void visit(ReferenceNode node) {
+    // Reference nodes are not printed; they only declare an item.
+  }
+
+  @Override
+  public void visit(AbbreviationNode node) {
+    // Abbreviation nodes are not printed; they only declare an item.
+  }
+
+  /** Write the node's escaped text inside an element named tag. */
+  private void wrapText(String tag, TextNode node) {
+    html.open(tag).appendAndEscape(node.getText()).close(tag);
+  }
+
+  /** Render the node's children inside an element named tag. */
+  private void wrapChildren(String tag, SuperNode node) {
+    html.open(tag);
+    visitChildren(node);
+    html.close(tag);
+  }
+
+  /** Visit each child of node, in order, with this visitor. */
+  private void visitChildren(Node node) {
+    for (Node child : node.getChildren()) {
+      child.accept(this);
+    }
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/ReferenceMap.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/ReferenceMap.java new file mode 100644 index 0000000..65fe86e --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/ReferenceMap.java
@@ -0,0 +1,84 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import static com.google.gitiles.doc.MarkdownHelper.getInnerText;
+
+import org.pegdown.ast.ReferenceNode;
+import org.pegdown.ast.RootNode;
+import org.pegdown.ast.SuperNode;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Index of the document's reference definitions by normalized label. */
+class ReferenceMap {
+  private final Map<String, ReferenceNode> references = new HashMap<>();
+
+  /**
+   * Record every reference declared by the document.
+   * <p>
+   * A reference whose label has no text is skipped; it has no key to
+   * store it under.
+   *
+   * @param node root of the parsed document.
+   */
+  void add(RootNode node) {
+    for (ReferenceNode ref : node.getReferences()) {
+      String id = getInnerText(ref);
+      if (id != null) {
+        references.put(key(id), ref);
+      }
+    }
+  }
+
+  /**
+   * Find the reference a link or image points at.
+   *
+   * @param keyNode explicit key of the link; if null {@code text} is the key.
+   * @param text inner text of the link; may be null.
+   * @return the declared reference, or null if the key has no text or
+   *     nothing was declared under it.
+   */
+  ReferenceNode get(SuperNode keyNode, String text) {
+    String id = keyNode != null ? getInnerText(keyNode) : text;
+    if (id == null || id.isEmpty()) {
+      return null;
+    }
+    return references.get(key(id));
+  }
+
+  /** Normalize a label by dropping whitespace and lower casing the rest. */
+  private static String key(String in) {
+    // Strip whitespace and normalize to lower case. Pegdown's default
+    // HTML formatter also applies this type of normalization to make
+    // it easier for document authors to reference links. Links should
+    // be case insensitive to allow for easier formatting of title case
+    // in prose vs. in the reference table, especially if a link is used
+    // both at the start of a sentence and later in the middle of sentence.
+    //
+    // Whitespace stripping is also performed by pegdown's default code.
+    // This allows references to be declared as "foobar" but prose to
+    // mention it as "Foo Bar".
+    StringBuilder r = new StringBuilder(in.length());
+    for (int i = 0; i < in.length(); i++) {
+      char c = in.charAt(i);
+      if (!Character.isWhitespace(c)) {
+        r.append(Character.toLowerCase(c));
+      }
+    }
+    return r.toString();
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/TableState.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/TableState.java new file mode 100644 index 0000000..80276d2 --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/TableState.java
@@ -0,0 +1,71 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import org.pegdown.ast.TableCellNode;
+import org.pegdown.ast.TableColumnNode;
+import org.pegdown.ast.TableNode;
+
+import java.util.List;
+
+/** Column cursor and header flag used while a table is rendered. */
+class TableState {
+  private final List<TableColumnNode> columns;
+
+  boolean inHeader;
+  int column;
+
+  TableState(TableNode node) {
+    columns = node.getColumns();
+  }
+
+  /** Move back to the first column for a new row. */
+  void startRow() {
+    column = 0;
+  }
+
+  /**
+   * Alignment of the current column; past the last column the last
+   * column's alignment is used.
+   *
+   * @return {@code "left"}, {@code "right"}, {@code "center"}, or null if
+   *     the column declares no alignment.
+   */
+  String getAlign() {
+    int last = columns.size() - 1;
+    int pos = column < last ? column : last;
+    TableColumnNode current = columns.get(pos);
+    switch (current.getAlignment()) {
+      case Left:
+        return "left";
+      case Center:
+        return "center";
+      case Right:
+        return "right";
+      case None:
+        return null;
+      default:
+        String msg = String.format(
+            "unsupported alignment %s on column %d",
+            current.getAlignment(), pos);
+        throw new IllegalStateException(msg);
+    }
+  }
+
+  /** Advance past {@code cell}, which may span several columns. */
+  void done(TableCellNode cell) {
+    column = column + cell.getColSpan();
+  }
+}
\ No newline at end of file
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/TocFormatter.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/TocFormatter.java new file mode 100644 index 0000000..5dcfc77 --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/TocFormatter.java
@@ -0,0 +1,214 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import com.google.gitiles.doc.html.HtmlBuilder;
+
+import org.apache.commons.lang3.StringUtils;
+import org.pegdown.ast.HeaderNode;
+import org.pegdown.ast.Node;
+import org.pegdown.ast.RootNode;
+
+/**
+ * Outputs outline from HeaderNodes in the AST.
+ * <p>
+ * H1 headers are only part of the outline when the document has more than
+ * one of them; otherwise the outline starts at H2.
+ */
+class TocFormatter {
+  private final HtmlBuilder html;
+  private final int maxLevel;
+
+  private RootNode root;
+  private Boolean hasToc;
+  private int countH1;
+
+  private int level;
+
+  /**
+   * @param html builder the outline is written to.
+   * @param maxLevel deepest header level to include in the outline.
+   */
+  TocFormatter(HtmlBuilder html, int maxLevel) {
+    this.html = html;
+    this.maxLevel = maxLevel;
+  }
+
+  /**
+   * Set the document to outline and discard anything learned about a
+   * previously set document.
+   *
+   * @param doc root of the parsed document.
+   */
+  void setRoot(RootNode doc) {
+    root = doc;
+    hasToc = null;
+    // init() recounts H1 headers for the new root; start again from zero.
+    countH1 = 0;
+  }
+
+  /**
+   * Check if a header is listed in the outline and so needs an id.
+   *
+   * @param h header to test.
+   * @return false if the document has no table of contents; for H1 true
+   *     only if the document has several H1; otherwise true if the
+   *     header's level is not deeper than the maximum level.
+   */
+  boolean include(HeaderNode h) {
+    init();
+    if (!hasToc) {
+      return false;
+    } else if (h.getLevel() == 1) {
+      return countH1 > 1;
+    }
+    return h.getLevel() <= maxLevel;
+  }
+
+  /**
+   * Compute the anchor id for a header.
+   *
+   * @param header header to name.
+   * @return id derived from the header's text, or null if it has no text.
+   */
+  String idFromHeader(HeaderNode header) {
+    String t = MarkdownHelper.getInnerText(header);
+    return t != null ? idFromTitle(t) : null;
+  }
+
+  /** Write the table of contents as nested lists of links to headers. */
+  void format() {
+    init();
+
+    int startLevel = countH1 > 1 ? 1 : 2;
+    hasToc = true;
+    level = startLevel;
+
+    html.open("div")
+        .attribute("class", "toc")
+        .attribute("role", "navigation")
+        .open("h2").appendAndEscape("Contents").close("h2")
+        .open("div").attribute("class", "toc-aux")
+        .open("ul");
+    outline(root);
+    // Close lists still open from nested headers, and the outermost list.
+    while (level >= startLevel) {
+      html.close("ul");
+      level--;
+    }
+    html.close("div").close("div");
+  }
+
+  /** Walk the tree and emit an entry for each HeaderNode found. */
+  private void outline(Node node) {
+    if (node instanceof HeaderNode) {
+      outline((HeaderNode) node);
+    } else {
+      for (Node child : node.getChildren()) {
+        outline(child);
+      }
+    }
+  }
+
+  /** Emit one entry after moving list nesting to the header's level. */
+  private void outline(HeaderNode h) {
+    if (!include(h)) {
+      return;
+    }
+
+    String title = MarkdownHelper.getInnerText(h);
+    if (title == null) {
+      return;
+    }
+
+    while (level > h.getLevel()) {
+      html.close("ul");
+      level--;
+    }
+    while (level < h.getLevel()) {
+      html.open("ul");
+      level++;
+    }
+
+    html.open("li")
+        .open("a").attribute("href", "#" + idFromTitle(title))
+        .appendAndEscape(title)
+        .close("a")
+        .close("li");
+  }
+
+  /**
+   * Build an id from a title: accents are stripped, ASCII letters and
+   * digits are kept, a space becomes {@code -} and any other character
+   * {@code _}. Separators are never doubled, nor placed first or last.
+   */
+  private static String idFromTitle(String title) {
+    StringBuilder b = new StringBuilder(title.length());
+    for (char c : StringUtils.stripAccents(title).toCharArray()) {
+      if (('a' <= c && c <= 'z')
+          || ('A' <= c && c <= 'Z')
+          || ('0' <= c && c <= '9')) {
+        b.append(c);
+      } else if (c == ' ') {
+        if (b.length() > 0
+            && b.charAt(b.length() - 1) != '-'
+            && b.charAt(b.length() - 1) != '_') {
+          b.append('-');
+        }
+      } else if (b.length() > 0
+          && b.charAt(b.length() - 1) != '-'
+          && b.charAt(b.length() - 1) != '_') {
+        b.append('_');
+      }
+    }
+    // Drop separators left at the end of the id.
+    while (b.length() > 0) {
+      char c = b.charAt(b.length() - 1);
+      if (c == '-' || c == '_') {
+        b.setLength(b.length() - 1);
+        continue;
+      }
+      break;
+    }
+    return b.toString();
+  }
+
+  /** Scan the document once to learn about its TOC marker and H1 count. */
+  private void init() {
+    if (hasToc == null) {
+      hasToc = false;
+      init(root);
+    }
+  }
+
+  /** Look for a TocNode and count H1 headers in the tree below node. */
+  private void init(Node node) {
+    if (node instanceof TocNode) {
+      hasToc = true;
+      return;
+    } else if (node instanceof HeaderNode
+        && ((HeaderNode) node).getLevel() == 1) {
+      countH1++;
+      return;
+    }
+    for (Node child : node.getChildren()) {
+      init(child);
+      // Both answers are known; no need to look at further siblings.
+      if (hasToc && countH1 > 1) {
+        break;
+      }
+    }
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/TocNode.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/TocNode.java new file mode 100644 index 0000000..33dd31c --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/TocNode.java
@@ -0,0 +1,46 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import org.pegdown.ast.AbstractNode;
+import org.pegdown.ast.Node;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Block node {@code [TOC]} to display table of contents.
+ * <p>
+ * The node is a leaf, and can only be visited by a {@link Visitor}.
+ */
+public class TocNode extends AbstractNode {
+  /**
+   * Dispatch to {@link Visitor#visit(TocNode)}.
+   *
+   * @param visitor visitor to call; it is cast to {@link Visitor}.
+   */
+  @Override
+  public void accept(org.pegdown.ast.Visitor visitor) {
+    Visitor tocAware = (Visitor) visitor;
+    tocAware.visit(this);
+  }
+
+  /** @return an empty list; this node has no children. */
+  @Override
+  public List<Node> getChildren() {
+    List<Node> none = Collections.emptyList();
+    return none;
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java new file mode 100644 index 0000000..b8cf64e --- /dev/null +++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java
@@ -0,0 +1,25 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+/** Pegdown visitor that can also visit the {@link TocNode} extension. */
+public interface Visitor extends org.pegdown.ast.Visitor {
+  /**
+   * Visit a {@code [TOC]} node.
+   *
+   * @param node the node being visited.
+   */
+  void visit(TocNode node);
+}