Intern commits/authors/paths in blame region lists This saves some storage space both in the in-memory cache and in alternative persistent cache backends, at the cost of a little more processing during construction. Considering how expensive blame already is, this is a fair tradeoff. Make sure the result is serializable to aid in the construction of such persistent caches. Along with a rewrite of the blame loop comes a slight behavior change: adjacent blame regions from the same source (i.e. with deleted intervening lines) are no longer squashed. Change-Id: I031c758f5beb09453c9e2b998b3b09503ec1f70d
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java b/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java index 614da2a..768e8e5 100644 --- a/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java +++ b/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java
@@ -14,22 +14,28 @@ package com.google.gitiles.blame; +import static com.google.common.base.Preconditions.checkState; + import com.google.common.base.Objects; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.cache.Weigher; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.eclipse.jgit.blame.BlameGenerator; -import org.eclipse.jgit.blame.BlameResult; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Repository; import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.concurrent.ExecutionException; /** Guava implementation of BlameCache, weighted by number of blame regions. */ @@ -112,31 +118,77 @@ } } - private List<Region> loadBlame(Key key) throws IOException { + public static List<Region> loadBlame(Key key) throws IOException { try { BlameGenerator gen = new BlameGenerator(key.repo, key.path); - BlameResult blame; try { gen.push(null, key.commitId); - blame = gen.computeBlameResult(); + return loadRegions(gen); } finally { gen.release(); } - if (blame == null) { - return ImmutableList.of(); - } - int lineCount = blame.getResultContents().size(); - blame.discardResultContents(); - - List<Region> regions = Lists.newArrayList(); - for (int i = 0; i < lineCount; i++) { - if (regions.isEmpty() || !regions.get(regions.size() - 1).growFrom(blame, i)) { - regions.add(new Region(blame, i)); - } - } - return Collections.unmodifiableList(regions); } finally { key.repo = null; } } + + private static class PooledCommit { + final ObjectId commit; + final PersonIdent author; + + private PooledCommit(ObjectId commit, PersonIdent author) { + this.commit = commit; + this.author = author; + } + } + + private static List<Region> loadRegions(BlameGenerator gen) throws IOException { + Map<ObjectId, PooledCommit> commits = Maps.newHashMap(); + Interner<String> strings = Interners.newStrongInterner(); + int lineCount = gen.getResultContents().size(); + + List<Region> regions = Lists.newArrayList(); + while (gen.next()) { + String path = gen.getSourcePath(); + PersonIdent author = gen.getSourceAuthor(); + ObjectId commit = gen.getSourceCommit(); + checkState(path != null && author != null && commit != null); + + PooledCommit pc = commits.get(commit); + if (pc == null) { + pc = new PooledCommit(commit.copy(), + new PersonIdent( + strings.intern(author.getName()), + strings.intern(author.getEmailAddress()), + author.getWhen(), + author.getTimeZone())); + commits.put(pc.commit, pc); + } + path = strings.intern(path); + commit = pc.commit; + author = pc.author; + regions.add(new Region(path, commit, author, gen.getResultStart(), gen.getResultEnd())); + } + Collections.sort(regions); + + // Fill in any gaps left by bugs in JGit, since rendering code assumes the + // full set of contiguous regions. + List<Region> result = Lists.newArrayListWithExpectedSize(regions.size()); + Region last = null; + for (Region r : regions) { + if (last != null) { + checkState(last.getEnd() <= r.getStart()); + if (last.getEnd() < r.getStart()) { + result.add(new Region(null, null, null, last.getEnd(), r.getStart())); + } + } + result.add(r); + last = r; + } + if (last != null && last.getEnd() != lineCount) { + result.add(new Region(null, null, null, last.getEnd(), lineCount)); + } + + return ImmutableList.copyOf(result); + } }
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java b/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java index 8b3fd41..13a3dd3 100644 --- a/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java +++ b/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java
@@ -14,39 +14,40 @@ package com.google.gitiles.blame; -import com.google.common.base.Objects; +import static com.google.common.base.Preconditions.checkArgument; -import org.eclipse.jgit.blame.BlameResult; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.PersonIdent; -import org.eclipse.jgit.revwalk.RevCommit; + +import java.io.Serializable; /** Region of the blame of a file. */ -public class Region { +public class Region implements Serializable, Comparable<Region> { + private static final long serialVersionUID = 1L; + private final String sourcePath; private final ObjectId sourceCommit; private final PersonIdent sourceAuthor; - private int count; + private final int count; + private transient int start; - public Region(BlameResult blame, int start) { - this.sourcePath = blame.getSourcePath(start); - RevCommit c = blame.getSourceCommit(start); - if (c != null) { - this.sourceCommit = c.copy(); - this.sourceAuthor = c.getAuthorIdent(); - } else { - // Unknown source commit due to JGit bug. - this.sourceCommit = null; - this.sourceAuthor = null; - } - this.count = 1; + public Region(String path, ObjectId commit, PersonIdent author, int start, int end) { + checkArgument((path != null && commit != null && author != null) + || (path == null && commit == null && author == null), + "expected all null or none: %s, %s, %s", path, commit, author); + this.sourcePath = path; + this.sourceCommit = commit; + this.sourceAuthor = author; + this.start = start; + this.count = end - start; } - public Region(String sourcePath, ObjectId sourceCommit, PersonIdent sourceAuthor, int count) { - this.sourcePath = sourcePath; - this.sourceCommit = sourceCommit; - this.sourceAuthor = sourceAuthor; - this.count = count; + int getStart() { + return start; + } + + int getEnd() { + return start + count; } public int getCount() { @@ -65,15 +66,25 @@ return sourceAuthor; } - public boolean growFrom(BlameResult blame, int i) { - // Don't compare line numbers, so we collapse regions from the same source - // but with deleted lines into one. - if (Objects.equal(blame.getSourcePath(i), sourcePath) - && Objects.equal(blame.getSourceCommit(i), sourceCommit)) { - count++; - return true; + @Override + public int compareTo(Region o) { + return start - o.start; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + if (sourceCommit != null) { + sb.append(sourceCommit.name().substring(0, 7)) + .append(' ') + .append(sourceAuthor.toExternalString()) + .append(" (").append(sourcePath).append(')'); } else { - return false; + sb.append("<unblamed region>"); } + sb.append(' ') + .append("start=").append(start) + .append(", count=").append(count); + return sb.toString(); } }