Intern commits/authors/paths in blame region lists

This saves some storage space both in the in-memory cache and in
alternative persistent cache backends, at the cost of a little more
processing during construction. Considering how expensive blame
already is, this is a fair tradeoff.

Make sure the result is serializable to aid in the construction of
such persistent caches.

Along with a rewrite of the blame loop comes a slight behavior change:
adjacent blame regions from the same source (i.e. with deleted
intervening lines) are no longer squashed.

Change-Id: I031c758f5beb09453c9e2b998b3b09503ec1f70d
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java b/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java
index 614da2a..768e8e5 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/blame/BlameCacheImpl.java
@@ -14,22 +14,28 @@
 
 package com.google.gitiles.blame;
 
+import static com.google.common.base.Preconditions.checkState;
+
 import com.google.common.base.Objects;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
 import com.google.common.cache.Weigher;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Interner;
+import com.google.common.collect.Interners;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 import org.eclipse.jgit.blame.BlameGenerator;
-import org.eclipse.jgit.blame.BlameResult;
 import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.PersonIdent;
 import org.eclipse.jgit.lib.Repository;
 
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutionException;
 
 /** Guava implementation of BlameCache, weighted by number of blame regions. */
@@ -112,31 +118,77 @@
     }
   }
 
-  private List<Region> loadBlame(Key key) throws IOException {
+  public static List<Region> loadBlame(Key key) throws IOException {
     try {
       BlameGenerator gen = new BlameGenerator(key.repo, key.path);
-      BlameResult blame;
       try {
         gen.push(null, key.commitId);
-        blame = gen.computeBlameResult();
+        return loadRegions(gen);
       } finally {
         gen.release();
       }
-      if (blame == null) {
-        return ImmutableList.of();
-      }
-      int lineCount = blame.getResultContents().size();
-      blame.discardResultContents();
-
-      List<Region> regions = Lists.newArrayList();
-      for (int i = 0; i < lineCount; i++) {
-        if (regions.isEmpty() || !regions.get(regions.size() - 1).growFrom(blame, i)) {
-          regions.add(new Region(blame, i));
-        }
-      }
-      return Collections.unmodifiableList(regions);
     } finally {
       key.repo = null;
     }
   }
+
+  private static class PooledCommit {
+    final ObjectId commit;
+    final PersonIdent author;
+
+    private PooledCommit(ObjectId commit, PersonIdent author) {
+      this.commit = commit;
+      this.author = author;
+    }
+  }
+
+  private static List<Region> loadRegions(BlameGenerator gen) throws IOException {
+    Map<ObjectId, PooledCommit> commits = Maps.newHashMap();
+    Interner<String> strings = Interners.newStrongInterner();
+    int lineCount = gen.getResultContents().size();
+
+    List<Region> regions = Lists.newArrayList();
+    while (gen.next()) {
+      String path = gen.getSourcePath();
+      PersonIdent author = gen.getSourceAuthor();
+      ObjectId commit = gen.getSourceCommit();
+      checkState(path != null && author != null && commit != null);
+
+      PooledCommit pc = commits.get(commit);
+      if (pc == null) {
+        pc = new PooledCommit(commit.copy(),
+            new PersonIdent(
+              strings.intern(author.getName()),
+              strings.intern(author.getEmailAddress()),
+              author.getWhen(),
+              author.getTimeZone()));
+        commits.put(pc.commit, pc);
+      }
+      path = strings.intern(path);
+      commit = pc.commit;
+      author = pc.author;
+      regions.add(new Region(path, commit, author, gen.getResultStart(), gen.getResultEnd()));
+    }
+    Collections.sort(regions);
+
+    // Fill in any gaps left by bugs in JGit, since rendering code assumes the
+    // full set of contiguous regions.
+    List<Region> result = Lists.newArrayListWithExpectedSize(regions.size());
+    Region last = null;
+    for (Region r : regions) {
+      if (last != null) {
+        checkState(last.getEnd() <= r.getStart());
+        if (last.getEnd() < r.getStart()) {
+          result.add(new Region(null, null, null, last.getEnd(), r.getStart()));
+        }
+      }
+      result.add(r);
+      last = r;
+    }
+    if (last != null && last.getEnd() != lineCount) {
+      result.add(new Region(null, null, null, last.getEnd(), lineCount));
+    }
+
+    return ImmutableList.copyOf(result);
+  }
 }
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java b/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java
index 8b3fd41..13a3dd3 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/blame/Region.java
@@ -14,39 +14,40 @@
 
 package com.google.gitiles.blame;
 
-import com.google.common.base.Objects;
+import static com.google.common.base.Preconditions.checkArgument;
 
-import org.eclipse.jgit.blame.BlameResult;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.PersonIdent;
-import org.eclipse.jgit.revwalk.RevCommit;
+
+import java.io.Serializable;
 
 /** Region of the blame of a file. */
-public class Region {
+public class Region implements Serializable, Comparable<Region> {
+  private static final long serialVersionUID = 1L;
+
   private final String sourcePath;
   private final ObjectId sourceCommit;
   private final PersonIdent sourceAuthor;
-  private int count;
+  private final int count;
+  private transient int start;
 
-  public Region(BlameResult blame, int start) {
-    this.sourcePath = blame.getSourcePath(start);
-    RevCommit c = blame.getSourceCommit(start);
-    if (c != null) {
-      this.sourceCommit = c.copy();
-      this.sourceAuthor = c.getAuthorIdent();
-    } else {
-      // Unknown source commit due to JGit bug.
-      this.sourceCommit = null;
-      this.sourceAuthor = null;
-    }
-    this.count = 1;
+  public Region(String path, ObjectId commit, PersonIdent author, int start, int end) {
+    checkArgument((path != null && commit != null && author != null)
+        || (path == null && commit == null && author == null),
+        "expected all null or none: %s, %s, %s", path, commit, author);
+    this.sourcePath = path;
+    this.sourceCommit = commit;
+    this.sourceAuthor = author;
+    this.start = start;
+    this.count = end - start;
   }
 
-  public Region(String sourcePath, ObjectId sourceCommit, PersonIdent sourceAuthor, int count) {
-    this.sourcePath = sourcePath;
-    this.sourceCommit = sourceCommit;
-    this.sourceAuthor = sourceAuthor;
-    this.count = count;
+  int getStart() {
+    return start;
+  }
+
+  int getEnd() {
+    return start + count;
   }
 
   public int getCount() {
@@ -65,15 +66,25 @@
     return sourceAuthor;
   }
 
-  public boolean growFrom(BlameResult blame, int i) {
-    // Don't compare line numbers, so we collapse regions from the same source
-    // but with deleted lines into one.
-    if (Objects.equal(blame.getSourcePath(i), sourcePath)
-        && Objects.equal(blame.getSourceCommit(i), sourceCommit)) {
-      count++;
-      return true;
+  @Override
+  public int compareTo(Region o) {
+    return start - o.start;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    if (sourceCommit != null) {
+      sb.append(sourceCommit.name().substring(0, 7))
+          .append(' ')
+          .append(sourceAuthor.toExternalString())
+          .append(" (").append(sourcePath).append(')');
     } else {
-      return false;
+      sb.append("<unblamed region>");
     }
+    sb.append(' ')
+        .append("start=").append(start)
+        .append(", count=").append(count);
+    return sb.toString();
   }
 }