VisibilityCache/Checker: Use streams for the reachability starters

The materialized collection of starters performs the potentially
expensive ObjectId to RevCommit translation, but probably many commits
won't be needed at all in an incremental reachability check.

Use streams to pass the starters to the reachability checker, delaying
the ObjectId to RevCommit translation until it is really needed.

Note that for repositories without bitmaps the performance could vary.
Before, it was setting some start points, walk, set new start points,
walk.... until finding the required commit. Now it will set all start
points from the beginning. This increases the cost to start walking,
(but could also avoid re-walking previously visited nodes).

Change-Id: Ifa58e8e7f4cf44e30e1ff57d5a60f24378181f56
diff --git a/java/com/google/gitiles/VisibilityCache.java b/java/com/google/gitiles/VisibilityCache.java
index 49743d9..1d0467a 100644
--- a/java/com/google/gitiles/VisibilityCache.java
+++ b/java/com/google/gitiles/VisibilityCache.java
@@ -14,18 +14,15 @@
 
 package com.google.gitiles;
 
-import static com.google.common.base.MoreObjects.firstNonNull;
 import static com.google.common.base.MoreObjects.toStringHelper;
 import static com.google.common.base.Preconditions.checkNotNull;
 import static java.util.Objects.hash;
-import static java.util.stream.Collectors.toList;
-import static org.eclipse.jgit.lib.Constants.R_HEADS;
-import static org.eclipse.jgit.lib.Constants.R_TAGS;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Throwables;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
+import com.google.common.collect.Streams;
 import com.google.common.util.concurrent.ExecutionError;
 import java.io.IOException;
 import java.util.Arrays;
@@ -33,8 +30,10 @@
 import java.util.Objects;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Predicate;
 import java.util.stream.Stream;
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
+import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.RefDatabase;
@@ -163,34 +162,38 @@
       return true;
     }
 
+    Stream<ObjectId> reachableTips =
+        importantRefsFirst(refDb.getRefsByPrefix(RefDatabase.ALL))
+            .map(VisibilityCache::refToObjectId);
+
     // Check heads first under the assumption that most requests are for refs close to a head. Tags
     // tend to be much further back in history and just clutter up the priority queue in the common
     // case.
-    return checker.isReachableFrom("knownReachable", walk, commit, knownReachable)
-        || isReachableFromRefs("heads", walk, commit, refDb.getRefsByPrefix(R_HEADS).stream())
-        || isReachableFromRefs("tags", walk, commit, refDb.getRefsByPrefix(R_TAGS).stream())
-        || isReachableFromRefs(
-            "other", walk, commit, refDb.getRefs().stream().filter(VisibilityCache::otherRefs));
+    Stream<RevCommit> startCommits =
+        Stream.concat(knownReachable.stream(), reachableTips)
+            .map(objId -> VisibilityChecker.objectIdToRevCommit(walk, objId))
+            .filter(Objects::nonNull); // Ignore missing tips
+
+    return checker.isReachableFrom("known and sorted refs", walk, commit, startCommits);
   }
 
-  private static boolean refStartsWith(Ref ref, String prefix) {
-    return ref.getName().startsWith(prefix);
+  static Stream<Ref> importantRefsFirst(Collection<Ref> visibleRefs) {
+    Predicate<Ref> startsWithRefsHeads = ref -> ref.getName().startsWith(Constants.R_HEADS);
+    Predicate<Ref> startsWithRefsTags = ref -> ref.getName().startsWith(Constants.R_TAGS);
+    Predicate<Ref> startsWithRefsChanges = ref -> ref.getName().startsWith("refs/changes");
+    Predicate<Ref> allOther =
+        ref ->
+            !startsWithRefsHeads.test(ref)
+                && !startsWithRefsTags.test(ref)
+                && !startsWithRefsChanges.test(ref);
+
+    return Streams.concat(
+        visibleRefs.stream().filter(startsWithRefsHeads),
+        visibleRefs.stream().filter(startsWithRefsTags),
+        visibleRefs.stream().filter(allOther));
   }
 
-  private static boolean otherRefs(Ref r) {
-    return !(refStartsWith(r, R_HEADS)
-        || refStartsWith(r, R_TAGS)
-        || refStartsWith(r, "refs/changes/"));
-  }
-
-  private boolean isReachableFromRefs(String desc, RevWalk walk, RevCommit commit, Stream<Ref> refs)
-      throws IOException {
-    return isReachableFrom(
-        desc, walk, commit, refs.map(r -> firstNonNull(r.getPeeledObjectId(), r.getObjectId())));
-  }
-
-  private boolean isReachableFrom(String desc, RevWalk walk, RevCommit commit, Stream<ObjectId> ids)
-      throws IOException {
-    return checker.isReachableFrom(desc, walk, commit, ids.collect(toList()));
+  private static ObjectId refToObjectId(Ref ref) {
+    return ref.getObjectId() != null ? ref.getObjectId() : ref.getPeeledObjectId();
   }
 }
diff --git a/java/com/google/gitiles/VisibilityChecker.java b/java/com/google/gitiles/VisibilityChecker.java
index 0183ea9..4c4ac3a 100644
--- a/java/com/google/gitiles/VisibilityChecker.java
+++ b/java/com/google/gitiles/VisibilityChecker.java
@@ -17,12 +17,17 @@
 import com.google.common.collect.ImmutableList;
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Objects;
+import java.util.stream.Stream;
+import org.eclipse.jgit.annotations.Nullable;
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.RefDatabase;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevWalk;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Checks for object visibility
@@ -31,6 +36,8 @@
  */
 public class VisibilityChecker {
 
+  private static final Logger log = LoggerFactory.getLogger(VisibilityChecker.class);
+
   /**
    * Check if any of the refs in {@code refDb} points to the object {@code id}.
    *
@@ -55,38 +62,48 @@
    *     or ids referring to other kinds of objects are ignored.
    * @return true if we can get to {@code commit} from the {@code starters}
    * @throws IOException a pack file or loose object could not be read
+   * @deprecated see {@link #isReachableFrom(String, RevWalk, RevCommit, Stream)}
    */
+  @Deprecated
   protected boolean isReachableFrom(
       String description, RevWalk walk, RevCommit commit, Collection<ObjectId> starters)
       throws IOException {
-    if (starters.isEmpty()) {
-      return false;
-    }
-
-    ImmutableList<RevCommit> startCommits = objectIdsToCommits(walk, starters);
-    if (startCommits.isEmpty()) {
-      return false;
-    }
-
-    return !walk.getObjectReader()
-        .createReachabilityChecker(walk)
-        .areAllReachable(ImmutableList.of(commit), startCommits.stream())
-        .isPresent();
+    Stream<RevCommit> startCommits =
+        starters.stream().map(objId -> objectIdToRevCommit(walk, objId)).filter(Objects::nonNull);
+    return isReachableFrom(description, walk, commit, startCommits);
   }
 
-  private static ImmutableList<RevCommit> objectIdsToCommits(RevWalk walk, Collection<ObjectId> ids)
-      throws IOException {
-    ImmutableList.Builder<RevCommit> commits = ImmutableList.builder();
-    for (ObjectId id : ids) {
-      try {
-        commits.add(walk.parseCommit(id));
-      } catch (MissingObjectException e) {
-        // TODO(ifrade): ResolveParser has already checked that the object exists in the repo.
-        // Report as AssertionError.
-      } catch (IncorrectObjectTypeException e) {
-        // Ignore, doesn't affect commit reachability
-      }
+  /**
+   * Check if {@code commit} is reachable starting from {@code starters}.
+   *
+   * @param description Description of the ids (e.g. "heads"). Mainly for tracing.
+   * @param walk The walk to use for the reachability check
+   * @param commit The starting commit. It *MUST* come from the walk in use
+   * @param starters visible commits. Anything reachable from these commits is visible. Missing ids
+   *     or ids referring to other kinds of objects are ignored.
+   * @return true if we can get to {@code commit} from the {@code starters}
+   * @throws IOException a pack file or loose object could not be read
+   */
+  protected boolean isReachableFrom(
+      String description, RevWalk walk, RevCommit commit, Stream<RevCommit> starters)
+      throws MissingObjectException, IncorrectObjectTypeException, IOException {
+    return walk.getObjectReader()
+        .createReachabilityChecker(walk)
+        .areAllReachable(ImmutableList.of(commit), starters)
+        .isEmpty();
+  }
+
+  @Nullable
+  static RevCommit objectIdToRevCommit(RevWalk walk, ObjectId objectId) {
+    if (objectId == null) {
+      return null;
     }
-    return commits.build();
+
+    try {
+      return walk.parseCommit(objectId);
+    } catch (IOException e) {
+      log.warn("Cannot translate %s to RevCommit: %s", objectId.name(), e.getMessage());
+      return null;
+    }
   }
 }