1#!/usr/bin/env python
2# Copyright 2014 the V8 project authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# This script retrieves the history of all V8 branches and trunk revisions and
7# their corresponding Chromium revisions.
8
9# Requires a chromium checkout with branch heads:
10# gclient sync --with_branch_heads
11# gclient fetch
12
13import argparse
14import csv
15import itertools
16import json
17import os
18import re
19import sys
20
21from common_includes import *
22
# Default configuration values: the name of the temporary work branch and the
# base path of the persisted state file.
CONFIG = {
  "BRANCHNAME": "retrieve-v8-releases",
  "PERSISTFILE_BASENAME": "/tmp/v8-releases-tempfile",
}

# Expression for retrieving the bleeding edge revision from a commit message.
# Group 1 is the numeric revision without the leading "r".
PUSH_MESSAGE_RE = re.compile(r".* \(based on bleeding_edge revision r(\d+)\)$")

# Expression for retrieving the merged patches from a merge commit message
# (old and new format). Group 1 is the patch list, which is terminated either
# by a closing parenthesis or by " into".
MERGE_MESSAGE_RE = re.compile(r"^.*[Mm]erged (.+)(\)| into).*$", re.M)

# Expression for retrieving reverted patches from a commit message (old and
# new format). Group 1 is the patch list, which is terminated either by a
# closing parenthesis or by " in".
ROLLBACK_MESSAGE_RE = re.compile(r"^.*[Rr]ollback of (.+)(\)| in).*$", re.M)

# Expression for retrieving the code review link.
REVIEW_LINK_RE = re.compile(r"^Review URL: (.+)$", re.M)

# Expression with three versions (historical) for extracting the v8 revision
# from the chromium DEPS file. Every fragment is a raw string so that the
# regex escapes are not interpreted as string escapes.
DEPS_RE = re.compile(r"""^\s*(?:["']v8_revision["']: ["']"""
                     r"""|\(Var\("googlecode_url"\) % "v8"\) \+ "\/trunk@"""
                     r"""|"http\:\/\/v8\.googlecode\.com\/svn\/trunk@)"""
                     r"""([^"']+)["'].*$""", re.M)

# Expression to pick tag and revision for bleeding edge tags. To be used with
# output of 'svn log'. Group 1 is the tag name, group 2 the revision.
BLEEDING_EDGE_TAGS_RE = re.compile(
    r"A \/tags\/([^\s]+) \(from \/branches\/bleeding_edge\:(\d+)\)")
53
54
def SortBranches(branches):
  """Return a new list of the branches in descending SortingKey order.

  The input is not modified. SortingKey is not defined in this file; it is
  presumably the version-aware key from common_includes (to be confirmed).
  """
  ordered = list(branches)
  ordered.sort(key=SortingKey, reverse=True)
  return ordered
58
59
def FilterDuplicatesAndReverse(cr_releases):
  """Returns the chromium releases oldest-first without repeated v8 revisions.

  cr_releases is a list of [cr_rev, v8_rev] reverse-sorted by cr_rev.

  The list is walked from its last entry to its first. An entry is dropped
  when its v8_rev equals the v8_rev of the entry kept most recently, so every
  run of equal v8 revisions is represented by the first entry visited. The
  comparison starts out against the empty string, which means leading entries
  whose v8_rev is "" are dropped as well.
  """
  kept = []
  previous_v8_rev = ""
  for entry in reversed(cr_releases):
    v8_rev = entry[1]
    if v8_rev != previous_v8_rev:
      kept.append(entry)
      previous_v8_rev = v8_rev
  return kept
74
75
def BuildRevisionRanges(cr_releases):
  """Returns a mapping of v8 revision -> chromium ranges.

  The ranges are comma-separated, each range has the form R1:R2. The newest
  entry is the only one of the form R1, as there is no end range.

  cr_releases is a list of [cr_rev, v8_rev] reverse-sorted by cr_rev.
  cr_rev either refers to a chromium svn revision or a chromium branch number.
  Every cr_rev except the oldest one must be convertible with int(), because
  the end of a range is computed as the next cr_rev minus one.

  Returns a dict that maps each v8_rev to a string such as "10:19, 30".
  An empty input yields an empty dict.
  """
  cr_releases = FilterDuplicatesAndReverse(cr_releases)
  range_lists = {}

  # Visit neighbouring pairs of cr releases from oldest to newest. The plain
  # zip/slice pairing behaves the same on Python 2 and Python 3.
  for cr_from, cr_to in zip(cr_releases, cr_releases[1:]):

    # Assume the chromium revisions are all different.
    assert cr_from[0] != cr_to[0]

    # TODO(machenbach): Subtraction is not git friendly.
    ran = "%s:%d" % (cr_from[0], int(cr_to[0]) - 1)

    # Collect the ranges in lists per revision.
    range_lists.setdefault(cr_from[1], []).append(ran)

  # Add the newest revision as an open-ended entry. It is formatted with %s
  # like the range starts above so that the join below only sees strings.
  if cr_releases:
    newest = cr_releases[-1]
    range_lists.setdefault(newest[1], []).append("%s" % (newest[0],))

  # Stringify and comma-separate the range lists.
  return dict((rev, ", ".join(ranges)) for rev, ranges in range_lists.items())
106
107
def MatchSafe(match):
  """Return group 1 of a regex match, or "" when match is None/falsy."""
  return match.group(1) if match else ""
113
114
class Preparation(Step):
  """First step of the script: common setup followed by branch setup."""
  MESSAGE = "Preparation."

  def RunStep(self):
    """Run the inherited CommonPrepare and PrepareBranch helpers, in order.

    Both helpers are defined outside this file.
    """
    self.CommonPrepare()
    self.PrepareBranch()
121
122
class RetrieveV8Releases(Step):
  """Collects release dictionaries for the requested V8 branches.

  The result is stored in self["releases"], sorted by version, newest first.
  """
  MESSAGE = "Retrieve all V8 releases."

  def ExceedsMax(self, releases):
    """Return whether releases holds more entries than --max-releases.

    A max_releases option of 0 (or less) disables the limit.
    """
    limit = self._options.max_releases
    return limit > 0 and len(releases) > limit

  def GetBleedingEdgeFromPush(self, title):
    """Return the bleeding edge revision named in a push title, or ""."""
    push_match = PUSH_MESSAGE_RE.match(title)
    return MatchSafe(push_match)

  def GetMergedPatches(self, body):
    """Return the patches a commit message body says were merged or reverted.

    Merged patches are returned as found. If no merge message is present, a
    rollback message is looked for and its patches are returned prefixed with
    "-". Returns "" when neither message is found.
    """
    merged = MatchSafe(MERGE_MESSAGE_RE.search(body))
    if merged:
      return merged

    reverted = MatchSafe(ROLLBACK_MESSAGE_RE.search(body))
    if reverted:
      # Indicate reverted patches with a "-".
      return "-%s" % reverted
    return reverted

  def GetReleaseDict(
      self, git_hash, bleeding_edge_rev, branch, version, patches, cl_body):
    """Build the dictionary that describes one release.

    The SVN revision and the commit date are looked up from git_hash; the
    review link is extracted from cl_body. The chromium fields start out
    empty and are filled in by later steps.
    """
    revision = self.GitSVNFindSVNRev(git_hash)
    commit_date = self.GitLog(n=1, format="%ci", git_hash=git_hash)
    review_link = MatchSafe(REVIEW_LINK_RE.search(cl_body))
    revision_link = "https://code.google.com/p/v8/source/detail?r=%s" % revision
    return {
      # The SVN revision on the branch.
      "revision": revision,
      # The SVN revision on bleeding edge (only for newer trunk pushes).
      "bleeding_edge": bleeding_edge_rev,
      # The branch name.
      "branch": branch,
      # The version for displaying in the form 3.26.3 or 3.26.3.12.
      "version": version,
      # The date of the commit.
      "date": commit_date,
      # Merged patches if available in the form 'r1234, r2345'.
      "patches_merged": patches,
      # Default for easier output formatting.
      "chromium_revision": "",
      # Default for easier output formatting.
      "chromium_branch": "",
      # Link to the CL on code review. Trunk pushes are not uploaded, so this
      # field will be populated below with the recent roll CL link.
      "review_link": review_link,
      # Link to the commit message on google code.
      "revision_link": revision_link,
    }

  def GetRelease(self, git_hash, branch):
    """Return (release dict, patch level) for the commit git_hash.

    The version parts are read from the step state after
    ReadAndPersistVersion(). A patch level other than "0" is appended to the
    version, and only then is the commit body scanned for merged patches.
    """
    self.ReadAndPersistVersion()
    version = ".".join([self["major"], self["minor"], self["build"]])
    body = self.GitLog(n=1, format="%B", git_hash=git_hash)

    patch_level = self["patch"]
    patches = ""
    if patch_level != "0":
      version = "%s.%s" % (version, patch_level)
      patches = self.GetMergedPatches(body)

    title = self.GitLog(n=1, format="%s", git_hash=git_hash)
    bleeding_edge_rev = self.GetBleedingEdgeFromPush(title)
    release = self.GetReleaseDict(
        git_hash, bleeding_edge_rev, branch, version, patches, body)
    return release, patch_level

  def GetReleasesFromBleedingEdge(self):
    """Return release dicts for the bleeding edge tags in the svn tag log.

    Only the 20 most recent log entries are requested.
    """
    tag_text = self.SVN("log https://v8.googlecode.com/svn/tags -v --limit 20")

    # Bleeding edge releases do not contain patches or a code review link, as
    # tags are not uploaded.
    return [
      self.GetReleaseDict(self.GitSVNFindGitHash(revision), revision,
                          "bleeding_edge", tag, "", "")
      for tag, revision in BLEEDING_EDGE_TAGS_RE.findall(tag_text)
    ]

  def GetReleasesFromBranch(self, branch):
    """Return the release dicts found on the given branch.

    The checkout is reset to svn/<branch> first. For 'bleeding_edge' the
    releases come from the svn tags; for every other branch each commit that
    touched the version file contributes one release.
    """
    self.GitReset("svn/%s" % branch)
    if branch == 'bleeding_edge':
      return self.GetReleasesFromBleedingEdge()

    stop_at_branch_point = branch != "trunk"
    releases = []
    try:
      for git_hash in self.GitLog(format="%H").splitlines():
        touches_version_file = VERSION_FILE in self.GitChangedFiles(git_hash)
        if not touches_version_file:
          continue
        if self.ExceedsMax(releases):
          break  # pragma: no cover
        if not self.GitCheckoutFileSafe(VERSION_FILE, git_hash):
          break  # pragma: no cover

        release, patch_level = self.GetRelease(git_hash, branch)
        releases.append(release)

        # Follow branches only until their creation point.
        # TODO(machenbach): This omits patches if the version file wasn't
        # manipulated correctly. Find a better way to detect the point where
        # the parent of the branch head leads to the trunk branch.
        if stop_at_branch_point and patch_level == "0":
          break

    # Allow Ctrl-C interrupt.
    except (KeyboardInterrupt, SystemExit):  # pragma: no cover
      pass

    # Clean up checked-out version file.
    self.GitCheckoutFileSafe(VERSION_FILE, "HEAD")
    return releases

  def RunStep(self):
    """Collect the releases selected by --branch into self["releases"].

    'recent' covers the two newest version branches plus trunk and bleeding
    edge, 'all' covers every version branch plus trunk and bleeding edge, and
    any other value must name a single known branch.
    """
    self.GitCreateBranch(self._config["BRANCHNAME"])

    # Get relevant remote branches, e.g. "svn/3.25", without 'svn/' prefix.
    branches = [remote[4:] for remote in self.GitRemotes()
                if re.match(r"^svn/\d+\.\d+$", remote)]

    options = self._options
    selection = options.branch
    releases = []
    if selection == 'recent':
      # Get only recent development on trunk, beta and stable.
      if options.max_releases == 0:  # pragma: no cover
        options.max_releases = 10
      beta, stable = SortBranches(branches)[0:2]
      for name in (stable, beta, "trunk", "bleeding_edge"):
        releases += self.GetReleasesFromBranch(name)
    elif selection == 'all':  # pragma: no cover
      # Retrieve the full release history.
      for name in branches + ["trunk", "bleeding_edge"]:
        releases += self.GetReleasesFromBranch(name)
    else:  # pragma: no cover
      # Retrieve history for a specified branch.
      assert selection in branches + ["trunk", "bleeding_edge"]
      releases += self.GetReleasesFromBranch(selection)

    def version_key(release):
      return SortingKey(release["version"])

    self["releases"] = sorted(releases, key=version_key, reverse=True)
263
264
class SwitchChromium(Step):
  """Sanity-checks the Chromium checkout given by the --chromium option."""
  MESSAGE = "Switch to Chromium checkout."

  def RunStep(self):
    """Report via self.Die if the checkout is dirty or has no DEPS file."""
    chromium_dir = self._options.chromium

    # Check for a clean workdir.
    workdir_clean = self.GitIsWorkdirClean(cwd=chromium_dir)
    if not workdir_clean:  # pragma: no cover
      self.Die("Workspace is not clean. Please commit or undo your changes.")

    # Assert that the DEPS file is there.
    deps_path = os.path.join(chromium_dir, "DEPS")
    if not os.path.exists(deps_path):  # pragma: no cover
      self.Die("DEPS file not present.")
276
277
class UpdateChromiumCheckout(Step):
  """Brings the Chromium checkout up to date and creates the work branch."""
  MESSAGE = "Update the checkout and create a new branch."

  def RunStep(self):
    """Check out master, pull, then create the configured branch.

    All three git operations run in the directory given by --chromium.
    """
    chromium_dir = self._options.chromium
    self.GitCheckout("master", cwd=chromium_dir)
    self.GitPull(cwd=chromium_dir)
    self.GitCreateBranch(self.Config("BRANCHNAME"), cwd=chromium_dir)
286
287
def ConvertToCommitNumber(step, revision):
  """Return revision as a commit number, converting it if necessary.

  A revision that consists only of digits and is shorter than 8 characters is
  taken to be a number already and is returned unchanged. Anything else is
  treated as a git hash and converted through step.GitConvertToSVNRevision,
  which runs in the "v8" subdirectory of the chromium checkout.
  """
  # Simple check for git hashes.
  already_a_number = revision.isdigit() and len(revision) < 8
  if not already_a_number:
    v8_checkout = os.path.join(step._options.chromium, "v8")
    return step.GitConvertToSVNRevision(revision, cwd=v8_checkout)
  return revision
294
295
class RetrieveChromiumV8Releases(Step):
  """Maps V8 trunk and bleeding edge releases to Chromium revision ranges.

  The Chromium history is scanned for commits that changed DEPS. The V8
  revision pinned by each of them is paired with the Chromium commit position
  and the resulting ranges are written to the "chromium_revision" field of
  the matching release dictionaries.
  """
  MESSAGE = "Retrieve V8 releases from Chromium DEPS."

  def RunStep(self):
    """Fill in "chromium_revision" for the trunk/bleeding_edge releases.

    Returns True early when there are no such releases; otherwise returns
    None after updating the release dictionaries in place.
    """
    cwd = self._options.chromium
    releases = [release for release in self["releases"]
                if release["branch"] in ["trunk", "bleeding_edge"]]
    if not releases:  # pragma: no cover
      print("No releases detected. Skipping chromium history.")
      return True

    # Update v8 checkout in chromium.
    self.GitFetchOrigin(cwd=os.path.join(cwd, "v8"))

    oldest_v8_rev = int(releases[-1]["revision"])

    cr_releases = []
    try:
      for git_hash in self.GitLog(
          format="%H", grep="V8", cwd=cwd).splitlines():
        if "DEPS" not in self.GitChangedFiles(git_hash, cwd=cwd):
          continue
        if not self.GitCheckoutFileSafe("DEPS", git_hash, cwd=cwd):
          break  # pragma: no cover
        deps = FileToText(os.path.join(cwd, "DEPS"))
        match = DEPS_RE.search(deps)
        if not match:
          continue

        cr_rev = self.GetCommitPositionNumber(git_hash, cwd=cwd)
        if not cr_rev:
          # Without a commit position there is no pair to record. The v8
          # revision is only computed below, so the stop check is skipped
          # as well instead of reading a missing or stale value.
          continue

        v8_rev = ConvertToCommitNumber(self, match.group(1))
        cr_releases.append([cr_rev, v8_rev])

        # Stop after reaching beyond the last v8 revision we want to update.
        # We need a small buffer for possible revert/reland frenzies.
        # TODO(machenbach): Subtraction is not git friendly.
        if int(v8_rev) < oldest_v8_rev - 100:
          break  # pragma: no cover

    # Allow Ctrl-C interrupt.
    except (KeyboardInterrupt, SystemExit):  # pragma: no cover
      pass

    # Clean up.
    self.GitCheckoutFileSafe("DEPS", "HEAD", cwd=cwd)

    # Add the chromium ranges to the v8 trunk and bleeding_edge releases.
    # Ranges for revisions that are not among the releases are ignored.
    all_ranges = BuildRevisionRanges(cr_releases)
    releases_dict = dict((r["revision"], r) for r in releases)
    for revision, ranges in all_ranges.items():
      release = releases_dict.get(revision)
      if release is not None:
        release["chromium_revision"] = ranges
346
347
348# TODO(machenbach): Unify common code with method above.
class RietrieveChromiumBranches(Step):
  """Maps V8 trunk releases to the Chromium branches that pin them.

  The DEPS file of every "branch-heads/<number>" remote is inspected and the
  resulting branch ranges are written to the "chromium_branch" field of the
  matching trunk release dictionaries.
  """
  MESSAGE = "Retrieve Chromium branch information."

  def RunStep(self):
    """Fill in "chromium_branch" for the trunk releases.

    Returns True early when there are no trunk releases; otherwise returns
    None after updating the release dictionaries in place.
    """
    cwd = self._options.chromium
    trunk_releases = [release for release in self["releases"]
                      if release["branch"] == "trunk"]
    if not trunk_releases:  # pragma: no cover
      print("No trunk releases detected. Skipping chromium history.")
      return True

    oldest_v8_rev = int(trunk_releases[-1]["revision"])

    # Filter out irrelevant branches and transform the remaining ones into
    # pure branch numbers, newest (highest) first.
    branch_numbers = []
    for remote in self.GitRemotes(cwd=cwd):
      head = re.match(r"branch-heads/(\d+)", remote)
      if head:
        branch_numbers.append(int(head.group(1)))
    branch_numbers.sort(reverse=True)

    cr_branches = []
    try:
      for number in branch_numbers:
        if not self.GitCheckoutFileSafe("DEPS",
                                        "branch-heads/%d" % number,
                                        cwd=cwd):
          break  # pragma: no cover
        deps = FileToText(os.path.join(cwd, "DEPS"))
        match = DEPS_RE.search(deps)
        if not match:
          continue

        v8_rev = ConvertToCommitNumber(self, match.group(1))
        cr_branches.append([str(number), v8_rev])

        # Stop after reaching beyond the last v8 revision we want to update.
        # We need a small buffer for possible revert/reland frenzies.
        # TODO(machenbach): Subtraction is not git friendly.
        if int(v8_rev) < oldest_v8_rev - 100:
          break  # pragma: no cover

    # Allow Ctrl-C interrupt.
    except (KeyboardInterrupt, SystemExit):  # pragma: no cover
      pass

    # Clean up.
    self.GitCheckoutFileSafe("DEPS", "HEAD", cwd=cwd)

    # Add the chromium branches to the v8 trunk releases. Ranges for
    # revisions that are not among the trunk releases are ignored.
    all_ranges = BuildRevisionRanges(cr_branches)
    trunk_dict = dict((r["revision"], r) for r in trunk_releases)
    for revision, ranges in all_ranges.items():
      if revision in trunk_dict:
        trunk_dict[revision]["chromium_branch"] = ranges
402
403
class CleanUp(Step):
  """Removes the temporary branch from the Chromium checkout."""
  MESSAGE = "Clean up."

  def RunStep(self):
    """Check out master, delete the work branch, then run CommonCleanup.

    The two git operations run in the directory given by --chromium.
    """
    chromium_dir = self._options.chromium
    self.GitCheckout("master", cwd=chromium_dir)
    self.GitDeleteBranch(self.Config("BRANCHNAME"), cwd=chromium_dir)
    self.CommonCleanup()
411
412
class WriteOutput(Step):
  """Exports the collected releases as CSV and/or JSON, or prints them."""
  MESSAGE = "Print output."

  # NOTE(review): unlike the other steps this class defines Run rather than
  # RunStep; presumably deliberate - confirm against the Step base class.
  def Run(self):
    """Write self["releases"] to the files selected on the command line.

    With --csv a fixed set of columns is written, one row per release and
    without a header row. With --json the whole list is dumped. If neither
    option is given, the list is printed instead.
    """
    csv_path = self._options.csv
    json_path = self._options.json

    if csv_path:
      columns = ["version", "branch", "revision",
                 "chromium_revision", "patches_merged"]
      with open(csv_path, "w") as csv_file:
        # Keys outside of columns are ignored; missing ones become "".
        row_writer = csv.DictWriter(csv_file, columns,
                                    restval="",
                                    extrasaction="ignore")
        for release in self["releases"]:
          row_writer.writerow(release)

    if json_path:
      with open(json_path, "w") as json_file:
        json_file.write(json.dumps(self["releases"]))

    if not (csv_path or json_path):
      print(self["releases"])  # pragma: no cover
431
432
class Releases(ScriptsBase):
  """Script definition: command-line options, configuration and steps."""

  def _PrepareOptions(self, parser):
    """Register the options of this script on the given argument parser."""
    add_option = parser.add_argument
    add_option("-b", "--branch", default="recent",
               help=("The branch to analyze. If 'all' is specified, "
                     "analyze all branches. If 'recent' (default) "
                     "is specified, track beta, stable and trunk."))
    add_option("-c", "--chromium",
               help=("The path to your Chromium src/ "
                     "directory to automate the V8 roll."))
    add_option("--csv", help="Path to a CSV file for export.")
    add_option("-m", "--max-releases", type=int, default=0,
               help="The maximum number of releases to track.")
    add_option("--json", help="Path to a JSON file for export.")

  def _ProcessOptions(self, options):  # pragma: no cover
    """Accept every combination of options."""
    return True

  def _Config(self):
    """Return the configuration: work branch name and state file base."""
    config = {}
    config["BRANCHNAME"] = "retrieve-v8-releases"
    config["PERSISTFILE_BASENAME"] = "/tmp/v8-releases-tempfile"
    return config

  def _Steps(self):
    """Return the step classes of this script in execution order."""
    v8_steps = [Preparation, RetrieveV8Releases]
    chromium_steps = [
      SwitchChromium,
      UpdateChromiumCheckout,
      RetrieveChromiumV8Releases,
      RietrieveChromiumBranches,
    ]
    final_steps = [CleanUp, WriteOutput]
    return v8_steps + chromium_steps + final_steps
467
468
# Script entry point: run the Releases script and use its result as the
# process exit status.
if __name__ == "__main__":  # pragma: no cover
  exit_status = Releases().Run()
  sys.exit(exit_status)
471