15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/adjustment_method.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <limits>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <list>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map>
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <set>
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/format_macros.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
185e3f23d412006dc4db4e659864679f29341e113fTorne (Richard Coles)#include "base/strings/stringprintf.h"
19eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/assembly_program.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/courgette.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "courgette/encoded_program.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Shingle weighting matching.
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)We have a sequence S1 of symbols from alphabet A1={A,B,C,...} called the 'model'
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)and a second sequence of S2 of symbols from alphabet A2={U,V,W,....} called the
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)'program'.  Each symbol in A1 has a unique numerical name or index.  We can
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)transcribe the sequence S1 to a sequence T1 of indexes of the symbols. We wish
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)to assign indexes to the symbols in A2 so that when we transcribe S2 into T2, T2
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)has long subsequences that occur in T1.  This will ensure that the sequence
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)T1;T2 compresses to be only slightly larger than the compressed T1.
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)The algorithm for matching members of S2 with members of S1 is eager - it makes
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)matches without backtracking, until no more matches can be made.  Each variable
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)(symbol) U,V,... in A2 has a set of candidates from A1, each candidate with a
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)weight summarizing the evidence for the match.  We keep a VariableQueue of
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)U,V,... sorted by how much the evidence for the best choice outweighs the
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)evidence for the second choice, i.e. prioritized by how 'clear cut' the best
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)assignment is.  We pick the variable with the most clear-cut candidate, make the
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)assignment, adjust the evidence and repeat.
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)What has not been described so far is how the evidence is gathered and
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)maintained.  We are working under the assumption that S1 and S2 are largely
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)similar.  (A different assumption might be that S1 and S2 are dissimilar except
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)for many long subsequences.)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)A naive algorithm would consider all pairs (A,U) and for each pair assess the
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)benefit, or score, the assignment U:=A.  The score might count the number of
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)occurrences of U in S2 which appear in similar contexts to A in S1.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)To distinguish contexts we view S1 and S2 as a sequence of overlapping k-length
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)substrings or 'shingles'.  Two shingles are compatible if the symbols in one
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)shingle could be matched with the symbols in the other symbol.  For example, ABC
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)is *not* compatible with UVU because it would require conflicting matches A=U
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)and C=U.  ABC is compatible with UVW, UWV, WUV, VUW etc.  We can't tell which
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)until we make an assignment - the compatible shingles form an equivalence class.
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)After assigning U:=A then only UVW and UWV (equivalently AVW, AWV) are
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)compatible.  As we make assignments the number of equivalence classes of
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)shingles increases and the number of members of each equivalence class
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)decreases.  The compatibility test becomes more restrictive.
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)We gather evidence for the potential assignment U:=A by counting how many
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)shingles containing U are compatible with shingles containing A.  Thus symbols
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)occurring a large number of times in compatible contexts will be assigned first.
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Finding the 'most clear-cut' assignment by considering all pairs symbols and for
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)each pair comparing the contexts of each pair of occurrences of the symbols is
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)computationally infeasible.  We get the job done in a reasonable time by
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)approaching it 'backwards' and making incremental changes as we make
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)assignments.
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)First the shingles are partitioned according to compatibility.  In S1=ABCDD and
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)S2=UVWXX we have a total of 6 shingles, each occuring once. (ABC:1 BCD:1 CDD:1;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)UVW:1 VWX: WXX:1) all fit the pattern <V0 V1 V2> or the pattern <V0 V1 V1>.  The
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)first pattern indicates that each position matches a different symbol, the
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)second pattern indicates that the second symbol is repeated.
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pattern      S1 members      S2 members
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  <V0 V1 V2>:  {ABC:1, BCD:1}; {UVW:1, VWX:1}
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  <V0 V1 V1>:  {CDD:1}         {WXX:1}
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)The second pattern appears to have a unique assignment but we don't make the
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)assignment on such scant evidence.  If S1 and S2 do not match exactly, there
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)will be numerous spurious low-score matches like this.  Instead we must see what
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)assignments are indicated by considering all of the evidence.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)First pattern has 2 x 2 = 4 shingle pairs.  For each pair we count the number
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)of symbol assignments.  For ABC:a * UVW:b accumulate min(a,b) to each of
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {U:=A, V:=B, W:=C}.
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)After accumulating over all 2 x 2 pairs:
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  U: {A:1  B:1}
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  V: {A:1  B:2  C:1}
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  W: {B:1  C:2  D:1 }
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  X: {C:1  D:1}
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)The second pattern contributes:
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  W: {C:1}
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  X: {D:2}
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Sum:
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  U: {A:1  B:1}
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  V: {A:1  B:2  C:1}
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  W: {B:1  C:3  D:1}
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  X: {C:1  D:3}
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)From this we decide to assign X:=D (because this assignment has both the largest
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)difference above the next candidate (X:=C) and this is also the largest
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)proportionately over the sum of alternatives).
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Lets assume D has numerical 'name' 77.  The assignment X:=D sets X to 77 too.
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Next we repartition all the shingles containing X or D:
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pattern      S1 members      S2 members
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  <V0 V1 V2>:  {ABC:1};        {UVW:1}
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  <V0 V1 77>:  {BCD:1};        {VWX:1}
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  <V0 77 77>:  {CDD:1}         {WXX:1}
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)As we repartition, we recalculate the contributions to the scores:
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  U: {A:1}
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  V: {B:2}
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  W: {C:3}
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)All the remaining assignments are now fixed.
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)There is one step in the incremental algorithm that is still infeasibly
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)expensive: the contributions due to the cross product of large equivalence
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)classes.  We settle for making an approximation by computing the contribution of
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)the cross product of only the most common shingles.  The hope is that the noise
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from the long tail of uncounted shingles is well below the scores being used to
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)pick assignments.  The second hope is that as assignment are made, the large
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)equivalence class will be partitioned into smaller equivalence classes, reducing
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)the noise over time.
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)In the code below the shingles are bigger (Shingle::kWidth = 5).
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Class ShinglePattern holds the data for one pattern.
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)There is an optimization for this case:
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  <V0 V1 V1>:  {CDD:1}         {WXX:1}
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Above we said that we don't make an assignment on this "scant evidence".  There
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)is an exception: if there is only one variable unassigned (more like the <V0 77
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)77> pattern) AND there are no occurrences of C and W other than those counted in
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)this pattern, then there is no competing evidence and we go ahead with the
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)assignment immediately.  This produces slightly better results because these
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)cases tend to be low-scoring and susceptible to small mistakes made in
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)low-scoring assignments in the approximation for large equivalence classes.
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace courgette {
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace adjustment_method_2 {
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)////////////////////////////////////////////////////////////////////////////////
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AssignmentCandidates;
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class LabelInfoMaker;
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Shingle;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ShinglePattern;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The purpose of adjustment is to assign indexes to Labels of a program 'p' to
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// make the sequence of indexes similar to a 'model' program 'm'.  Labels
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// themselves don't have enough information to do this job, so we work with a
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LabelInfo surrogate for each label.
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class LabelInfo {
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Just a no-argument constructor and copy constructor.  Actual LabelInfo
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // objects are allocated in std::pair structs in a std::map.
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo()
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : label_(NULL), is_model_(false), debug_index_(0), refs_(0),
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        assignment_(NULL), candidates_(NULL)
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {}
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~LabelInfo();
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AssignmentCandidates* candidates();
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Label* label_;             // The label that this info a surrogate for.
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 is_model_ : 1;      // Is the label in the model?
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 debug_index_ : 31;  // A small number for naming the label in debug
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             // output. The pair (is_model_, debug_index_) is
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             // unique.
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int refs_;                 // Number of times this Label is referenced.
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo* assignment_;    // Label from other program corresponding to this.
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<uint32> positions_;  // Offsets into the trace of references.
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AssignmentCandidates* candidates_;
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void operator=(const LabelInfo*);  // Disallow assignment only.
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Public compiler generated copy constructor is needed to constuct
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // std::pair<Label*, LabelInfo> so that fresh LabelInfos can be allocated
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // inside a std::map.
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::vector<LabelInfo*> Trace;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string ToString(const LabelInfo* info) {
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::StringAppendF(&s, "%c%d", "pm"[info->is_model_], info->debug_index_);
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (info->label_->index_ != Label::kNoIndex)
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, " (%d)", info->label_->index_);
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::StringAppendF(&s, " #%u", info->refs_);
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LabelInfoMaker maps labels to their surrogate LabelInfo objects.
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class LabelInfoMaker {
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfoMaker() : debug_label_index_gen_(0) {}
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo* MakeLabelInfo(Label* label, bool is_model, uint32 position) {
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo& slot = label_infos_[label];
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (slot.label_ == NULL) {
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      slot.label_ = label;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      slot.is_model_ = is_model;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      slot.debug_index_ = ++debug_label_index_gen_;
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    slot.positions_.push_back(position);
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++slot.refs_;
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return &slot;
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ResetDebugLabel() { debug_label_index_gen_ = 0; }
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int debug_label_index_gen_;
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note LabelInfo is allocated 'flat' inside map::value_type, so the LabelInfo
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // lifetimes are managed by the map.
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<Label*, LabelInfo> label_infos_;
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(LabelInfoMaker);
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct OrderLabelInfo {
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool operator()(const LabelInfo* a, const LabelInfo* b) const {
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (a->label_->rva_ < b->label_->rva_) return true;
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (a->label_->rva_ > b->label_->rva_) return false;
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (a == b) return false;
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return a->positions_ < b->positions_;  // Lexicographic ordering of vector.
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// AssignmentCandidates is a priority queue of candidate assignments to
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a single program LabelInfo, |program_info_|.
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AssignmentCandidates {
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit AssignmentCandidates(LabelInfo* program_info)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : program_info_(program_info) {}
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo* program_info() const { return program_info_; }
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool empty() const { return label_to_score_.empty(); }
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo* top_candidate() const { return queue_.begin()->second; }
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Update(LabelInfo* model_info, int delta_score) {
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(delta_score != 0);
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int old_score = 0;
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int new_score = 0;
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelToScore::iterator p = label_to_score_.find(model_info);
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (p != label_to_score_.end()) {
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      old_score = p->second;
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      new_score = old_score + delta_score;
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      queue_.erase(ScoreAndLabel(old_score, p->first));
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (new_score == 0) {
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        label_to_score_.erase(p);
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        p->second = new_score;
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        queue_.insert(ScoreAndLabel(new_score, model_info));
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      new_score = delta_score;
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      label_to_score_.insert(std::make_pair(model_info, new_score));
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      queue_.insert(ScoreAndLabel(new_score, model_info));
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(queue_.size() == label_to_score_.size());
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int TopScore() const {
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int first_value = 0;
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int second_value = 0;
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Queue::const_iterator p = queue_.begin();
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (p != queue_.end()) {
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      first_value = p->first;
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++p;
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (p != queue_.end()) {
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        second_value = p->first;
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return first_value - second_value;
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool HasPendingUpdates() { return !pending_updates_.empty(); }
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddPendingUpdate(LabelInfo* model_info, int delta_score) {
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(delta_score != 0);
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pending_updates_[model_info] += delta_score;
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ApplyPendingUpdates() {
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // TODO(sra): try to walk |pending_updates_| and |label_to_score_| in
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // lockstep.  Try to batch updates to |queue_|.
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t zeroes = 0;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (LabelToScore::iterator p = pending_updates_.begin();
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != pending_updates_.end();
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (p->second != 0)
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Update(p->first, p->second);
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++zeroes;
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pending_updates_.clear();
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Print(int max) {
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(2) << "score "  << TopScore() << "  " << ToString(program_info_)
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << " := ?";
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!pending_updates_.empty())
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(2) << pending_updates_.size() << " pending";
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int count = 0;
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Queue::iterator q = queue_.begin();  q != queue_.end();  ++q) {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (++count > max) break;
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(2) << "   " << q->first << "  " << ToString(q->second);
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::map<LabelInfo*, int, OrderLabelInfo> LabelToScore;
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::pair<int, LabelInfo*> ScoreAndLabel;
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct OrderScoreAndLabelByScoreDecreasing {
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    OrderLabelInfo tie_breaker;
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool operator()(const ScoreAndLabel& a, const ScoreAndLabel& b) const {
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (a.first > b.first) return true;
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (a.first < b.first) return false;
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return tie_breaker(a.second, b.second);
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<ScoreAndLabel, OrderScoreAndLabelByScoreDecreasing> Queue;
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo* program_info_;
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelToScore label_to_score_;
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelToScore pending_updates_;
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Queue queue_;
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)AssignmentCandidates* LabelInfo::candidates() {
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (candidates_ == NULL)
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    candidates_ = new AssignmentCandidates(this);
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return candidates_;
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)LabelInfo::~LabelInfo() {
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete candidates_;
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A Shingle is a short fixed-length string of LabelInfos that actually occurs
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in a Trace.  A Shingle may occur many times.  We repesent the Shingle by the
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// position of one of the occurrences in the Trace.
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Shingle {
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const size_t kWidth = 5;
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct InterningLess {
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool operator()(const Shingle& a, const Shingle& b) const;
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<Shingle, InterningLess> OwningSet;
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Shingle* Find(const Trace& trace, size_t position,
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                       OwningSet* owning_set) {
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::pair<OwningSet::iterator, bool> pair =
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        owning_set->insert(Shingle(trace, position));
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // pair.first iterator 'points' to the newly inserted Shingle or the
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // previouly inserted one that looks the same according to the comparator.
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // const_cast required because key is const.  We modify the Shingle
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // extensively but not in a way that affects InterningLess.
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Shingle* shingle = const_cast<Shingle*>(&*pair.first);
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    shingle->add_position(position);
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return shingle;
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfo* at(size_t i) const { return trace_[exemplar_position_ + i]; }
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void add_position(size_t position) {
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    positions_.push_back(static_cast<uint32>(position));
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int position_count() const { return static_cast<int>(positions_.size()); }
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool InModel() const { return at(0)->is_model_; }
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePattern* pattern() const { return pattern_; }
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_pattern(ShinglePattern* pattern) { pattern_ = pattern; }
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct PointerLess {
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool operator()(const Shingle* a, const Shingle* b) const {
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Arbitrary but repeatable (memory-address) independent ordering:
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return a->exemplar_position_ < b->exemplar_position_;
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // return InterningLess()(*a, *b);
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Shingle(const Trace& trace, size_t exemplar_position)
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : trace_(trace),
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        exemplar_position_(exemplar_position),
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        pattern_(NULL) {
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Trace& trace_;             // The shingle lives inside trace_.
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t exemplar_position_;       // At this position (and other positions).
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<uint32> positions_;  // Includes exemplar_position_.
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePattern* pattern_;       // Pattern changes as LabelInfos are assigned.
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend std::string ToString(const Shingle* instance);
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can't disallow the copy constructor because we use std::set<Shingle> and
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // VS2005's implementation of std::set<T>::set() requires T to have a copy
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // constructor.
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   DISALLOW_COPY_AND_ASSIGN(Shingle);
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void operator=(const Shingle&);  // Disallow assignment only.
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string ToString(const Shingle* instance) {
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* sep = "<";
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < Shingle::kWidth; ++i) {
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // base::StringAppendF(&s, "%s%x ", sep, instance.at(i)->label_->rva_);
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += sep;
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += ToString(instance->at(i));
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    sep = ", ";
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::StringAppendF(&s, ">(%" PRIuS ")@{%d}",
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      instance->exemplar_position_,
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      instance->position_count());
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Shingle::InterningLess::operator()(
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle& a,
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle& b) const {
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0;  i < kWidth;  ++i) {
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* info_a = a.at(i);
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* info_b = b.at(i);
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info_a->label_->rva_ < info_b->label_->rva_)
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info_a->label_->rva_ > info_b->label_->rva_)
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info_a->is_model_ < info_b->is_model_)
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info_a->is_model_ > info_b->is_model_)
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info_a != info_b) {
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NOTREACHED();
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ShinglePattern {
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum { kOffsetMask = 7,  // Offset lives in low bits.
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         kFixed    = 0,    // kind & kVariable == 0  => fixed.
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         kVariable = 8     // kind & kVariable == 1  => variable.
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         };
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sequence[position + (kinds_[i] & kOffsetMask)] gives LabelInfo for position
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // i of shingle.  Below, second 'A' is duplicate of position 1, second '102'
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is duplicate of position 0.
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   <102, A, 103, A , 102>
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      --> <kFixed+0, kVariable+1, kFixed+2, kVariable+1, kFixed+0>
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct Index {
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    explicit Index(const Shingle* instance);
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8 kinds_[Shingle::kWidth];
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8 variables_;
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8 unique_variables_;
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8 first_variable_index_;
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 hash_;
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int assigned_indexes_[Shingle::kWidth];
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ShinglePattern keeps histograms of member Shingle instances, ordered by
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // decreasing number of occurrences.  We don't have a pair (occurrence count,
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Shingle instance), so we use a FreqView adapter to make the instance
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // pointer look like the pair.
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  class FreqView {
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   public:
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    explicit FreqView(const Shingle* instance) : instance_(instance) {}
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int count() const { return instance_->position_count(); }
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle* instance() const { return instance_; }
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    struct Greater {
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      bool operator()(const FreqView& a, const FreqView& b) const {
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (a.count() > b.count()) return true;
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (a.count() < b.count()) return false;
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return resolve_ties(a.instance(), b.instance());
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     private:
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Shingle::PointerLess resolve_ties;
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    };
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   private:
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle* instance_;
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<FreqView, FreqView::Greater> Histogram;
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePattern() : index_(NULL), model_coverage_(0), program_coverage_(0) {}
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Index* index_;  // Points to the key in the owning map value_type.
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Histogram model_histogram_;
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Histogram program_histogram_;
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int model_coverage_;
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int program_coverage_;
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string ToString(const ShinglePattern::Index* index) {
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (index == NULL) {
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s = "<null>";
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, "<%d: ", index->variables_);
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const char* sep = "";
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 0;  i < Shingle::kWidth;  ++i) {
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s += sep;
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sep = ", ";
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      uint32 kind = index->kinds_[i];
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int offset = kind & ShinglePattern::kOffsetMask;
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (kind & ShinglePattern::kVariable)
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::StringAppendF(&s, "V%d", offset);
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::StringAppendF(&s, "%d", index->assigned_indexes_[offset]);
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, " %x", index->hash_);
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += ">";
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string HistogramToString(const ShinglePattern::Histogram& histogram,
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              size_t snippet_max) {
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t histogram_size = histogram.size();
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t snippet_size = 0;
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (ShinglePattern::Histogram::const_iterator p = histogram.begin();
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       p != histogram.end();
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++p) {
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (++snippet_size > snippet_max && snippet_size != histogram_size) {
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s += " ...";
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, " %d", p->count());
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string HistogramToStringFull(const ShinglePattern::Histogram& histogram,
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  const char* indent,
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  size_t snippet_max) {
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t histogram_size = histogram.size();
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t snippet_size = 0;
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (ShinglePattern::Histogram::const_iterator p = histogram.begin();
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       p != histogram.end();
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++p) {
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += indent;
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (++snippet_size > snippet_max && snippet_size != histogram_size) {
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s += "...\n";
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, "(%d) ", p->count());
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += ToString(&(*p->instance()));
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += "\n";
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string ToString(const ShinglePattern* pattern, size_t snippet_max = 3) {
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (pattern == NULL) {
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s = "<null>";
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s = "{";
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += ToString(pattern->index_);
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, ";  %d(%d):",
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        static_cast<int>(pattern->model_histogram_.size()),
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        pattern->model_coverage_);
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += HistogramToString(pattern->model_histogram_, snippet_max);
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringAppendF(&s, ";  %d(%d):",
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        static_cast<int>(pattern->program_histogram_.size()),
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        pattern->program_coverage_);
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += HistogramToString(pattern->program_histogram_, snippet_max);
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += "}";
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string ShinglePatternToStringFull(const ShinglePattern* pattern,
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       size_t max) {
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string s;
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  s += ToString(pattern->index_);
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  s += "\n";
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t model_size = pattern->model_histogram_.size();
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t program_size = pattern->program_histogram_.size();
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::StringAppendF(&s, "  model shingles %" PRIuS "\n", model_size);
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  s += HistogramToStringFull(pattern->model_histogram_, "    ", max);
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::StringAppendF(&s, "  program shingles %" PRIuS "\n", program_size);
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  s += HistogramToStringFull(pattern->program_histogram_, "    ", max);
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct ShinglePatternIndexLess {
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool operator()(const ShinglePattern::Index& a,
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  const ShinglePattern::Index& b) const {
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (a.hash_ < b.hash_) return true;
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (a.hash_ > b.hash_) return false;
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 0;  i < Shingle::kWidth;  ++i) {
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (a.kinds_[i] < b.kinds_[i]) return true;
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (a.kinds_[i] > b.kinds_[i]) return false;
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ((a.kinds_[i] & ShinglePattern::kVariable) == 0) {
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (a.assigned_indexes_[i] < b.assigned_indexes_[i])
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return true;
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (a.assigned_indexes_[i] > b.assigned_indexes_[i])
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return false;
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static uint32 hash_combine(uint32 h, uint32 v) {
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  h += v;
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (h * (37 + 0x0000d100)) ^ (h >> 13);
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ShinglePattern::Index::Index(const Shingle* instance) {
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 hash = 0;
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  variables_ = 0;
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unique_variables_ = 0;
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  first_variable_index_ = 255;
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (uint32 i = 0; i < Shingle::kWidth; ++i) {
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* info = instance->at(i);
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 kind = 0;
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int code = -1;
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t j = 0;
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for ( ; j < i; ++j) {
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (info == instance->at(j)) {  // Duplicate LabelInfo
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        kind = kinds_[j];
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (j == i) {  // Not found above.
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (info->assignment_) {
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        code = info->label_->index_;
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        assigned_indexes_[i] = code;
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        kind = kFixed + i;
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        kind = kVariable + i;
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++unique_variables_;
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (i < first_variable_index_)
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          first_variable_index_ = i;
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (kind & kVariable) ++variables_;
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    hash = hash_combine(hash, code);
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    hash = hash_combine(hash, kind);
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kinds_[i] = kind;
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    assigned_indexes_[i] = code;
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hash_ = hash;
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct ShinglePatternLess {
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool operator()(const ShinglePattern& a, const ShinglePattern& b) const {
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return index_less(*a.index_, *b.index_);
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePatternIndexLess index_less;
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct ShinglePatternPointerLess {
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool operator()(const ShinglePattern* a, const ShinglePattern* b) const {
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return pattern_less(*a, *b);
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePatternLess pattern_less;
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template<int (*Scorer)(const ShinglePattern*)>
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct OrderShinglePatternByScoreDescending {
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool operator()(const ShinglePattern* a, const ShinglePattern* b) const {
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int score_a = Scorer(a);
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int score_b = Scorer(b);
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (score_a > score_b) return true;
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (score_a < score_b) return false;
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return break_ties(a, b);
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePatternPointerLess break_ties;
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns a score for a 'Single Use' rule.  Returns -1 if the rule is not
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// applicable.
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int SingleUseScore(const ShinglePattern* pattern) {
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (pattern->index_->variables_ != 1)
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return -1;
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (pattern->model_histogram_.size() != 1 ||
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->program_histogram_.size() != 1)
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return -1;
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Does this pattern account for all uses of the variable?
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const ShinglePattern::FreqView& program_freq =
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *pattern->program_histogram_.begin();
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const ShinglePattern::FreqView& model_freq =
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *pattern->model_histogram_.begin();
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int p1 = program_freq.count();
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int m1 = model_freq.count();
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p1 == m1) {
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle* program_instance = program_freq.instance();
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle* model_instance = model_freq.instance();
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t variable_index = pattern->index_->first_variable_index_;
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* program_info = program_instance->at(variable_index);
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* model_info = model_instance->at(variable_index);
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!program_info->assignment_) {
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (program_info->refs_ == p1 && model_info->refs_ == m1) {
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return p1;
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return -1;
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The VariableQueue is a priority queue of unassigned LabelInfos from
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the 'program' (the 'variables') and their AssignmentCandidates.
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class VariableQueue {
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::pair<int, LabelInfo*> ScoreAndLabel;
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VariableQueue() {}
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool empty() const { return queue_.empty(); }
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const ScoreAndLabel& first() const { return *queue_.begin(); }
7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For debugging only.
7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Print() const {
7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Queue::const_iterator p = queue_.begin();  p != queue_.end();  ++p) {
7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AssignmentCandidates* candidates = p->second->candidates();
7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      candidates->Print(std::numeric_limits<int>::max());
7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddPendingUpdate(LabelInfo* program_info, LabelInfo* model_info,
7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        int delta_score) {
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AssignmentCandidates* candidates = program_info->candidates();
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!candidates->HasPendingUpdates()) {
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pending_update_candidates_.push_back(candidates);
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    candidates->AddPendingUpdate(model_info, delta_score);
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ApplyPendingUpdates() {
7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 0;  i < pending_update_candidates_.size();  ++i) {
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AssignmentCandidates* candidates = pending_update_candidates_[i];
7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int old_score = candidates->TopScore();
7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      queue_.erase(ScoreAndLabel(old_score, candidates->program_info()));
7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      candidates->ApplyPendingUpdates();
7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!candidates->empty()) {
7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int new_score = candidates->TopScore();
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        queue_.insert(ScoreAndLabel(new_score, candidates->program_info()));
7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pending_update_candidates_.clear();
7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct OrderScoreAndLabelByScoreDecreasing {
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool operator()(const ScoreAndLabel& a, const ScoreAndLabel& b) const {
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (a.first > b.first) return true;
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (a.first < b.first) return false;
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return OrderLabelInfo()(a.second, b.second);
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<ScoreAndLabel, OrderScoreAndLabelByScoreDecreasing> Queue;
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Queue queue_;
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<AssignmentCandidates*> pending_update_candidates_;
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(VariableQueue);
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AssignmentProblem {
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AssignmentProblem(const Trace& trace, size_t model_end)
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : trace_(trace),
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        model_end_(model_end) {
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(2) << "AssignmentProblem::AssignmentProblem  " << model_end << ", "
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << trace.size();
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Solve() {
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (model_end_ < Shingle::kWidth ||
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        trace_.size() - model_end_ < Shingle::kWidth) {
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Nothing much we can do with such a short problem.
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instances_.resize(trace_.size() - Shingle::kWidth + 1, NULL);
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddShingles(0, model_end_);
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddShingles(model_end_, trace_.size());
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    InitialClassify();
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddPatternsNeedingUpdatesToQueues();
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    patterns_needing_updates_.clear();
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (FindAndAssignBestLeader())
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      patterns_needing_updates_.clear();
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PrintActivePatterns();
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<Shingle*, Shingle::PointerLess> ShingleSet;
8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<const ShinglePattern*, ShinglePatternPointerLess>
8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ShinglePatternSet;
8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Patterns are partitioned into the following sets:
8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * Retired patterns (not stored).  No shingles exist for this pattern (they
8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   all now match more specialized patterns).
8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * Useless patterns (not stored).  There are no 'program' shingles for this
8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   pattern (they all now match more specialized patterns).
8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * Single-use patterns - single_use_pattern_queue_.
8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * Other patterns - active_non_single_use_patterns_ / variable_queue_.
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<const ShinglePattern*,
8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   OrderShinglePatternByScoreDescending<&SingleUseScore> >
8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SingleUsePatternQueue;
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void PrintPatternsHeader() const {
8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(2) << shingle_instances_.size() << " instances  "
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << trace_.size() << " trace length  "
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << patterns_.size() << " shingle indexes  "
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << single_use_pattern_queue_.size() << " single use patterns  "
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << active_non_single_use_patterns_.size() << " active patterns";
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void PrintActivePatterns() const {
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShinglePatternSet::const_iterator p =
8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             active_non_single_use_patterns_.begin();
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != active_non_single_use_patterns_.end();
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const ShinglePattern* pattern = *p;
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(2) << ToString(pattern, 10);
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void PrintPatterns() const {
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PrintAllPatterns();
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PrintActivePatterns();
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PrintAllShingles();
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void PrintAllPatterns() const {
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (IndexToPattern::const_iterator p = patterns_.begin();
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != patterns_.end();
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const ShinglePattern& pattern = p->second;
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(2) << ToString(&pattern, 10);
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void PrintAllShingles() const {
8825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Shingle::OwningSet::const_iterator p = shingle_instances_.begin();
8835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != shingle_instances_.end();
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const Shingle& instance = *p;
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(2) << ToString(&instance) << "   " << ToString(instance.pattern());
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddShingles(size_t begin, size_t end) {
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = begin;  i + Shingle::kWidth - 1 < end;  ++i) {
8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      instances_[i] = Shingle::Find(trace_, i, &shingle_instances_);
8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Declassify(Shingle* shingle) {
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ShinglePattern* pattern = shingle->pattern();
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (shingle->InModel()) {
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->model_histogram_.erase(ShinglePattern::FreqView(shingle));
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->model_coverage_ -= shingle->position_count();
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->program_histogram_.erase(ShinglePattern::FreqView(shingle));
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->program_coverage_ -= shingle->position_count();
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    shingle->set_pattern(NULL);
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Reclassify(Shingle* shingle) {
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ShinglePattern* pattern = shingle->pattern();
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(pattern == NULL);
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ShinglePattern::Index index(shingle);
9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (index.variables_ == 0)
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::pair<IndexToPattern::iterator, bool> inserted =
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        patterns_.insert(std::make_pair(index, ShinglePattern()));
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pattern = &inserted.first->second;
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pattern->index_ = &inserted.first->first;
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    shingle->set_pattern(pattern);
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    patterns_needing_updates_.insert(pattern);
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (shingle->InModel()) {
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->model_histogram_.insert(ShinglePattern::FreqView(shingle));
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->model_coverage_ += shingle->position_count();
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->program_histogram_.insert(ShinglePattern::FreqView(shingle));
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern->program_coverage_ += shingle->position_count();
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void InitialClassify() {
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Shingle::OwningSet::iterator p = shingle_instances_.begin();
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != shingle_instances_.end();
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // GCC's set<T>::iterator::operator *() returns a const object.
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Reclassify(const_cast<Shingle*>(&*p));
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For the positions in |info|, find the shingles that overlap that position.
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddAffectedPositions(LabelInfo* info, ShingleSet* affected_shingles) {
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t kWidth = Shingle::kWidth;
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 0;  i < info->positions_.size();  ++i) {
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t position = info->positions_[i];
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Find bounds to the subrange of |trace_| we are in.
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t start = position < model_end_ ? 0 : model_end_;
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t end = position < model_end_ ? model_end_ : trace_.size();
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Clip [position-kWidth+1, position+1)
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t low = position > start + kWidth - 1
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ? position - kWidth + 1
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          : start;
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t high = position + kWidth < end ? position + 1 : end - kWidth + 1;
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (size_t shingle_position = low;
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           shingle_position < high;
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ++shingle_position) {
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Shingle* overlapping_shingle = instances_.at(shingle_position);
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        affected_shingles->insert(overlapping_shingle);
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void RemovePatternsNeedingUpdatesFromQueues() {
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShinglePatternSet::iterator p = patterns_needing_updates_.begin();
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != patterns_needing_updates_.end();
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      RemovePatternFromQueues(*p);
9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddPatternsNeedingUpdatesToQueues() {
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShinglePatternSet::iterator p = patterns_needing_updates_.begin();
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != patterns_needing_updates_.end();
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddPatternToQueues(*p);
9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    variable_queue_.ApplyPendingUpdates();
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void RemovePatternFromQueues(const ShinglePattern* pattern) {
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int single_use_score = SingleUseScore(pattern);
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (single_use_score > 0) {
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t n = single_use_pattern_queue_.erase(pattern);
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG_ASSERT(n == 1);
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (pattern->program_histogram_.empty() &&
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               pattern->model_histogram_.empty()) {
9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NOTREACHED();  // Should not come back to life.
9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (pattern->program_histogram_.empty()) {
9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Useless pattern.
9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      active_non_single_use_patterns_.erase(pattern);
9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddPatternToLabelQueue(pattern, -1);
9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddPatternToQueues(const ShinglePattern* pattern) {
10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int single_use_score = SingleUseScore(pattern);
10025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (single_use_score > 0) {
10035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      single_use_pattern_queue_.insert(pattern);
10045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (pattern->program_histogram_.empty() &&
10055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               pattern->model_histogram_.empty()) {
10065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (pattern->program_histogram_.empty()) {
10075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Useless pattern.
10085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      active_non_single_use_patterns_.insert(pattern);
10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddPatternToLabelQueue(pattern, +1);
10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddPatternToLabelQueue(const ShinglePattern* pattern, int sign) {
10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // For each possible assignment in this pattern, update the potential
10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // contributions to the LabelInfo queues.
10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We want to find for each symbol (LabelInfo) the maximum contribution that
10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // could be achieved by making shingle-wise assignments between shingles in
10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // the model and shingles in the program.
10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //
10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If the shingles in the histograms are independent (no two shingles have a
10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // symbol in common) then any permutation of the assignments is possible,
10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // and the maximum contribution can be found by taking the maximum over all
10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // the pairs.
10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //
10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If the shingles are dependent two things happen.  The maximum
10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // contribution to any given symbol is a sum because the symbol has
10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // contributions from all the shingles containing it.  Second, some
10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // assignments are blocked by previous incompatible assignments.  We want to
10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // avoid a combinatorial search, so we ignore the blocking.
10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t kUnwieldy = 5;
10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    typedef std::map<LabelInfo*, int> LabelToScore;
10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    typedef std::map<LabelInfo*, LabelToScore > ScoreSet;
10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ScoreSet maxima;
10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t n_model_samples = 0;
10405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShinglePattern::Histogram::const_iterator model_iter =
10415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             pattern->model_histogram_.begin();
10425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         model_iter != pattern->model_histogram_.end();
10435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++model_iter) {
10445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (++n_model_samples > kUnwieldy) break;
10455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const ShinglePattern::FreqView& model_freq = *model_iter;
10465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int m1 = model_freq.count();
10475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const Shingle* model_instance = model_freq.instance();
10485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ScoreSet sums;
10505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      size_t n_program_samples = 0;
10515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (ShinglePattern::Histogram::const_iterator program_iter =
10525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               pattern->program_histogram_.begin();
10535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           program_iter != pattern->program_histogram_.end();
10545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ++program_iter) {
10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (++n_program_samples > kUnwieldy) break;
10565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const ShinglePattern::FreqView& program_freq = *program_iter;
10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int p1 = program_freq.count();
10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const Shingle* program_instance = program_freq.instance();
10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // int score = p1;  // ? weigh all equally??
10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int score = std::min(p1, m1);
10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (size_t i = 0;  i < Shingle::kWidth;  ++i) {
10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LabelInfo* program_info = program_instance->at(i);
10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LabelInfo* model_info = model_instance->at(i);
10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if ((model_info->assignment_ == NULL) !=
10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              (program_info->assignment_ == NULL)) {
10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            VLOG(2) << "ERROR " << i
10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << "\n\t"  << ToString(pattern, 10)
10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << "\n\t" << ToString(program_instance)
10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << "\n\t" << ToString(model_instance);
10725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
10735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (!program_info->assignment_ && !model_info->assignment_) {
10745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            sums[program_info][model_info] += score;
10755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
10765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
10775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (ScoreSet::iterator assignee_iterator = sums.begin();
10795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             assignee_iterator != sums.end();
10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             ++assignee_iterator) {
10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          LabelInfo* program_info = assignee_iterator->first;
10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          for (LabelToScore::iterator p = assignee_iterator->second.begin();
10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               p != assignee_iterator->second.end();
10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               ++p) {
10855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            LabelInfo* model_info = p->first;
10865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int score = p->second;
10875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int* slot = &maxima[program_info][model_info];
10885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            *slot = std::max(*slot, score);
10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ScoreSet::iterator assignee_iterator = maxima.begin();
10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         assignee_iterator != maxima.end();
10965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++assignee_iterator) {
10975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LabelInfo* program_info = assignee_iterator->first;
10985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (LabelToScore::iterator p = assignee_iterator->second.begin();
10995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           p != assignee_iterator->second.end();
11005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ++p) {
11015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LabelInfo* model_info = p->first;
11025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int score = sign * p->second;
11035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        variable_queue_.AddPendingUpdate(program_info, model_info, score);
11045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
11055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
11065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AssignOne(LabelInfo* model_info, LabelInfo* program_info) {
11095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(!model_info->assignment_);
11105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(!program_info->assignment_);
11115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(model_info->is_model_);
11125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(!program_info->is_model_);
11135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(3) << "Assign " << ToString(program_info)
11155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << " := " << ToString(model_info);
11165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ShingleSet affected_shingles;
11185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddAffectedPositions(model_info, &affected_shingles);
11195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddAffectedPositions(program_info, &affected_shingles);
11205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShingleSet::iterator p = affected_shingles.begin();
11225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != affected_shingles.end();
11235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
11245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      patterns_needing_updates_.insert((*p)->pattern());
11255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
11265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    RemovePatternsNeedingUpdatesFromQueues();
11285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShingleSet::iterator p = affected_shingles.begin();
11305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != affected_shingles.end();
11315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
11325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Declassify(*p);
11335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
11345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    program_info->label_->index_ = model_info->label_->index_;
11365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Mark as assigned
11375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model_info->assignment_ = program_info;
11385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    program_info->assignment_ = model_info;
11395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (ShingleSet::iterator p = affected_shingles.begin();
11415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         p != affected_shingles.end();
11425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++p) {
11435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Reclassify(*p);
11445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
11455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AddPatternsNeedingUpdatesToQueues();
11475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool AssignFirstVariableOfHistogramHead(const ShinglePattern& pattern) {
11505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const ShinglePattern::FreqView& program_1 =
11515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        *pattern.program_histogram_.begin();
11525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const ShinglePattern::FreqView& model_1 = *pattern.model_histogram_.begin();
11535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle* program_instance = program_1.instance();
11545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Shingle* model_instance = model_1.instance();
11555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t variable_index = pattern.index_->first_variable_index_;
11565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* program_info = program_instance->at(variable_index);
11575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* model_info = model_instance->at(variable_index);
11585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AssignOne(model_info, program_info);
11595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
11605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool FindAndAssignBestLeader() {
11635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG_ASSERT(patterns_needing_updates_.empty());
11645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!single_use_pattern_queue_.empty()) {
11665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const ShinglePattern& pattern = **single_use_pattern_queue_.begin();
11675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AssignFirstVariableOfHistogramHead(pattern);
11685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
11695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (variable_queue_.empty())
11715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
11725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const VariableQueue::ScoreAndLabel best = variable_queue_.first();
11745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int score = best.first;
11755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LabelInfo* assignee = best.second;
11765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // TODO(sra): score (best.first) can be zero.  A zero score means we are
11785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // blindly picking between two (or more) alternatives which look the same.
11795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If we exit on the first zero-score we sometimes get 3-4% better total
11805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // compression.  This indicates that 'infill' is doing a better job than
11815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // picking blindly.  Perhaps we can use an extended region around the
11825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // undistinguished competing alternatives to break the tie.
11835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (score == 0) {
11845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      variable_queue_.Print();
11855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
11865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
11875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AssignmentCandidates* candidates = assignee->candidates();
11895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (candidates->empty())
11905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;  // Should not happen.
11915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AssignOne(candidates->top_candidate(), assignee);
11935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
11945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
11955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
11975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The trace vector contains the model sequence [0, model_end_) followed by
11985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the program sequence [model_end_, trace.end())
11995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Trace& trace_;
12005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t model_end_;
12015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // |shingle_instances_| is the set of 'interned' shingles.
12035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Shingle::OwningSet shingle_instances_;
12045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // |instances_| maps from position in |trace_| to Shingle at that position.
12065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<Shingle*> instances_;
12075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SingleUsePatternQueue single_use_pattern_queue_;
12095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePatternSet active_non_single_use_patterns_;
12105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VariableQueue variable_queue_;
12115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Transient information: when we make an assignment, we need to recompute
12135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // priority queue information derived from these ShinglePatterns.
12145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ShinglePatternSet patterns_needing_updates_;
12155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::map<ShinglePattern::Index,
12175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   ShinglePattern, ShinglePatternIndexLess> IndexToPattern;
12185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IndexToPattern patterns_;
12195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(AssignmentProblem);
12215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
12225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Adjuster : public AdjustmentMethod {
12245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
12255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Adjuster() : prog_(NULL), model_(NULL) {}
12265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~Adjuster() {}
12275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
12295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(1) << "Adjuster::Adjust";
12305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prog_ = program;
12315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model_ = &model;
12325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return Finish();
12335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Finish() {
12365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prog_->UnassignIndexes();
12375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Trace abs32_trace_;
12385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Trace rel32_trace_;
12395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CollectTraces(model_, &abs32_trace_, &rel32_trace_, true);
12405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t abs32_model_end = abs32_trace_.size();
12415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t rel32_model_end = rel32_trace_.size();
12425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CollectTraces(prog_,  &abs32_trace_,  &rel32_trace_,  false);
12435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Solve(abs32_trace_, abs32_model_end);
12445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Solve(rel32_trace_, rel32_model_end);
12455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prog_->AssignRemainingIndexes();
12465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
12475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
12505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void CollectTraces(const AssemblyProgram* program, Trace* abs32, Trace* rel32,
12515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     bool is_model) {
12525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    label_info_maker_.ResetDebugLabel();
12535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const InstructionVector& instructions = program->instructions();
12545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 0;  i < instructions.size();  ++i) {
12555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Instruction* instruction = instructions[i];
12565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (Label* label = program->InstructionAbs32Label(instruction))
12575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ReferenceLabel(abs32, label, is_model);
12585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (Label* label = program->InstructionRel32Label(instruction))
12595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ReferenceLabel(rel32, label, is_model);
12605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
12615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // TODO(sra): we could simply append all the labels in index order to
12625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // incorporate some costing for entropy (bigger deltas) that will be
12635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // introduced into the label address table by non-monotonic ordering.  This
12645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // would have some knock-on effects to parts of the algorithm that work on
12655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // single-occurrence labels.
12665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Solve(const Trace& model, size_t model_end) {
12695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::Time start_time = base::Time::Now();
12705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AssignmentProblem a(model, model_end);
12715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    a.Solve();
12725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    VLOG(1) << " Adjuster::Solve "
12735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            << (base::Time::Now() - start_time).InSecondsF();
12745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ReferenceLabel(Trace* trace, Label* label, bool is_model) {
12775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    trace->push_back(
12785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        label_info_maker_.MakeLabelInfo(label, is_model,
12795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        static_cast<uint32>(trace->size())));
12805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AssemblyProgram* prog_;         // Program to be adjusted, owned by caller.
12835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const AssemblyProgram* model_;  // Program to be mimicked, owned by caller.
12845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LabelInfoMaker label_info_maker_;
12865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
12885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(Adjuster);
12895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
12905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)////////////////////////////////////////////////////////////////////////////////
12925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace adjustment_method_2
12945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)AdjustmentMethod* AdjustmentMethod::MakeShingleAdjustmentMethod() {
12965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return new adjustment_method_2::Adjuster();
12975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
12985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace courgette
1300