1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// This file defines static methods and classes for comparing Protocol
36// Messages (see //google/protobuf/util/message_differencer.h for more
37// information).
38
39#include <google/protobuf/util/message_differencer.h>
40
41#include <algorithm>
42#include <memory>
43#ifndef _SHARED_PTR_H
44#include <google/protobuf/stubs/shared_ptr.h>
45#endif
46#include <utility>
47
48#include <google/protobuf/stubs/callback.h>
49#include <google/protobuf/stubs/common.h>
50#include <google/protobuf/stubs/logging.h>
51#include <google/protobuf/stubs/stringprintf.h>
52#include <google/protobuf/any.h>
53#include <google/protobuf/io/printer.h>
54#include <google/protobuf/io/zero_copy_stream.h>
55#include <google/protobuf/io/zero_copy_stream_impl.h>
56#include <google/protobuf/dynamic_message.h>
57#include <google/protobuf/text_format.h>
58#include <google/protobuf/util/field_comparator.h>
59#include <google/protobuf/stubs/strutil.h>
60
61namespace google {
62namespace protobuf {
63
64namespace util {
65
66// When comparing a repeated field as map, MultipleFieldMapKeyComparator can
67// be used to specify multiple fields as key for key comparison.
68// Two elements of a repeated field will be regarded as having the same key
69// iff they have the same value for every specified key field.
70// Note that you can also specify only one field as key.
71class MessageDifferencer::MultipleFieldsMapKeyComparator
72    : public MessageDifferencer::MapKeyComparator {
73 public:
74  MultipleFieldsMapKeyComparator(
75      MessageDifferencer* message_differencer,
76      const vector<vector<const FieldDescriptor*> >& key_field_paths)
77        : message_differencer_(message_differencer),
78          key_field_paths_(key_field_paths) {
79    GOOGLE_CHECK(!key_field_paths_.empty());
80    for (int i = 0; i < key_field_paths_.size(); ++i) {
81      GOOGLE_CHECK(!key_field_paths_[i].empty());
82    }
83  }
84  MultipleFieldsMapKeyComparator(
85      MessageDifferencer* message_differencer,
86      const FieldDescriptor* key)
87        : message_differencer_(message_differencer) {
88    vector<const FieldDescriptor*> key_field_path;
89    key_field_path.push_back(key);
90    key_field_paths_.push_back(key_field_path);
91  }
92  virtual bool IsMatch(
93      const Message& message1,
94      const Message& message2,
95      const vector<SpecificField>& parent_fields) const {
96    for (int i = 0; i < key_field_paths_.size(); ++i) {
97      if (!IsMatchInternal(message1, message2, parent_fields,
98                           key_field_paths_[i], 0)) {
99        return false;
100      }
101    }
102    return true;
103  }
104 private:
105  bool IsMatchInternal(
106      const Message& message1,
107      const Message& message2,
108      const vector<SpecificField>& parent_fields,
109      const vector<const FieldDescriptor*>& key_field_path,
110      int path_index) const {
111    const FieldDescriptor* field = key_field_path[path_index];
112    vector<SpecificField> current_parent_fields(parent_fields);
113    if (path_index == key_field_path.size() - 1) {
114      if (field->is_repeated()) {
115        if (!message_differencer_->CompareRepeatedField(
116            message1, message2, field, &current_parent_fields)) {
117          return false;
118        }
119      } else {
120        if (!message_differencer_->CompareFieldValueUsingParentFields(
121            message1, message2, field, -1, -1, &current_parent_fields)) {
122          return false;
123        }
124      }
125      return true;
126    } else {
127      const Reflection* reflection1 = message1.GetReflection();
128      const Reflection* reflection2 = message2.GetReflection();
129      bool has_field1 = reflection1->HasField(message1, field);
130      bool has_field2 = reflection2->HasField(message2, field);
131      if (!has_field1 && !has_field2) {
132        return true;
133      }
134      if (has_field1 != has_field2) {
135        return false;
136      }
137      SpecificField specific_field;
138      specific_field.field = field;
139      current_parent_fields.push_back(specific_field);
140      return IsMatchInternal(
141          reflection1->GetMessage(message1, field),
142          reflection2->GetMessage(message2, field),
143          current_parent_fields,
144          key_field_path,
145          path_index + 1);
146    }
147  }
148  MessageDifferencer* message_differencer_;
149  vector<vector<const FieldDescriptor*> > key_field_paths_;
150  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
151};
152
153bool MessageDifferencer::Equals(const Message& message1,
154                                const Message& message2) {
155  MessageDifferencer differencer;
156
157  return differencer.Compare(message1, message2);
158}
159
160bool MessageDifferencer::Equivalent(const Message& message1,
161                                    const Message& message2) {
162  MessageDifferencer differencer;
163  differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
164
165  return differencer.Compare(message1, message2);
166}
167
168bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
169                                             const Message& message2) {
170  MessageDifferencer differencer;
171  differencer.set_float_comparison(
172      MessageDifferencer::APPROXIMATE);
173
174  return differencer.Compare(message1, message2);
175}
176
177bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
178                                                 const Message& message2) {
179  MessageDifferencer differencer;
180  differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
181  differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
182
183  return differencer.Compare(message1, message2);
184}
185
186// ===========================================================================
187
188MessageDifferencer::MessageDifferencer()
189    : reporter_(NULL),
190      field_comparator_(NULL),
191      message_field_comparison_(EQUAL),
192      scope_(FULL),
193      repeated_field_comparison_(AS_LIST),
194      report_matches_(false),
195      output_string_(NULL) { }
196
197MessageDifferencer::~MessageDifferencer() {
198  for (int i = 0; i < owned_key_comparators_.size(); ++i) {
199    delete owned_key_comparators_[i];
200  }
201  for (int i = 0; i < ignore_criteria_.size(); ++i) {
202    delete ignore_criteria_[i];
203  }
204}
205
206void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
207  GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
208  field_comparator_ = comparator;
209}
210
211void MessageDifferencer::set_message_field_comparison(
212    MessageFieldComparison comparison) {
213  message_field_comparison_ = comparison;
214}
215
216void MessageDifferencer::set_scope(Scope scope) {
217  scope_ = scope;
218}
219
220MessageDifferencer::Scope MessageDifferencer::scope() {
221  return scope_;
222}
223
224void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
225  default_field_comparator_.set_float_comparison(
226      comparison == EXACT ?
227      DefaultFieldComparator::EXACT : DefaultFieldComparator::APPROXIMATE);
228}
229
230void MessageDifferencer::set_repeated_field_comparison(
231    RepeatedFieldComparison comparison) {
232  repeated_field_comparison_ = comparison;
233}
234
235void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
236  GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
237                               << field->full_name();
238  const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
239  GOOGLE_CHECK(key_comparator == NULL)
240      << "Cannot treat this repeated field as both Map and Set for"
241      << " comparison.  Field name is: " << field->full_name();
242  GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
243      << "Cannot treat the same field as both SET and LIST. Field name is: "
244      << field->full_name();
245  set_fields_.insert(field);
246}
247
248void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
249  GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
250                              << field->full_name();
251  const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
252  GOOGLE_CHECK(key_comparator == NULL)
253      << "Cannot treat this repeated field as both Map and Set for"
254      << " comparison.  Field name is: " << field->full_name();
255  GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
256      << "Cannot treat the same field as both SET and LIST. Field name is: "
257      << field->full_name();
258  list_fields_.insert(field);
259}
260
261void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
262                                    const FieldDescriptor* key) {
263  GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
264                               << field->full_name();
265  GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
266      << "Field has to be message type.  Field name is: "
267      << field->full_name();
268  GOOGLE_CHECK(key->containing_type() == field->message_type())
269      << key->full_name()
270      << " must be a direct subfield within the repeated field "
271      << field->full_name() << ", not " << key->containing_type()->full_name();
272  GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
273      << "Cannot treat this repeated field as both Map and Set for "
274      << "comparison.";
275  GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
276      << "Cannot treat this repeated field as both Map and List for "
277      << "comparison.";
278  MapKeyComparator* key_comparator =
279      new MultipleFieldsMapKeyComparator(this, key);
280  owned_key_comparators_.push_back(key_comparator);
281  map_field_key_comparator_[field] = key_comparator;
282}
283
284void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
285    const FieldDescriptor* field,
286    const vector<const FieldDescriptor*>& key_fields) {
287  vector<vector<const FieldDescriptor*> > key_field_paths;
288  for (int i = 0; i < key_fields.size(); ++i) {
289    vector<const FieldDescriptor*> key_field_path;
290    key_field_path.push_back(key_fields[i]);
291    key_field_paths.push_back(key_field_path);
292  }
293  TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
294}
295
296void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
297    const FieldDescriptor* field,
298    const vector<vector<const FieldDescriptor*> >& key_field_paths) {
299  GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
300                              << field->full_name();
301  GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
302      << "Field has to be message type.  Field name is: "
303      << field->full_name();
304  for (int i = 0; i < key_field_paths.size(); ++i) {
305    const vector<const FieldDescriptor*>& key_field_path = key_field_paths[i];
306    for (int j = 0; j < key_field_path.size(); ++j) {
307      const FieldDescriptor* parent_field =
308          j == 0 ? field : key_field_path[j - 1];
309      const FieldDescriptor* child_field = key_field_path[j];
310      GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
311          << child_field->full_name()
312          << " must be a direct subfield within the field: "
313          << parent_field->full_name();
314      if (j != 0) {
315        GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
316            << parent_field->full_name() << " has to be of type message.";
317        GOOGLE_CHECK(!parent_field->is_repeated())
318            << parent_field->full_name() << " cannot be a repeated field.";
319      }
320    }
321  }
322  GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
323      << "Cannot treat this repeated field as both Map and Set for "
324      << "comparison.";
325  MapKeyComparator* key_comparator =
326      new MultipleFieldsMapKeyComparator(this, key_field_paths);
327  owned_key_comparators_.push_back(key_comparator);
328  map_field_key_comparator_[field] = key_comparator;
329}
330
331void MessageDifferencer::TreatAsMapUsingKeyComparator(
332    const FieldDescriptor* field,
333    const MapKeyComparator* key_comparator) {
334  GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
335                               << field->full_name();
336  GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
337      << "Field has to be message type.  Field name is: "
338      << field->full_name();
339  GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
340      << "Cannot treat this repeated field as both Map and Set for "
341      << "comparison.";
342  map_field_key_comparator_[field] = key_comparator;
343}
344
345void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
346  ignore_criteria_.push_back(ignore_criteria);
347}
348
349void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
350  ignored_fields_.insert(field);
351}
352
353void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
354                                              double fraction, double margin) {
355  default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
356}
357
358void MessageDifferencer::ReportDifferencesToString(string* output) {
359  GOOGLE_DCHECK(output) << "Specified output string was NULL";
360
361  output_string_ = output;
362  output_string_->clear();
363}
364
365void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
366  // If an output string is set, clear it to prevent
367  // it superceding the specified reporter.
368  if (output_string_) {
369    output_string_ = NULL;
370  }
371
372  reporter_ = reporter;
373}
374
375bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
376                                     const FieldDescriptor* field2) {
377  // Handle sentinel values (i.e. make sure NULLs are always ordered
378  // at the end of the list).
379  if (field1 == NULL) {
380    return false;
381  }
382
383  if (field2 == NULL) {
384    return true;
385  }
386
387  // Always order fields by their tag number
388  return (field1->number() < field2->number());
389}
390
391bool MessageDifferencer::Compare(const Message& message1,
392                                 const Message& message2) {
393  vector<SpecificField> parent_fields;
394
395  bool result = false;
396
397  // Setup the internal reporter if need be.
398  if (output_string_) {
399    io::StringOutputStream output_stream(output_string_);
400    StreamReporter reporter(&output_stream);
401    reporter_ = &reporter;
402    result = Compare(message1, message2, &parent_fields);
403    reporter_ = NULL;
404  } else {
405    result = Compare(message1, message2, &parent_fields);
406  }
407
408  return result;
409}
410
411bool MessageDifferencer::CompareWithFields(
412    const Message& message1,
413    const Message& message2,
414    const vector<const FieldDescriptor*>& message1_fields_arg,
415    const vector<const FieldDescriptor*>& message2_fields_arg) {
416  if (message1.GetDescriptor() != message2.GetDescriptor()) {
417    GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
418                << "descriptors.";
419    return false;
420  }
421
422  vector<SpecificField> parent_fields;
423
424  bool result = false;
425
426  vector<const FieldDescriptor*> message1_fields(message1_fields_arg);
427  vector<const FieldDescriptor*> message2_fields(message2_fields_arg);
428
429  std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
430  std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
431  // Append NULL sentinel values.
432  message1_fields.push_back(NULL);
433  message2_fields.push_back(NULL);
434
435  // Setup the internal reporter if need be.
436  if (output_string_) {
437    io::StringOutputStream output_stream(output_string_);
438    StreamReporter reporter(&output_stream);
439    reporter_ = &reporter;
440    result = CompareRequestedFieldsUsingSettings(
441        message1, message2, message1_fields, message2_fields, &parent_fields);
442    reporter_ = NULL;
443  } else {
444    result = CompareRequestedFieldsUsingSettings(
445        message1, message2, message1_fields, message2_fields, &parent_fields);
446  }
447
448  return result;
449}
450
451bool MessageDifferencer::Compare(
452    const Message& message1,
453    const Message& message2,
454    vector<SpecificField>* parent_fields) {
455  const Descriptor* descriptor1 = message1.GetDescriptor();
456  const Descriptor* descriptor2 = message2.GetDescriptor();
457  if (descriptor1 != descriptor2) {
458    GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
459                << "descriptors. "
460                << descriptor1->full_name() << " vs "
461                << descriptor2->full_name();
462    return false;
463  }
464  // Expand google.protobuf.Any payload if possible.
465  if (descriptor1->full_name() == internal::kAnyFullTypeName) {
466    google::protobuf::scoped_ptr<Message> data1;
467    google::protobuf::scoped_ptr<Message> data2;
468    if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
469      return Compare(*data1, *data2, parent_fields);
470    }
471  }
472  const Reflection* reflection1 = message1.GetReflection();
473  const Reflection* reflection2 = message2.GetReflection();
474
475  // Retrieve all the set fields, including extensions.
476  vector<const FieldDescriptor*> message1_fields;
477  vector<const FieldDescriptor*> message2_fields;
478
479  reflection1->ListFields(message1, &message1_fields);
480  reflection2->ListFields(message2, &message2_fields);
481
482  // Add sentinel values to deal with the
483  // case where the number of the fields in
484  // each list are different.
485  message1_fields.push_back(NULL);
486  message2_fields.push_back(NULL);
487
488  bool unknown_compare_result = true;
489  // Ignore unknown fields in EQUIVALENT mode
490  if (message_field_comparison_ != EQUIVALENT) {
491    const google::protobuf::UnknownFieldSet* unknown_field_set1 =
492        &reflection1->GetUnknownFields(message1);
493    const google::protobuf::UnknownFieldSet* unknown_field_set2 =
494        &reflection2->GetUnknownFields(message2);
495    if (!CompareUnknownFields(message1, message2,
496                              *unknown_field_set1, *unknown_field_set2,
497                              parent_fields)) {
498      if (reporter_ == NULL) {
499        return false;
500      };
501      unknown_compare_result = false;
502    }
503  }
504
505  return CompareRequestedFieldsUsingSettings(
506      message1, message2,
507      message1_fields, message2_fields,
508      parent_fields) && unknown_compare_result;
509}
510
511bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
512    const Message& message1,
513    const Message& message2,
514    const vector<const FieldDescriptor*>& message1_fields,
515    const vector<const FieldDescriptor*>& message2_fields,
516    vector<SpecificField>* parent_fields) {
517  if (scope_ == FULL) {
518    if (message_field_comparison_ == EQUIVALENT) {
519      // We need to merge the field lists of both messages (i.e.
520      // we are merely checking for a difference in field values,
521      // rather than the addition or deletion of fields).
522      vector<const FieldDescriptor*> fields_union;
523      CombineFields(message1_fields, FULL, message2_fields, FULL,
524                    &fields_union);
525      return CompareWithFieldsInternal(message1, message2, fields_union,
526                                       fields_union, parent_fields);
527    } else {
528      // Simple equality comparison, use the unaltered field lists.
529      return CompareWithFieldsInternal(message1, message2, message1_fields,
530                                       message2_fields, parent_fields);
531    }
532  } else {
533    if (message_field_comparison_ == EQUIVALENT) {
534      // We use the list of fields for message1 for both messages when
535      // comparing.  This way, extra fields in message2 are ignored,
536      // and missing fields in message2 use their default value.
537      return CompareWithFieldsInternal(message1, message2, message1_fields,
538                                       message1_fields, parent_fields);
539    } else {
540      // We need to consider the full list of fields for message1
541      // but only the intersection for message2.  This way, any fields
542      // only present in message2 will be ignored, but any fields only
543      // present in message1 will be marked as a difference.
544      vector<const FieldDescriptor*> fields_intersection;
545      CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL,
546                    &fields_intersection);
547      return CompareWithFieldsInternal(message1, message2, message1_fields,
548                                       fields_intersection, parent_fields);
549    }
550  }
551}
552
553void MessageDifferencer::CombineFields(
554    const vector<const FieldDescriptor*>& fields1,
555    Scope fields1_scope,
556    const vector<const FieldDescriptor*>& fields2,
557    Scope fields2_scope,
558    vector<const FieldDescriptor*>* combined_fields) {
559
560  int index1 = 0;
561  int index2 = 0;
562
563  while (index1 < fields1.size() && index2 < fields2.size()) {
564    const FieldDescriptor* field1 = fields1[index1];
565    const FieldDescriptor* field2 = fields2[index2];
566
567    if (FieldBefore(field1, field2)) {
568      if (fields1_scope == FULL) {
569        combined_fields->push_back(fields1[index1]);
570      }
571      ++index1;
572    } else if (FieldBefore(field2, field1)) {
573      if (fields2_scope == FULL) {
574        combined_fields->push_back(fields2[index2]);
575      }
576      ++index2;
577    } else {
578      combined_fields->push_back(fields1[index1]);
579      ++index1;
580      ++index2;
581    }
582  }
583}
584
585bool MessageDifferencer::CompareWithFieldsInternal(
586    const Message& message1,
587    const Message& message2,
588    const vector<const FieldDescriptor*>& message1_fields,
589    const vector<const FieldDescriptor*>& message2_fields,
590    vector<SpecificField>* parent_fields) {
591  bool isDifferent = false;
592  int field_index1 = 0;
593  int field_index2 = 0;
594
595  const Reflection* reflection1 = message1.GetReflection();
596  const Reflection* reflection2 = message2.GetReflection();
597
598  while (true) {
599    const FieldDescriptor* field1 = message1_fields[field_index1];
600    const FieldDescriptor* field2 = message2_fields[field_index2];
601
602    // Once we have reached sentinel values, we are done the comparison.
603    if (field1 == NULL && field2 == NULL) {
604      break;
605    }
606
607    // Check for differences in the field itself.
608    if (FieldBefore(field1, field2)) {
609      // Field 1 is not in the field list for message 2.
610      if (IsIgnored(message1, message2, field1, *parent_fields)) {
611        // We are ignoring field1. Report the ignore and move on to
612        // the next field in message1_fields.
613        if (reporter_ != NULL) {
614          SpecificField specific_field;
615          specific_field.field = field1;
616
617          parent_fields->push_back(specific_field);
618          reporter_->ReportIgnored(message1, message2, *parent_fields);
619          parent_fields->pop_back();
620        }
621        ++field_index1;
622        continue;
623      }
624
625      if (reporter_ != NULL) {
626        int count = field1->is_repeated() ?
627            reflection1->FieldSize(message1, field1) : 1;
628
629        for (int i = 0; i < count; ++i) {
630          SpecificField specific_field;
631          specific_field.field = field1;
632          specific_field.index = field1->is_repeated() ? i : -1;
633
634          parent_fields->push_back(specific_field);
635          reporter_->ReportDeleted(message1, message2, *parent_fields);
636          parent_fields->pop_back();
637        }
638
639        isDifferent = true;
640      } else {
641        return false;
642      }
643
644      ++field_index1;
645      continue;
646    } else if (FieldBefore(field2, field1)) {
647      // Field 2 is not in the field list for message 1.
648      if (IsIgnored(message1, message2, field2, *parent_fields)) {
649        // We are ignoring field2. Report the ignore and move on to
650        // the next field in message2_fields.
651        if (reporter_ != NULL) {
652          SpecificField specific_field;
653          specific_field.field = field2;
654
655          parent_fields->push_back(specific_field);
656          reporter_->ReportIgnored(message1, message2, *parent_fields);
657          parent_fields->pop_back();
658        }
659        ++field_index2;
660        continue;
661      }
662
663      if (reporter_ != NULL) {
664        int count = field2->is_repeated() ?
665            reflection2->FieldSize(message2, field2) : 1;
666
667        for (int i = 0; i < count; ++i) {
668          SpecificField specific_field;
669          specific_field.field = field2;
670          specific_field.index = field2->is_repeated() ? i : -1;
671          specific_field.new_index = specific_field.index;
672
673          parent_fields->push_back(specific_field);
674          reporter_->ReportAdded(message1, message2, *parent_fields);
675          parent_fields->pop_back();
676        }
677
678        isDifferent = true;
679      } else {
680        return false;
681      }
682
683      ++field_index2;
684      continue;
685    }
686
687    // By this point, field1 and field2 are guarenteed to point to the same
688    // field, so we can now compare the values.
689    if (IsIgnored(message1, message2, field1, *parent_fields)) {
690      // Ignore this field. Report and move on.
691      if (reporter_ != NULL) {
692        SpecificField specific_field;
693        specific_field.field = field1;
694
695        parent_fields->push_back(specific_field);
696        reporter_->ReportIgnored(message1, message2, *parent_fields);
697        parent_fields->pop_back();
698      }
699
700      ++field_index1;
701      ++field_index2;
702      continue;
703    }
704
705    bool fieldDifferent = false;
706    if (field1->is_repeated()) {
707      fieldDifferent = !CompareRepeatedField(message1, message2, field1,
708                                             parent_fields);
709      if (fieldDifferent) {
710        if (reporter_ == NULL) return false;
711        isDifferent = true;
712      }
713    } else {
714      fieldDifferent = !CompareFieldValueUsingParentFields(
715          message1, message2, field1, -1, -1, parent_fields);
716
717      // If we have found differences, either report them or terminate if
718      // no reporter is present.
719      if (fieldDifferent && reporter_ == NULL) {
720        return false;
721      }
722
723      if (reporter_ != NULL) {
724        SpecificField specific_field;
725        specific_field.field = field1;
726        parent_fields->push_back(specific_field);
727        if (fieldDifferent) {
728          reporter_->ReportModified(message1, message2, *parent_fields);
729          isDifferent = true;
730        } else if (report_matches_) {
731          reporter_->ReportMatched(message1, message2, *parent_fields);
732        }
733        parent_fields->pop_back();
734      }
735    }
736    // Increment the field indicies.
737    ++field_index1;
738    ++field_index2;
739  }
740
741  return !isDifferent;
742}
743
744bool MessageDifferencer::IsMatch(const FieldDescriptor* repeated_field,
745                                 const MapKeyComparator* key_comparator,
746                                 const Message* message1,
747                                 const Message* message2,
748                                 const vector<SpecificField>& parent_fields,
749                                 int index1, int index2) {
750  vector<SpecificField> current_parent_fields(parent_fields);
751  if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
752    return CompareFieldValueUsingParentFields(
753        *message1, *message2, repeated_field, index1, index2,
754        &current_parent_fields);
755  }
756  // Back up the Reporter and output_string_.  They will be reset in the
757  // following code.
758  Reporter* backup_reporter = reporter_;
759  string* output_string = output_string_;
760  reporter_ = NULL;
761  output_string_ = NULL;
762  bool match;
763
764  if (key_comparator == NULL) {
765    match = CompareFieldValueUsingParentFields(
766        *message1, *message2, repeated_field, index1, index2,
767        &current_parent_fields);
768  } else {
769    const Reflection* reflection1 = message1->GetReflection();
770    const Reflection* reflection2 = message2->GetReflection();
771    const Message& m1 =
772        reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
773    const Message& m2 =
774        reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
775    SpecificField specific_field;
776    specific_field.field = repeated_field;
777    current_parent_fields.push_back(specific_field);
778    match = key_comparator->IsMatch(m1, m2, current_parent_fields);
779  }
780
781  reporter_ = backup_reporter;
782  output_string_ = output_string;
783  return match;
784}
785
786bool MessageDifferencer::CompareRepeatedField(
787    const Message& message1,
788    const Message& message2,
789    const FieldDescriptor* repeated_field,
790    vector<SpecificField>* parent_fields) {
791  // the input FieldDescriptor is guaranteed to be repeated field.
792  const Reflection* reflection1 = message1.GetReflection();
793  const Reflection* reflection2 = message2.GetReflection();
794  const int count1 = reflection1->FieldSize(message1, repeated_field);
795  const int count2 = reflection2->FieldSize(message2, repeated_field);
796  const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
797
798  // If the field is not treated as subset and no detailed reports is needed,
799  // we do a quick check on the number of the elements to avoid unnecessary
800  // comparison.
801  if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
802    return false;
803  }
804  // A match can never be found if message1 has more items than message2.
805  if (count1 > count2 && reporter_ == NULL) {
806    return false;
807  }
808
809  // These two list are used for store the index of the correspondent
810  // element in peer repeated field.
811  vector<int> match_list1;
812  vector<int> match_list2;
813
814  // Try to match indices of the repeated fields. Return false if match fails
815  // and there's no detailed report needed.
816  if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
817                                 *parent_fields, &match_list1, &match_list2) &&
818      reporter_ == NULL) {
819    return false;
820  }
821
822  bool fieldDifferent = false;
823  SpecificField specific_field;
824  specific_field.field = repeated_field;
825
826  // At this point, we have already matched pairs of fields (with the reporting
827  // to be done later). Now to check if the paired elements are different.
828  for (int i = 0; i < count1; i++) {
829    if (match_list1[i] == -1) continue;
830    specific_field.index = i;
831    specific_field.new_index = match_list1[i];
832
833    const bool result = CompareFieldValueUsingParentFields(
834        message1, message2, repeated_field, i, specific_field.new_index,
835        parent_fields);
836
837    // If we have found differences, either report them or terminate if
838    // no reporter is present. Note that ReportModified, ReportMoved, and
839    // ReportMatched are all mutually exclusive.
840    if (!result) {
841      if (reporter_ == NULL) return false;
842      parent_fields->push_back(specific_field);
843      reporter_->ReportModified(message1, message2, *parent_fields);
844      parent_fields->pop_back();
845      fieldDifferent = true;
846    } else if (reporter_ != NULL &&
847               specific_field.index != specific_field.new_index) {
848      parent_fields->push_back(specific_field);
849      reporter_->ReportMoved(message1, message2, *parent_fields);
850      parent_fields->pop_back();
851    } else if (report_matches_ && reporter_ != NULL) {
852      parent_fields->push_back(specific_field);
853      reporter_->ReportMatched(message1, message2, *parent_fields);
854      parent_fields->pop_back();
855    }
856  }
857
858  // Report any remaining additions or deletions.
859  for (int i = 0; i < count2; ++i) {
860    if (match_list2[i] != -1) continue;
861    if (!treated_as_subset) {
862      fieldDifferent = true;
863    }
864
865    if (reporter_ == NULL) continue;
866    specific_field.index = i;
867    specific_field.new_index = i;
868    parent_fields->push_back(specific_field);
869    reporter_->ReportAdded(message1, message2, *parent_fields);
870    parent_fields->pop_back();
871  }
872
873  for (int i = 0; i < count1; ++i) {
874    if (match_list1[i] != -1) continue;
875    specific_field.index = i;
876    parent_fields->push_back(specific_field);
877    reporter_->ReportDeleted(message1, message2, *parent_fields);
878    parent_fields->pop_back();
879    fieldDifferent = true;
880  }
881  return !fieldDifferent;
882}
883
884bool MessageDifferencer::CompareFieldValue(const Message& message1,
885                                           const Message& message2,
886                                           const FieldDescriptor* field,
887                                           int index1,
888                                           int index2) {
889  return CompareFieldValueUsingParentFields(message1, message2, field, index1,
890                                            index2, NULL);
891}
892
893bool MessageDifferencer::CompareFieldValueUsingParentFields(
894    const Message& message1, const Message& message2,
895    const FieldDescriptor* field, int index1, int index2,
896    vector<SpecificField>* parent_fields) {
897  FieldContext field_context(parent_fields);
898  FieldComparator::ComparisonResult result = GetFieldComparisonResult(
899      message1, message2, field, index1, index2, &field_context);
900
901  if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
902      result == FieldComparator::RECURSE) {
903    // Get the nested messages and compare them using one of the Compare
904    // methods.
905    const Reflection* reflection1 = message1.GetReflection();
906    const Reflection* reflection2 = message2.GetReflection();
907    const Message& m1 = field->is_repeated() ?
908        reflection1->GetRepeatedMessage(message1, field, index1) :
909        reflection1->GetMessage(message1, field);
910    const Message& m2 = field->is_repeated() ?
911        reflection2->GetRepeatedMessage(message2, field, index2) :
912        reflection2->GetMessage(message2, field);
913
914    // parent_fields is used in calls to Reporter methods.
915    if (parent_fields != NULL) {
916      // Append currently compared field to the end of parent_fields.
917      SpecificField specific_field;
918      specific_field.field = field;
919      specific_field.index = index1;
920      specific_field.new_index = index2;
921      parent_fields->push_back(specific_field);
922      const bool compare_result = Compare(m1, m2, parent_fields);
923      parent_fields->pop_back();
924      return compare_result;
925    } else {
926      // Recreates parent_fields as if m1 and m2 had no parents.
927      return Compare(m1, m2);
928    }
929  } else {
930    return (result == FieldComparator::SAME);
931  }
932}
933
934bool MessageDifferencer::CheckPathChanged(
935    const vector<SpecificField>& field_path) {
936  for (int i = 0; i < field_path.size(); ++i) {
937    if (field_path[i].index != field_path[i].new_index) return true;
938  }
939  return false;
940}
941
942bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
943  if (!field->is_repeated()) return false;
944  if (field->is_map()) return true;
945  if (repeated_field_comparison_ == AS_SET)
946    return list_fields_.find(field) == list_fields_.end();
947  return (set_fields_.find(field) != set_fields_.end());
948}
949
950bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
951  return scope_ == PARTIAL &&
952      (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
953}
954
955bool MessageDifferencer::IsIgnored(
956    const Message& message1,
957    const Message& message2,
958    const FieldDescriptor* field,
959    const vector<SpecificField>& parent_fields) {
960  if (ignored_fields_.find(field) != ignored_fields_.end()) {
961    return true;
962  }
963  for (int i = 0; i < ignore_criteria_.size(); ++i) {
964    if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
965                                       parent_fields)) {
966      return true;
967    }
968  }
969  return false;
970}
971
972bool MessageDifferencer::IsUnknownFieldIgnored(
973    const Message& message1, const Message& message2,
974    const SpecificField& field, const vector<SpecificField>& parent_fields) {
975  for (int i = 0; i < ignore_criteria_.size(); ++i) {
976    if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
977                                                   parent_fields)) {
978      return true;
979    }
980  }
981  return false;
982}
983
984const MessageDifferencer::MapKeyComparator* MessageDifferencer
985    ::GetMapKeyComparator(const FieldDescriptor* field) {
986  if (!field->is_repeated()) return NULL;
987  if (map_field_key_comparator_.find(field) !=
988      map_field_key_comparator_.end()) {
989    return map_field_key_comparator_[field];
990  }
991  return NULL;
992}
993
994namespace {
995
996typedef pair<int, const UnknownField*> IndexUnknownFieldPair;
997
998struct UnknownFieldOrdering {
999  inline bool operator()(const IndexUnknownFieldPair& a,
1000                         const IndexUnknownFieldPair& b) const {
1001    if (a.second->number() < b.second->number()) return true;
1002    if (a.second->number() > b.second->number()) return false;
1003    return a.second->type() < b.second->type();
1004  }
1005};
1006
1007}  // namespace
1008
1009bool MessageDifferencer::UnpackAny(const Message& any,
1010                                   google::protobuf::scoped_ptr<Message>* data) {
1011  const Reflection* reflection = any.GetReflection();
1012  const FieldDescriptor* type_url_field;
1013  const FieldDescriptor* value_field;
1014  if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1015    return false;
1016  }
1017  const string& type_url = reflection->GetString(any, type_url_field);
1018  string full_type_name;
1019  if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1020    return false;
1021  }
1022
1023  const google::protobuf::Descriptor* desc =
1024      any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1025          full_type_name);
1026  if (desc == NULL) {
1027    GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1028    return false;
1029  }
1030
1031  if (dynamic_message_factory_ == NULL) {
1032    dynamic_message_factory_.reset(new DynamicMessageFactory());
1033  }
1034  data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1035  string serialized_value = reflection->GetString(any, value_field);
1036  if (!(*data)->ParseFromString(serialized_value)) {
1037    GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1038    return false;
1039  }
1040  return true;
1041}
1042
1043bool MessageDifferencer::CompareUnknownFields(
1044    const Message& message1, const Message& message2,
1045    const google::protobuf::UnknownFieldSet& unknown_field_set1,
1046    const google::protobuf::UnknownFieldSet& unknown_field_set2,
1047    vector<SpecificField>* parent_field) {
1048  // Ignore unknown fields in EQUIVALENT mode.
1049  if (message_field_comparison_ == EQUIVALENT) return true;
1050
1051  if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1052    return true;
1053  }
1054
1055  bool is_different = false;
1056
1057  // We first sort the unknown fields by field number and type (in other words,
1058  // in tag order), making sure to preserve ordering of values with the same
1059  // tag.  This allows us to report only meaningful differences between the
1060  // two sets -- that is, differing values for the same tag.  We use
1061  // IndexUnknownFieldPairs to keep track of the field's original index for
1062  // reporting purposes.
1063  vector<IndexUnknownFieldPair> fields1;  // unknown_field_set1, sorted
1064  vector<IndexUnknownFieldPair> fields2;  // unknown_field_set2, sorted
1065  fields1.reserve(unknown_field_set1.field_count());
1066  fields2.reserve(unknown_field_set2.field_count());
1067
1068  for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1069    fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1070  }
1071  for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1072    fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1073  }
1074
1075  UnknownFieldOrdering is_before;
1076  std::stable_sort(fields1.begin(), fields1.end(), is_before);
1077  std::stable_sort(fields2.begin(), fields2.end(), is_before);
1078
1079  // In order to fill in SpecificField::index, we have to keep track of how
1080  // many values we've seen with the same field number and type.
1081  // current_repeated points at the first field in this range, and
1082  // current_repeated_start{1,2} are the indexes of the first field in the
1083  // range within fields1 and fields2.
1084  const UnknownField* current_repeated = NULL;
1085  int current_repeated_start1 = 0;
1086  int current_repeated_start2 = 0;
1087
1088  // Now that we have two sorted lists, we can detect fields which appear only
1089  // in one list or the other by traversing them simultaneously.
1090  int index1 = 0;
1091  int index2 = 0;
1092  while (index1 < fields1.size() || index2 < fields2.size()) {
1093    enum { ADDITION, DELETION, MODIFICATION, COMPARE_GROUPS,
1094      NO_CHANGE } change_type;
1095
1096    // focus_field is the field we're currently reporting on.  (In the case
1097    // of a modification, it's the field on the left side.)
1098    const UnknownField* focus_field;
1099    bool match = false;
1100
1101    if (index2 == fields2.size() ||
1102        (index1 < fields1.size() &&
1103          is_before(fields1[index1], fields2[index2]))) {
1104      // fields1[index1] is not present in fields2.
1105      change_type = DELETION;
1106      focus_field = fields1[index1].second;
1107    } else if (index1 == fields1.size() ||
1108               is_before(fields2[index2], fields1[index1])) {
1109      // fields2[index2] is not present in fields1.
1110      if (scope_ == PARTIAL) {
1111        // Ignore.
1112        ++index2;
1113        continue;
1114      }
1115      change_type = ADDITION;
1116      focus_field = fields2[index2].second;
1117    } else {
1118      // Field type and number are the same.  See if the values differ.
1119      change_type = MODIFICATION;
1120      focus_field = fields1[index1].second;
1121
1122      switch (focus_field->type()) {
1123        case UnknownField::TYPE_VARINT:
1124          match = fields1[index1].second->varint() ==
1125                  fields2[index2].second->varint();
1126          break;
1127        case UnknownField::TYPE_FIXED32:
1128          match = fields1[index1].second->fixed32() ==
1129                  fields2[index2].second->fixed32();
1130          break;
1131        case UnknownField::TYPE_FIXED64:
1132          match = fields1[index1].second->fixed64() ==
1133                  fields2[index2].second->fixed64();
1134          break;
1135        case UnknownField::TYPE_LENGTH_DELIMITED:
1136          match = fields1[index1].second->length_delimited() ==
1137                  fields2[index2].second->length_delimited();
1138          break;
1139        case UnknownField::TYPE_GROUP:
1140          // We must deal with this later, after building the SpecificField.
1141          change_type = COMPARE_GROUPS;
1142          break;
1143      }
1144      if (match && change_type != COMPARE_GROUPS) {
1145        change_type = NO_CHANGE;
1146      }
1147    }
1148
1149    if (current_repeated == NULL ||
1150        focus_field->number() != current_repeated->number() ||
1151        focus_field->type() != current_repeated->type()) {
1152      // We've started a new repeated field.
1153      current_repeated = focus_field;
1154      current_repeated_start1 = index1;
1155      current_repeated_start2 = index2;
1156    }
1157
1158    if (change_type == NO_CHANGE && reporter_ == NULL) {
1159      // Fields were already compared and matched and we have no reporter.
1160      ++index1;
1161      ++index2;
1162      continue;
1163    }
1164
1165    // Build the SpecificField.  This is slightly complicated.
1166    SpecificField specific_field;
1167    specific_field.unknown_field_number = focus_field->number();
1168    specific_field.unknown_field_type = focus_field->type();
1169
1170    specific_field.unknown_field_set1 = &unknown_field_set1;
1171    specific_field.unknown_field_set2 = &unknown_field_set2;
1172
1173    if (change_type != ADDITION) {
1174      specific_field.unknown_field_index1 = fields1[index1].first;
1175    }
1176    if (change_type != DELETION) {
1177      specific_field.unknown_field_index2 = fields2[index2].first;
1178    }
1179
1180    // Calculate the field index.
1181    if (change_type == ADDITION) {
1182      specific_field.index = index2 - current_repeated_start2;
1183      specific_field.new_index = index2 - current_repeated_start2;
1184    } else {
1185      specific_field.index = index1 - current_repeated_start1;
1186      specific_field.new_index = index2 - current_repeated_start2;
1187    }
1188
1189    if (IsUnknownFieldIgnored(message1, message2, specific_field,
1190                              *parent_field)) {
1191      if (reporter_ != NULL) {
1192        parent_field->push_back(specific_field);
1193        reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1194        parent_field->pop_back();
1195      }
1196      return true;
1197    }
1198
1199    if (change_type == ADDITION || change_type == DELETION ||
1200        change_type == MODIFICATION) {
1201      if (reporter_ == NULL) {
1202        // We found a difference and we have no reproter.
1203        return false;
1204      }
1205      is_different = true;
1206    }
1207
1208    parent_field->push_back(specific_field);
1209
1210    switch (change_type) {
1211      case ADDITION:
1212        reporter_->ReportAdded(message1, message2, *parent_field);
1213        ++index2;
1214        break;
1215      case DELETION:
1216        reporter_->ReportDeleted(message1, message2, *parent_field);
1217        ++index1;
1218        break;
1219      case MODIFICATION:
1220        reporter_->ReportModified(message1, message2, *parent_field);
1221        ++index1;
1222        ++index2;
1223        break;
1224      case COMPARE_GROUPS:
1225        if (!CompareUnknownFields(message1, message2,
1226                                  fields1[index1].second->group(),
1227                                  fields2[index2].second->group(),
1228                                  parent_field)) {
1229          if (reporter_ == NULL) return false;
1230          is_different = true;
1231          reporter_->ReportModified(message1, message2, *parent_field);
1232        }
1233        ++index1;
1234        ++index2;
1235        break;
1236      case NO_CHANGE:
1237        ++index1;
1238        ++index2;
1239        if (report_matches_) {
1240          reporter_->ReportMatched(message1, message2, *parent_field);
1241        }
1242    }
1243
1244    parent_field->pop_back();
1245  }
1246
1247  return !is_different;
1248}
1249
1250namespace {
1251
1252// Find maximum bipartite matching using the argumenting path algorithm.
1253class MaximumMatcher {
1254 public:
1255  typedef ResultCallback2<bool, int, int> NodeMatchCallback;
1256  // MaximumMatcher takes ownership of the passed in callback and uses it to
1257  // determine whether a node on the left side of the bipartial graph matches
1258  // a node on the right side. count1 is the number of nodes on the left side
1259  // of the graph and count2 to is the number of nodes on the right side.
1260  // Every node is referred to using 0-based indices.
1261  // If a maximum match is found, the result will be stored in match_list1 and
1262  // match_list2. match_list1[i] == j means the i-th node on the left side is
1263  // matched to the j-th node on the right side and match_list2[x] == y means
1264  // the x-th node on the right side is matched to y-th node on the left side.
1265  // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1266  MaximumMatcher(int count1, int count2, NodeMatchCallback* callback,
1267                 vector<int>* match_list1, vector<int>* match_list2);
1268  // Find a maximum match and return the number of matched node pairs.
1269  // If early_return is true, this method will return 0 immediately when it
1270  // finds that not all nodes on the left side can be matched.
1271  int FindMaximumMatch(bool early_return);
1272 private:
1273  // Determines whether the node on the left side of the bipartial graph
1274  // matches the one on the right side.
1275  bool Match(int left, int right);
1276  // Find an argumenting path starting from the node v on the left side. If a
1277  // path can be found, update match_list2_ to reflect the path and return
1278  // true.
1279  bool FindArgumentPathDFS(int v, vector<bool>* visited);
1280
1281  int count1_;
1282  int count2_;
1283  google::protobuf::scoped_ptr<NodeMatchCallback> match_callback_;
1284  map<pair<int, int>, bool> cached_match_results_;
1285  vector<int>* match_list1_;
1286  vector<int>* match_list2_;
1287  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1288};
1289
1290MaximumMatcher::MaximumMatcher(int count1, int count2,
1291                               NodeMatchCallback* callback,
1292                               vector<int>* match_list1,
1293                               vector<int>* match_list2)
1294    : count1_(count1), count2_(count2), match_callback_(callback),
1295      match_list1_(match_list1), match_list2_(match_list2) {
1296  match_list1_->assign(count1, -1);
1297  match_list2_->assign(count2, -1);
1298}
1299
1300int MaximumMatcher::FindMaximumMatch(bool early_return) {
1301  int result = 0;
1302  for (int i = 0; i < count1_; ++i) {
1303    vector<bool> visited(count1_);
1304    if (FindArgumentPathDFS(i, &visited)) {
1305      ++result;
1306    } else if (early_return) {
1307      return 0;
1308    }
1309  }
1310  // Backfill match_list1_ as we only filled match_list2_ when finding
1311  // argumenting pathes.
1312  for (int i = 0; i < count2_; ++i) {
1313    if ((*match_list2_)[i] != -1) {
1314      (*match_list1_)[(*match_list2_)[i]] = i;
1315    }
1316  }
1317  return result;
1318}
1319
1320bool MaximumMatcher::Match(int left, int right) {
1321  pair<int, int> p(left, right);
1322  map<pair<int, int>, bool>::iterator it = cached_match_results_.find(p);
1323  if (it != cached_match_results_.end()) {
1324    return it->second;
1325  }
1326  cached_match_results_[p] = match_callback_->Run(left, right);
1327  return cached_match_results_[p];
1328}
1329
1330bool MaximumMatcher::FindArgumentPathDFS(int v, vector<bool>* visited) {
1331  (*visited)[v] = true;
1332  // We try to match those un-matched nodes on the right side first. This is
1333  // the step that the navie greedy matching algorithm uses. In the best cases
1334  // where the greedy algorithm can find a maximum matching, we will always
1335  // find a match in this step and the performance will be identical to the
1336  // greedy algorithm.
1337  for (int i = 0; i < count2_; ++i) {
1338    int matched = (*match_list2_)[i];
1339    if (matched == -1 && Match(v, i)) {
1340      (*match_list2_)[i] = v;
1341      return true;
1342    }
1343  }
1344  // Then we try those already matched nodes and see if we can find an
1345  // alternaive match for the node matched to them.
1346  // The greedy algorithm will stop before this and fail to produce the
1347  // correct result.
1348  for (int i = 0; i < count2_; ++i) {
1349    int matched = (*match_list2_)[i];
1350    if (matched != -1 && Match(v, i)) {
1351      if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1352        (*match_list2_)[i] = v;
1353        return true;
1354      }
1355    }
1356  }
1357  return false;
1358}
1359
1360}  // namespace
1361
1362bool MessageDifferencer::MatchRepeatedFieldIndices(
1363    const Message& message1,
1364    const Message& message2,
1365    const FieldDescriptor* repeated_field,
1366    const vector<SpecificField>& parent_fields,
1367    vector<int>* match_list1,
1368    vector<int>* match_list2) {
1369  const int count1 =
1370      message1.GetReflection()->FieldSize(message1, repeated_field);
1371  const int count2 =
1372      message2.GetReflection()->FieldSize(message2, repeated_field);
1373  const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
1374
1375  match_list1->assign(count1, -1);
1376  match_list2->assign(count2, -1);
1377
1378  SpecificField specific_field;
1379  specific_field.field = repeated_field;
1380
1381  bool success = true;
1382  // Find potential match if this is a special repeated field.
1383  if (key_comparator != NULL || IsTreatedAsSet(repeated_field)) {
1384    if (scope_ == PARTIAL) {
1385      // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1386      // doesn't neccessarily imply Compare(b, c). Therefore a naive greedy
1387      // algorithm will fail to find a maximum matching.
1388      // Here we use the argumenting path algorithm.
1389      MaximumMatcher::NodeMatchCallback* callback =
1390          ::google::protobuf::internal::NewPermanentCallback(
1391              this, &MessageDifferencer::IsMatch,
1392              repeated_field, key_comparator,
1393              &message1, &message2, parent_fields);
1394      MaximumMatcher matcher(count1, count2, callback, match_list1,
1395                             match_list2);
1396      // If diff info is not needed, we should end the matching process as
1397      // soon as possible if not all items can be matched.
1398      bool early_return = (reporter_ == NULL);
1399      int match_count = matcher.FindMaximumMatch(early_return);
1400      if (match_count != count1 && reporter_ == NULL) return false;
1401      success = success && (match_count == count1);
1402    } else {
1403      for (int i = 0; i < count1; ++i) {
1404        // Indicates any matched elements for this repeated field.
1405        bool match = false;
1406
1407        specific_field.index = i;
1408        specific_field.new_index = i;
1409
1410        for (int j = 0; j < count2; j++) {
1411          if (match_list2->at(j) != -1) continue;
1412          specific_field.index = i;
1413          specific_field.new_index = j;
1414
1415          match = IsMatch(repeated_field, key_comparator,
1416                          &message1, &message2, parent_fields, i, j);
1417
1418          if (match) {
1419            match_list1->at(specific_field.index) = specific_field.new_index;
1420            match_list2->at(specific_field.new_index) = specific_field.index;
1421            break;
1422          }
1423        }
1424        if (!match && reporter_ == NULL) return false;
1425        success = success && match;
1426      }
1427    }
1428  } else {
1429    // If this field should be treated as list, just label the match_list.
1430    for (int i = 0; i < count1 && i < count2; i++) {
1431      match_list1->at(i) = i;
1432      match_list2->at(i) = i;
1433    }
1434  }
1435
1436  return success;
1437}
1438
1439FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1440    const Message& message1, const Message& message2,
1441    const FieldDescriptor* field, int index1, int index2,
1442    const FieldContext* field_context) {
1443  FieldComparator* comparator = field_comparator_ != NULL ?
1444      field_comparator_ : &default_field_comparator_;
1445  return comparator->Compare(message1, message2, field,
1446                             index1, index2, field_context);
1447}
1448
1449// ===========================================================================
1450
1451MessageDifferencer::Reporter::Reporter() { }
1452MessageDifferencer::Reporter::~Reporter() {}
1453
1454// ===========================================================================
1455
1456MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
1457MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1458
1459// ===========================================================================
1460
1461MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
1462MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1463
1464// ===========================================================================
1465
1466// Note that the printer's delimiter is not used, because if we are given a
1467// printer, we don't know its delimiter.
1468MessageDifferencer::StreamReporter::StreamReporter(
1469    io::ZeroCopyOutputStream* output) : printer_(new io::Printer(output, '$')),
1470                                        delete_printer_(true),
1471                                        report_modified_aggregates_(false) { }
1472
1473MessageDifferencer::StreamReporter::StreamReporter(
1474    io::Printer* printer) : printer_(printer),
1475                            delete_printer_(false),
1476                            report_modified_aggregates_(false) { }
1477
1478MessageDifferencer::StreamReporter::~StreamReporter() {
1479  if (delete_printer_) delete printer_;
1480}
1481
1482void MessageDifferencer::StreamReporter::PrintPath(
1483    const vector<SpecificField>& field_path, bool left_side) {
1484  for (int i = 0; i < field_path.size(); ++i) {
1485    if (i > 0) {
1486      printer_->Print(".");
1487    }
1488
1489    SpecificField specific_field = field_path[i];
1490
1491    if (specific_field.field != NULL) {
1492      if (specific_field.field->is_extension()) {
1493        printer_->Print("($name$)", "name",
1494                        specific_field.field->full_name());
1495      } else {
1496        printer_->PrintRaw(specific_field.field->name());
1497      }
1498    } else {
1499      printer_->PrintRaw(SimpleItoa(specific_field.unknown_field_number));
1500    }
1501    if (left_side && specific_field.index >= 0) {
1502      printer_->Print("[$name$]", "name", SimpleItoa(specific_field.index));
1503    }
1504    if (!left_side && specific_field.new_index >= 0) {
1505      printer_->Print("[$name$]", "name", SimpleItoa(specific_field.new_index));
1506    }
1507  }
1508}
1509
1510void MessageDifferencer::
1511StreamReporter::PrintValue(const Message& message,
1512                           const vector<SpecificField>& field_path,
1513                           bool left_side) {
1514  const SpecificField& specific_field = field_path.back();
1515  const FieldDescriptor* field = specific_field.field;
1516  if (field != NULL) {
1517    string output;
1518    int index = left_side ? specific_field.index : specific_field.new_index;
1519    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1520      const Reflection* reflection = message.GetReflection();
1521      const Message& field_message = field->is_repeated() ?
1522          reflection->GetRepeatedMessage(message, field, index) :
1523          reflection->GetMessage(message, field);
1524      output = field_message.ShortDebugString();
1525      if (output.empty()) {
1526        printer_->Print("{ }");
1527      } else {
1528        printer_->Print("{ $name$ }", "name", output);
1529      }
1530    } else {
1531      TextFormat::PrintFieldValueToString(message, field, index, &output);
1532      printer_->PrintRaw(output);
1533    }
1534  } else {
1535    const UnknownFieldSet* unknown_fields =
1536        (left_side ?
1537         specific_field.unknown_field_set1 :
1538         specific_field.unknown_field_set2);
1539    const UnknownField* unknown_field = &unknown_fields->field(
1540        left_side ?
1541        specific_field.unknown_field_index1 :
1542        specific_field.unknown_field_index2);
1543    PrintUnknownFieldValue(unknown_field);
1544  }
1545}
1546
1547void MessageDifferencer::
1548StreamReporter::PrintUnknownFieldValue(const UnknownField* unknown_field) {
1549  GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1550
1551  string output;
1552  switch (unknown_field->type()) {
1553    case UnknownField::TYPE_VARINT:
1554      output = SimpleItoa(unknown_field->varint());
1555      break;
1556    case UnknownField::TYPE_FIXED32:
1557      output = StrCat("0x", strings::Hex(unknown_field->fixed32(),
1558                                         strings::ZERO_PAD_8));
1559      break;
1560    case UnknownField::TYPE_FIXED64:
1561      output = StrCat("0x", strings::Hex(unknown_field->fixed64(),
1562                                         strings::ZERO_PAD_16));
1563      break;
1564    case UnknownField::TYPE_LENGTH_DELIMITED:
1565      output = StringPrintf("\"%s\"",
1566          CEscape(unknown_field->length_delimited()).c_str());
1567      break;
1568    case UnknownField::TYPE_GROUP:
1569      // TODO(kenton):  Print the contents of the group like we do for
1570      //   messages.  Requires an equivalent of ShortDebugString() for
1571      //   UnknownFieldSet.
1572      output = "{ ... }";
1573      break;
1574  }
1575  printer_->PrintRaw(output);
1576}
1577
1578void MessageDifferencer::StreamReporter::Print(const string& str) {
1579  printer_->Print(str.c_str());
1580}
1581
1582void MessageDifferencer::StreamReporter::ReportAdded(
1583    const Message& message1,
1584    const Message& message2,
1585    const vector<SpecificField>& field_path) {
1586  printer_->Print("added: ");
1587  PrintPath(field_path, false);
1588  printer_->Print(": ");
1589  PrintValue(message2, field_path, false);
1590  printer_->Print("\n");  // Print for newlines.
1591}
1592
1593void MessageDifferencer::StreamReporter::ReportDeleted(
1594    const Message& message1,
1595    const Message& message2,
1596    const vector<SpecificField>& field_path) {
1597  printer_->Print("deleted: ");
1598  PrintPath(field_path, true);
1599  printer_->Print(": ");
1600  PrintValue(message1, field_path, true);
1601  printer_->Print("\n");  // Print for newlines
1602}
1603
1604void MessageDifferencer::StreamReporter::ReportModified(
1605    const Message& message1,
1606    const Message& message2,
1607    const vector<SpecificField>& field_path) {
1608  if (!report_modified_aggregates_ && field_path.back().field == NULL) {
1609    if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
1610      // Any changes to the subfields have already been printed.
1611      return;
1612    }
1613  } else if (!report_modified_aggregates_) {
1614    if (field_path.back().field->cpp_type() ==
1615        FieldDescriptor::CPPTYPE_MESSAGE) {
1616      // Any changes to the subfields have already been printed.
1617      return;
1618    }
1619  }
1620
1621  printer_->Print("modified: ");
1622  PrintPath(field_path, true);
1623  if (CheckPathChanged(field_path)) {
1624    printer_->Print(" -> ");
1625    PrintPath(field_path, false);
1626  }
1627  printer_->Print(": ");
1628  PrintValue(message1, field_path, true);
1629  printer_->Print(" -> ");
1630  PrintValue(message2, field_path, false);
1631  printer_->Print("\n");  // Print for newlines.
1632}
1633
1634void MessageDifferencer::StreamReporter::ReportMoved(
1635    const Message& message1,
1636    const Message& message2,
1637    const vector<SpecificField>& field_path) {
1638  printer_->Print("moved: ");
1639  PrintPath(field_path, true);
1640  printer_->Print(" -> ");
1641  PrintPath(field_path, false);
1642  printer_->Print(" : ");
1643  PrintValue(message1, field_path, true);
1644  printer_->Print("\n");  // Print for newlines.
1645}
1646
1647void MessageDifferencer::StreamReporter::ReportMatched(
1648    const Message& message1,
1649    const Message& message2,
1650    const vector<SpecificField>& field_path) {
1651  printer_->Print("matched: ");
1652  PrintPath(field_path, true);
1653  if (CheckPathChanged(field_path)) {
1654    printer_->Print(" -> ");
1655    PrintPath(field_path, false);
1656  }
1657  printer_->Print(" : ");
1658  PrintValue(message1, field_path, true);
1659  printer_->Print("\n");  // Print for newlines.
1660}
1661
1662void MessageDifferencer::StreamReporter::ReportIgnored(
1663    const Message& message1,
1664    const Message& message2,
1665    const vector<SpecificField>& field_path) {
1666  printer_->Print("ignored: ");
1667  PrintPath(field_path, true);
1668  if (CheckPathChanged(field_path)) {
1669    printer_->Print(" -> ");
1670    PrintPath(field_path, false);
1671  }
1672  printer_->Print("\n");  // Print for newlines.
1673}
1674
1675void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
1676    const Message& message1, const Message& message2,
1677    const vector<SpecificField>& field_path) {
1678  printer_->Print("ignored: ");
1679  PrintPath(field_path, true);
1680  if (CheckPathChanged(field_path)) {
1681    printer_->Print(" -> ");
1682    PrintPath(field_path, false);
1683  }
1684  printer_->Print("\n");  // Print for newlines.
1685}
1686
1687}  // namespace util
1688}  // namespace protobuf
1689}  // namespace google
1690