1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
6#define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
7
8#include <map>
9#include <stdio.h>
10#include <string>
11#include <vector>
12
// Forward declaration only: this header refers to base::FilePath solely as a
// const reference parameter (see the AffReader constructor), so the complete
// type is not required here.
namespace base {
class FilePath;
}  // namespace base
16
17namespace convert_dict {
18
19class AffReader {
20 public:
21  explicit AffReader(const base::FilePath& path);
22  ~AffReader();
23
24  bool Read();
25
26  // Returns whether this file uses indexed affixes, or, on false, whether the
27  // rule string will be specified literally in the .dic file. This must be
28  // called after Read().
29  bool has_indexed_affixes() const { return has_indexed_affixes_; }
30
31  // Returns a string representing the encoding of the dictionary. This will
32  // default to ISO-8859-1 if the .aff file does not specify it.
33  const char* encoding() const { return encoding_.c_str(); }
34
35  // Converts the given string from the file encoding to UTF-8, returning true
36  // on success.
37  bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;
38
39  // Adds a new affix string, returning the index. If it already exists, returns
40  // the index of the existing one. This is used to convert .dic files which
41  // list the
42  // You must not call this until after Read();
43  int GetAFIndexForAFString(const std::string& af_string);
44
45  // Getters for the computed data.
46  const std::string& comments() const { return intro_comment_; }
47  const std::vector<std::string>& affix_rules() const { return affix_rules_; }
48  const std::vector< std::pair<std::string, std::string> >&
49      replacements() const {
50    return replacements_;
51  }
52  const std::vector<std::string>& other_commands() const {
53    return other_commands_;
54  }
55
56  // Returns the affix groups ("AF" lines) for this file. The indices into this
57  // are 1-based, but we don't use the 0th item, so lookups will have to
58  // subtract one to get the index. This is how hunspell stores this data.
59  std::vector<std::string> GetAffixGroups() const;
60
61 private:
62  // Command-specific handlers. These are given the string folling the
63  // command. The input rule may be modified arbitrarily by the function.
64  int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
65  void AddAffix(std::string* rule);  // SFX/PFX
66  void AddReplacement(std::string* rule);
67  // void HandleFlag(std::string* rule);
68
69  // Used to handle "other" commands. The "raw" just saves the line as-is.
70  // The "encoded" version converts the line to UTF-8 and saves it.
71  void HandleRawCommand(const std::string& line);
72  void HandleEncodedCommand(const std::string& line);
73
74  FILE* file_;
75
76  // Comments from the beginning of the file. This is everything before the
77  // first command. We want to store this since it often contains the copyright
78  // information.
79  std::string intro_comment_;
80
81  // Encoding of the source words.
82  std::string encoding_;
83
84  // Affix rules. These are populated by "AF" commands. The .dic file can refer
85  // to these by index. They are indexed by their string value (the list of
86  // characters representing rules), and map to the numeric affix IDs.
87  //
88  // These can also be added using GetAFIndexForAFString.
89  std::map<std::string, int> affix_groups_;
90
91  // True when the affixes were specified in the .aff file using indices. The
92  // dictionary reader uses this to see how it should treat the stuff after the
93  // word on each line.
94  bool has_indexed_affixes_;
95
96  // SFX and PFX commands. This is a list of each of those lines in the order
97  // they appear in the file. They have been re-encoded.
98  std::vector<std::string> affix_rules_;
99
100  // Replacement commands. The first string is a possible input, and the second
101  // is the replacment.
102  std::vector< std::pair<std::string, std::string> > replacements_;
103
104  // All other commands.
105  std::vector<std::string> other_commands_;
106};
107
108}  // namespace convert_dict
109
110#endif  // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
111