// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H_
#define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H_

#include <stdio.h>

#include <map>
#include <string>
#include <utility>
#include <vector>

namespace base {
class FilePath;
}

namespace convert_dict {

// Reads a .aff (affix) file and exposes the pieces found in it: the leading
// comment block, the encoding, the "AF" affix groups, the SFX/PFX affix rules,
// the replacement pairs, and any remaining commands.
//
// Typical use is to construct the reader with the path of the file, call
// Read() once, and then query the getters below.
class AffReader {
 public:
  explicit AffReader(const base::FilePath& path);
  ~AffReader();

  // Processes the file given to the constructor.
  // NOTE(review): presumably returns true on success and false on failure;
  // confirm against the implementation.
  bool Read();

  // Returns whether this file uses indexed affixes, or, on false, whether the
  // rule string will be specified literally in the .dic file. This must be
  // called after Read().
  bool has_indexed_affixes() const { return has_indexed_affixes_; }

  // Returns a string representing the encoding of the dictionary. This will
  // default to ISO-8859-1 if the .aff file does not specify it. The returned
  // pointer refers to storage owned by this object.
  const char* encoding() const { return encoding_.c_str(); }

  // Converts the given string from the file encoding to UTF-8, returning true
  // on success.
  bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;

  // Adds a new affix string, returning the index. If it already exists, returns
  // the index of the existing one. This is used to convert .dic files which
  // list the rule string literally (see has_indexed_affixes()) so that they can
  // refer to the rules by index instead.
  // You must not call this until after Read().
  int GetAFIndexForAFString(const std::string& af_string);

  // Getters for the computed data.
  const std::string& comments() const { return intro_comment_; }
  const std::vector<std::string>& affix_rules() const { return affix_rules_; }
  const std::vector< std::pair<std::string, std::string> >&
      replacements() const {
    return replacements_;
  }
  const std::vector<std::string>& other_commands() const {
    return other_commands_;
  }

  // Returns the affix groups ("AF" lines) for this file. The indices into this
  // are 1-based, but we don't use the 0th item, so lookups will have to
  // subtract one to get the index. This is how hunspell stores this data.
  std::vector<std::string> GetAffixGroups() const;

 private:
  // Command-specific handlers. These are given the string following the
  // command. The input rule may be modified arbitrarily by the function.
  int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
  void AddAffix(std::string* rule);  // SFX/PFX
  void AddReplacement(std::string* rule);
  // void HandleFlag(std::string* rule);

  // Used to handle "other" commands. The "raw" just saves the line as-is.
  // The "encoded" version converts the line to UTF-8 and saves it.
  void HandleRawCommand(const std::string& line);
  void HandleEncodedCommand(const std::string& line);

  // Handle for the .aff file being read.
  // NOTE(review): this class declares a destructor and holds a raw FILE*. If
  // the destructor closes |file_|, copying an AffReader would close the same
  // handle twice; consider making the class non-copyable once callers have
  // been checked.
  FILE* file_;

  // Comments from the beginning of the file. This is everything before the
  // first command. We want to store this since it often contains the copyright
  // information.
  std::string intro_comment_;

  // Encoding of the source words.
  std::string encoding_;

  // Affix rules. These are populated by "AF" commands. The .dic file can refer
  // to these by index. They are indexed by their string value (the list of
  // characters representing rules), and map to the numeric affix IDs.
  //
  // These can also be added using GetAFIndexForAFString.
  std::map<std::string, int> affix_groups_;

  // True when the affixes were specified in the .aff file using indices. The
  // dictionary reader uses this to see how it should treat the stuff after the
  // word on each line.
  bool has_indexed_affixes_;

  // SFX and PFX commands. This is a list of each of those lines in the order
  // they appear in the file. They have been re-encoded.
  std::vector<std::string> affix_rules_;

  // Replacement commands. The first string is a possible input, and the second
  // is the replacement.
  std::vector< std::pair<std::string, std::string> > replacements_;

  // All other commands.
  std::vector<std::string> other_commands_;
};

}  // namespace convert_dict

#endif  // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H_