1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <fcntl.h>
6#include <fontconfig/fontconfig.h>
7#include <sys/stat.h>
8#include <sys/types.h>
9
10#include <string>
11
12#include "base/posix/eintr_wrapper.h"
13#include "ppapi/c/trusted/ppb_browser_font_trusted.h"
14#include "third_party/npapi/bindings/npapi_extensions.h"
15
16namespace {
17
18// MSCharSetToFontconfig translates a Microsoft charset identifier to a
19// fontconfig language set by appending to |langset|.
20// Returns true if |langset| is Latin/Greek/Cyrillic.
21bool MSCharSetToFontconfig(FcLangSet* langset, unsigned fdwCharSet) {
22  // We have need to translate raw fdwCharSet values into terms that
23  // fontconfig can understand. (See the description of fdwCharSet in the MSDN
24  // documentation for CreateFont:
25  // http://msdn.microsoft.com/en-us/library/dd183499(VS.85).aspx )
26  //
27  // Although the argument is /called/ 'charset', the actual values conflate
28  // character sets (which are sets of Unicode code points) and character
29  // encodings (which are algorithms for turning a series of bits into a
30  // series of code points.) Sometimes the values will name a language,
31  // sometimes they'll name an encoding. In the latter case I'm assuming that
32  // they mean the set of code points in the domain of that encoding.
33  //
34  // fontconfig deals with ISO 639-1 language codes:
35  //   http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
36  //
37  // So, for each of the documented fdwCharSet values I've had to take a
38  // guess at the set of ISO 639-1 languages intended.
39
40  bool is_lgc = false;
41  switch (fdwCharSet) {
42    case NPCharsetAnsi:
43    // These values I don't really know what to do with, so I'm going to map
44    // them to English also.
45    case NPCharsetDefault:
46    case NPCharsetMac:
47    case NPCharsetOEM:
48    case NPCharsetSymbol:
49      is_lgc = true;
50      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("en"));
51      break;
52    case NPCharsetBaltic:
53      // The three baltic languages.
54      is_lgc = true;
55      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("et"));
56      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("lv"));
57      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("lt"));
58      break;
59    case NPCharsetChineseBIG5:
60      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("zh-tw"));
61      break;
62    case NPCharsetGB2312:
63      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("zh-cn"));
64      break;
65    case NPCharsetEastEurope:
66      // A scattering of eastern European languages.
67      is_lgc = true;
68      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("pl"));
69      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("cs"));
70      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("sk"));
71      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("hu"));
72      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("hr"));
73      break;
74    case NPCharsetGreek:
75      is_lgc = true;
76      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("el"));
77      break;
78    case NPCharsetHangul:
79    case NPCharsetJohab:
80      // Korean
81      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ko"));
82      break;
83    case NPCharsetRussian:
84      is_lgc = true;
85      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ru"));
86      break;
87    case NPCharsetShiftJIS:
88      // Japanese
89      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ja"));
90      break;
91    case NPCharsetTurkish:
92      is_lgc = true;
93      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("tr"));
94      break;
95    case NPCharsetVietnamese:
96      is_lgc = true;
97      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("vi"));
98      break;
99    case NPCharsetArabic:
100      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ar"));
101      break;
102    case NPCharsetHebrew:
103      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("he"));
104      break;
105    case NPCharsetThai:
106      FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("th"));
107      break;
108      // default:
109      // Don't add any languages in that case that we don't recognise the
110      // constant.
111  }
112  return is_lgc;
113}
114
115}  // namespace
116
117namespace content {
118
119int MatchFontFaceWithFallback(const std::string& face,
120                              bool is_bold,
121                              bool is_italic,
122                              uint32 charset,
123                              uint32 fallback_family) {
124  FcLangSet* langset = FcLangSetCreate();
125  bool is_lgc = MSCharSetToFontconfig(langset, charset);
126  FcPattern* pattern = FcPatternCreate();
127  FcPatternAddString(
128      pattern, FC_FAMILY, reinterpret_cast<const FcChar8*>(face.c_str()));
129
130  // TODO(thestig) Check if we can access Chrome's per-script font preference
131  // here and select better default fonts for non-LGC case.
132  std::string generic_font_name;
133  if (is_lgc) {
134    switch (fallback_family) {
135      case PP_BROWSERFONT_TRUSTED_FAMILY_SERIF:
136        generic_font_name = "Times New Roman";
137        break;
138      case PP_BROWSERFONT_TRUSTED_FAMILY_SANSSERIF:
139        generic_font_name = "Arial";
140        break;
141      case PP_BROWSERFONT_TRUSTED_FAMILY_MONOSPACE:
142        generic_font_name = "Courier New";
143        break;
144    }
145  }
146  if (!generic_font_name.empty()) {
147    const FcChar8* fc_generic_font_name =
148        reinterpret_cast<const FcChar8*>(generic_font_name.c_str());
149    FcPatternAddString(pattern, FC_FAMILY, fc_generic_font_name);
150  }
151
152  if (is_bold)
153    FcPatternAddInteger(pattern, FC_WEIGHT, FC_WEIGHT_BOLD);
154  if (is_italic)
155    FcPatternAddInteger(pattern, FC_SLANT, FC_SLANT_ITALIC);
156  FcPatternAddLangSet(pattern, FC_LANG, langset);
157  FcPatternAddBool(pattern, FC_SCALABLE, FcTrue);
158  FcConfigSubstitute(NULL, pattern, FcMatchPattern);
159  FcDefaultSubstitute(pattern);
160
161  FcResult result;
162  FcFontSet* font_set = FcFontSort(0, pattern, 0, 0, &result);
163  int font_fd = -1;
164  int good_enough_index = -1;
165  bool good_enough_index_set = false;
166
167  if (font_set) {
168    for (int i = 0; i < font_set->nfont; ++i) {
169      FcPattern* current = font_set->fonts[i];
170
171      // Older versions of fontconfig have a bug where they cannot select
172      // only scalable fonts so we have to manually filter the results.
173      FcBool is_scalable;
174      if (FcPatternGetBool(current, FC_SCALABLE, 0, &is_scalable) !=
175              FcResultMatch ||
176          !is_scalable) {
177        continue;
178      }
179
180      FcChar8* c_filename;
181      if (FcPatternGetString(current, FC_FILE, 0, &c_filename) !=
182          FcResultMatch) {
183        continue;
184      }
185
186      // We only want to return sfnt (TrueType) based fonts. We don't have a
187      // very good way of detecting this so we'll filter based on the
188      // filename.
189      bool is_sfnt = false;
190      static const char kSFNTExtensions[][5] = {".ttf", ".otc", ".TTF", ".ttc",
191                                                ""};
192      const size_t filename_len = strlen(reinterpret_cast<char*>(c_filename));
193      for (unsigned j = 0;; j++) {
194        if (kSFNTExtensions[j][0] == 0) {
195          // None of the extensions matched.
196          break;
197        }
198        const size_t ext_len = strlen(kSFNTExtensions[j]);
199        if (filename_len > ext_len &&
200            memcmp(c_filename + filename_len - ext_len,
201                   kSFNTExtensions[j],
202                   ext_len) == 0) {
203          is_sfnt = true;
204          break;
205        }
206      }
207
208      if (!is_sfnt)
209        continue;
210
211      // This font is good enough to pass muster, but we might be able to do
212      // better with subsequent ones.
213      if (!good_enough_index_set) {
214        good_enough_index = i;
215        good_enough_index_set = true;
216      }
217
218      FcValue matrix;
219      bool have_matrix = FcPatternGet(current, FC_MATRIX, 0, &matrix) == 0;
220
221      if (is_italic && have_matrix) {
222        // we asked for an italic font, but fontconfig is giving us a
223        // non-italic font with a transformation matrix.
224        continue;
225      }
226
227      FcValue embolden;
228      const bool have_embolden =
229          FcPatternGet(current, FC_EMBOLDEN, 0, &embolden) == 0;
230
231      if (is_bold && have_embolden) {
232        // we asked for a bold font, but fontconfig gave us a non-bold font
233        // and asked us to apply fake bolding.
234        continue;
235      }
236
237      font_fd =
238          HANDLE_EINTR(open(reinterpret_cast<char*>(c_filename), O_RDONLY));
239      if (font_fd >= 0)
240        break;
241    }
242  }
243
244  if (font_fd == -1 && good_enough_index_set) {
245    // We didn't find a font that we liked, so we fallback to something
246    // acceptable.
247    FcPattern* current = font_set->fonts[good_enough_index];
248    FcChar8* c_filename;
249    FcPatternGetString(current, FC_FILE, 0, &c_filename);
250    font_fd = HANDLE_EINTR(open(reinterpret_cast<char*>(c_filename), O_RDONLY));
251  }
252
253  if (font_set)
254    FcFontSetDestroy(font_set);
255  FcPatternDestroy(pattern);
256
257  return font_fd;
258}
259
260}  // namespace content
261