math_semantic_util.js revision cedac228d2dd51db4b79ea1e72c7f249408ee061
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5/**
6 * @fileoverview Utility functions for semantic tree computations.
7 */
8
9goog.provide('cvox.SemanticUtil');
10
11
/**
 * Holder for the semantic tree utility functions. The constructor itself does
 * nothing; the utilities are attached to it as static members.
 * @constructor
 */
cvox.SemanticUtil = function() {};
16
17
/**
 * Collects the own enumerable keys of every argument into one flat array.
 * Keys appear in argument order and are not de-duplicated.
 * @param {...Object.<string, string>} objects Optional objects.
 * @return {Array.<string>} Array of all keys of the objects.
 */
cvox.SemanticUtil.objectsToKeys = function(objects) {
  var merged = [];
  // Walk the full argument list; the named parameter only documents the
  // variadic signature.
  for (var i = 0; i < arguments.length; i++) {
    var ownKeys = Object.keys(arguments[i]);
    for (var j = 0; j < ownKeys.length; j++) {
      merged.push(ownKeys[j]);
    }
  }
  return merged;
};
28
29
/**
 * Collects the property values of every argument into one flat array.
 * Values appear in argument order and are not de-duplicated.
 * @param {...Object.<string, string>} objects Optional objects.
 * @return {Array.<string>} Array of all values of the objects.
 */
cvox.SemanticUtil.objectsToValues = function(objects) {
  var values = [];
  for (var i = 0; i < arguments.length; i++) {
    var source = arguments[i];
    // for-in enumeration: visits every enumerable property, inherited ones
    // included, and simply yields nothing for null or undefined.
    for (var name in source) {
      values.push(source[name]);
    }
  }
  return values;
};
46
47
/**
 * Transforms a unicode character into its numeric representation. Returns
 * null if the input string is not a valid unicode character, i.e. if it is
 * empty, longer than two UTF-16 code units, or consists of two code units that
 * do not form a high/low surrogate pair.
 * A string of a single code unit is returned as that code unit's value.
 * @param {string} unicode Character.
 * @return {?number} The decimal representation if it exists.
 */
cvox.SemanticUtil.unicodeToNumber = function(unicode) {
  if (!unicode || unicode.length > 2) {
    return null;
  }
  if (unicode.length == 1) {
    return unicode.charCodeAt(0);
  }
  // Two code units only denote one character when they are a surrogate pair:
  // a high surrogate (0xD800-0xDBFF) followed by a low one (0xDC00-0xDFFF).
  var hi = unicode.charCodeAt(0);
  var low = unicode.charCodeAt(1);
  var isHighSurrogate = 0xD800 <= hi && hi <= 0xDBFF;
  var isLowSurrogate = 0xDC00 <= low && low <= 0xDFFF;
  if (!isHighSurrogate || !isLowSurrogate) {
    return null;
  }
  // Each surrogate carries 10 bits of the offset above 0x10000.
  return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
};
69
70
/**
 * Transforms the numeric representation of a unicode character into its
 * corresponding string. Values from 0x10000 upwards are encoded as a pair of
 * surrogate code units; smaller values map to a single code unit.
 * @param {number} number Unicode point.
 * @return {string} The string representation.
 */
cvox.SemanticUtil.numberToUnicode = function(number) {
  if (!(number >= 0x10000)) {
    return String.fromCharCode(number);
  }
  // Split the offset above 0x10000 into its upper part and its lower 10 bits.
  var offset = number - 0x10000;
  var highSurrogate = Math.floor(offset / 0x0400) + 0xD800;
  var lowSurrogate = offset % 0x0400 + 0xDC00;
  return String.fromCharCode(highSurrogate, lowSurrogate);
};
85
86
/**
 * Reads an element node's tag name and normalises it to upper case.
 * @param {Element} node The node.
 * @return {string} The node's tagname.
 */
cvox.SemanticUtil.tagName = function(node) {
  var rawName = node.tagName;
  return rawName.toUpperCase();
};
95
96
/**
 * List of MathML tags that are to be ignored. Names are given in upper case,
 * the form in which cvox.SemanticUtil.tagName reports them.
 * @type {Array.<string>}
 * @const
 */
cvox.SemanticUtil.IGNORETAGS = [
  'MERROR', 'MPHANTOM', 'MSPACE', 'MACTION', 'MALIGNGROUP', 'MALIGNMARK'
];
106
107
/**
 * List of MathML tags that are to be ignored if they have no children. Names
 * are given in upper case.
 * @type {Array.<string>}
 * @const
 */
cvox.SemanticUtil.EMPTYTAGS = [
  'MATH',
  'MROW',
  'MPADDED',
  'MSTYLE'
];
114
115
/**
 * Filters a list of MathML nodes, dropping every node whose tag is listed in
 * IGNORETAGS and every node whose tag is listed in EMPTYTAGS and that has no
 * child nodes. The input list is left untouched.
 * Observe that this is currently not recursive, i.e. it will not take care of
 * pathological cases, where content is hidden in incorrectly used tags!
 * Iteration stops at the first falsy entry of the list.
 * @param {Array.<Element>} nodes The node list to be cleaned.
 * @return {Array.<Element>} The cleansed list.
 */
cvox.SemanticUtil.purgeNodes = function(nodes) {
  var kept = [];
  var current;
  for (var index = 0; (current = nodes[index]); index++) {
    var tag = cvox.SemanticUtil.tagName(current);
    var isIgnored = cvox.SemanticUtil.IGNORETAGS.indexOf(tag) !== -1;
    // Child nodes are only inspected for tags that may be dropped when empty.
    var isEmptyContainer = !isIgnored &&
        cvox.SemanticUtil.EMPTYTAGS.indexOf(tag) !== -1 &&
        current.childNodes.length === 0;
    if (!isIgnored && !isEmptyContainer) {
      kept.push(current);
    }
  }
  return kept;
};
136