math_semantic_util.js revision cedac228d2dd51db4b79ea1e72c7f249408ee061
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * @fileoverview Utility functions for semantic tree computations.
 */

goog.provide('cvox.SemanticUtil');


/**
 * @constructor
 */
cvox.SemanticUtil = function() { };


/**
 * Merges keys of objects into an array. Only own enumerable keys are
 * collected (via Object.keys); duplicates across objects are kept.
 * @param {...Object.<string, string>} objects Optional objects.
 * @return {Array.<string>} Array of all keys of the objects.
 */
cvox.SemanticUtil.objectsToKeys = function(objects) {
  // The function is variadic: gather every argument, not just the first.
  var allObjects = Array.prototype.slice.call(arguments, 0);
  var keys = [];
  return keys.concat.apply(keys, allObjects.map(Object.keys));
};


/**
 * Merges values of objects into an array. Values are collected with a
 * for-in loop, so inherited enumerable properties are included as well.
 * @param {...Object.<string, string>} objects Optional objects.
 * @return {Array.<string>} Array of all values of the objects.
 */
cvox.SemanticUtil.objectsToValues = function(objects) {
  // The function is variadic: gather every argument, not just the first.
  var allObjects = Array.prototype.slice.call(arguments, 0);
  var result = [];
  var collectValues = function(obj) {
    for (var key in obj) {
      result.push(obj[key]);
    }
  };
  allObjects.forEach(collectValues);
  return result;
};


/**
 * Transforms a unicode character into numeric representation. Returns null if
 * the input string is not a valid unicode character, i.e. if it is empty,
 * longer than two UTF-16 code units, or consists of two code units that do not
 * form a well-formed surrogate pair.
 * @param {string} unicode Character.
 * @return {?number} The decimal representation if it exists.
 */
cvox.SemanticUtil.unicodeToNumber = function(unicode) {
  if (!unicode || unicode.length > 2) {
    return null;
  }
  // Treating surrogate pairs.
  if (unicode.length === 2) {
    var hi = unicode.charCodeAt(0);
    var low = unicode.charCodeAt(1);
    // Both halves must be in their surrogate ranges; otherwise the two code
    // units are two separate characters and the arithmetic below would yield
    // a meaningless code point.
    if (0xD800 <= hi && hi <= 0xDBFF && 0xDC00 <= low && low <= 0xDFFF) {
      return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
    }
    return null;
  }
  return unicode.charCodeAt(0);
};


/**
 * Transforms a numeric representation of a unicode character into its
 * corresponding string. Code points of 0x10000 and above are encoded as a
 * UTF-16 surrogate pair.
 * @param {number} number Unicode point.
 * @return {string} The string representation.
 */
cvox.SemanticUtil.numberToUnicode = function(number) {
  if (number >= 0x10000) {
    var offset = number - 0x10000;
    // Integer division: the upper ten bits select the high surrogate.
    var hi = Math.floor(offset / 0x0400) + 0xD800;
    var lo = offset % 0x0400 + 0xDC00;
    return String.fromCharCode(hi, lo);
  }
  return String.fromCharCode(number);
};


/**
 * Returns the tagname of an element node in upper case.
 * @param {Element} node The node.
 * @return {string} The node's tagname.
 */
cvox.SemanticUtil.tagName = function(node) {
  return node.tagName.toUpperCase();
};


/**
 * List of MathML Tags that are to be ignored.
 * @type {Array.<string>}
 * @const
 */
cvox.SemanticUtil.IGNORETAGS = [
  'MERROR', 'MPHANTOM', 'MSPACE', 'MACTION', 'MALIGNGROUP', 'MALIGNMARK'
];


/**
 * List of MathML Tags to be ignored if they have no children.
 * @type {Array.<string>}
 * @const
 */
cvox.SemanticUtil.EMPTYTAGS = ['MATH', 'MROW', 'MPADDED', 'MSTYLE'];


/**
 * Removes elements from a list of MathML nodes that are either to be ignored or
 * ignored if they have empty children.
 * Observe that this is currently not recursive, i.e. will not take care of
 * pathological cases, where content is hidden in incorrectly used tags!
 * @param {Array.<Element>} nodes The node list to be cleaned.
 * @return {Array.<Element>} The cleansed list.
 */
cvox.SemanticUtil.purgeNodes = function(nodes) {
  var nodeArray = [];
  // The assignment in the loop condition ends the iteration at the first
  // missing (falsy) entry, which also covers running off the end of the list.
  for (var i = 0, node; node = nodes[i]; i++) {
    var tagName = cvox.SemanticUtil.tagName(node);
    if (cvox.SemanticUtil.IGNORETAGS.indexOf(tagName) !== -1) {
      continue;
    }
    if (cvox.SemanticUtil.EMPTYTAGS.indexOf(tagName) !== -1 &&
        node.childNodes.length === 0) {
      continue;
    }
    nodeArray.push(node);
  }
  return nodeArray;
};