camera/docs/metadata_helpers.py

#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
A set of helpers for rendering Mako templates with a Metadata model.
"""

import metadata_model
import re
import markdown
import textwrap
import sys
import bs4
# Monkey-patch BS4. WBR element must not have an end tag.
bs4.builder.HTMLTreeBuilder.empty_element_tags.add("wbr")

from collections import OrderedDict

# Relative path from HTML file to the base directory used by <img> tags
IMAGE_SRC_METADATA="images/camera2/metadata/"

# Prepend this path to each <img src="foo"> in javadocs
JAVADOC_IMAGE_SRC_METADATA="../../../../" + IMAGE_SRC_METADATA

_context_buf = None

def _is_sec_or_ins(x):
  return isinstance(x, metadata_model.Section) or    \
         isinstance(x, metadata_model.InnerNamespace)

##
## Metadata Helpers
##

def find_all_sections(root):
  """
  Find all descendants that are Section or InnerNamespace instances.

  Args:
    root: a Metadata instance

  Returns:
    A list of Section/InnerNamespace instances

  Remarks:
    These are known as "sections" in the generated C code.
  """
  return root.find_all(_is_sec_or_ins)

def find_parent_section(entry):
  """
  Find the closest ancestor that is either a Section or InnerNamespace.

  Args:
    entry: an Entry or Clone node

  Returns:
    An instance of Section or InnerNamespace
  """
  return entry.find_parent_first(_is_sec_or_ins)

# find uniquely named entries (w/o recursing through inner namespaces)
def find_unique_entries(node):
  """
  Find all uniquely named entries, without recursing through inner namespaces.

  Args:
    node: a Section or InnerNamespace instance

  Yields:
    A sequence of MergedEntry nodes representing an entry

  Remarks:
    This collapses multiple entries with the same fully qualified name into
    one entry (e.g. if there are multiple entries in different kinds).
  """
  if not isinstance(node, metadata_model.Section) and    \
     not isinstance(node, metadata_model.InnerNamespace):
      raise TypeError("expected node to be a Section or InnerNamespace")

  d = OrderedDict()
  # remove the 'kinds' from the path between sec and the closest entries
  # then search the immediate children of the search path
  search_path = isinstance(node, metadata_model.Section) and node.kinds \
                or [node]
  for i in search_path:
      for entry in i.entries:
          d[entry.name] = entry

  for k,v in d.iteritems():
      yield v.merge()

def path_name(node):
  """
  Calculate a period-separated string path from the root to this element,
  by joining the names of each node and excluding the Metadata/Kind nodes
  from the path.

  Args:
    node: a Node instance

  Returns:
    A string path
  """

  isa = lambda x,y: isinstance(x, y)
  fltr = lambda x: not isa(x, metadata_model.Metadata) and \
                   not isa(x, metadata_model.Kind)

  path = node.find_parents(fltr)
  path = list(path)
  path.reverse()
  path.append(node)

  return ".".join((i.name for i in path))

def has_descendants_with_enums(node):
  """
  Determine whether or not the current node is or has any descendants with an
  Enum node.

  Args:
    node: a Node instance

  Returns:
    True if it finds an Enum node in the subtree, False otherwise
  """
  return bool(node.find_first(lambda x: isinstance(x, metadata_model.Enum)))

def get_children_by_throwing_away_kind(node, member='entries'):
  """
  Get the children of this node by compressing the subtree together by removing
  the kind and then combining any children nodes with the same name together.

  Args:
    node: An instance of Section, InnerNamespace, or Kind

  Returns:
    An iterable over the combined children of the subtree of node,
    as if the Kinds never existed.

  Remarks:
    Not recursive. Call this function repeatedly on each child.
  """

  if isinstance(node, metadata_model.Section):
    # Note that this makes jump from Section to Kind,
    # skipping the Kind entirely in the tree.
    node_to_combine = node.combine_kinds_into_single_node()
  else:
    node_to_combine = node

  combined_kind = node_to_combine.combine_children_by_name()

  return (i for i in getattr(combined_kind, member))

def get_children_by_filtering_kind(section, kind_name, member='entries'):
  """
  Takes a section and yields the children of the merged kind under this section.

  Args:
    section: An instance of Section
    kind_name: A name of the kind, i.e. 'dynamic' or 'static' or 'controls'

  Returns:
    An iterable over the children of the specified merged kind.
  """

  matched_kind = next((i for i in section.merged_kinds if i.name == kind_name), None)

  if matched_kind:
    return getattr(matched_kind, member)
  else:
    return ()

##
## Filters
##

# abcDef.xyz -> ABC_DEF_XYZ
def csym(name):
  """
  Convert an entry name string into an uppercase C symbol.

  Returns:
    A string

  Example:
    csym('abcDef.xyz') == 'ABC_DEF_XYZ'
  """
  newstr = name
  newstr = "".join([i.isupper() and ("_" + i) or i for i in newstr]).upper()
  newstr = newstr.replace(".", "_")
  return newstr

# abcDef.xyz -> abc_def_xyz
def csyml(name):
  """
  Convert an entry name string into a lowercase C symbol.

  Returns:
    A string

  Example:
    csyml('abcDef.xyz') == 'abc_def_xyz'
  """
  return csym(name).lower()

# pad with spaces to make string len == size. add new line if too big
def ljust(size, indent=4):
  """
  Creates a function that given a string will pad it with spaces to make
  the string length == size. Adds a new line if the string was too big.

  Args:
    size: an integer representing how much spacing should be added
    indent: an integer representing the initial indendation level

  Returns:
    A function that takes a string and returns a string.

  Example:
    ljust(8)("hello") == 'hello   '

  Remarks:
    Deprecated. Use pad instead since it works for non-first items in a
    Mako template.
  """
  def inner(what):
    newstr = what.ljust(size)
    if len(newstr) > size:
      return what + "\n" + "".ljust(indent + size)
    else:
      return newstr
  return inner

def _find_new_line():

  if _context_buf is None:
    raise ValueError("Context buffer was not set")

  buf = _context_buf
  x = -1 # since the first read is always ''
  cur_pos = buf.tell()
  while buf.tell() > 0 and buf.read(1) != '\n':
    buf.seek(cur_pos - x)
    x = x + 1

  buf.seek(cur_pos)

  return int(x)

# Pad the string until the buffer reaches the desired column.
# If string is too long, insert a new line with 'col' spaces instead
def pad(col):
  """
  Create a function that given a string will pad it to the specified column col.
  If the string overflows the column, put the string on a new line and pad it.

  Args:
    col: an integer specifying the column number

  Returns:
    A function that given a string will produce a padded string.

  Example:
    pad(8)("hello") == 'hello   '

  Remarks:
    This keeps track of the line written by Mako so far, so it will always
    align to the column number correctly.
  """
  def inner(what):
    wut = int(col)
    current_col = _find_new_line()

    if len(what) > wut - current_col:
      return what + "\n".ljust(col)
    else:
      return what.ljust(wut - current_col)
  return inner

# int32 -> TYPE_INT32, byte -> TYPE_BYTE, etc. note that enum -> TYPE_INT32
def ctype_enum(what):
  """
  Generate a camera_metadata_type_t symbol from a type string.

  Args:
    what: a type string

  Returns:
    A string representing the camera_metadata_type_t

  Example:
    ctype_enum('int32') == 'TYPE_INT32'
    ctype_enum('int64') == 'TYPE_INT64'
    ctype_enum('float') == 'TYPE_FLOAT'

  Remarks:
    An enum is coerced to a byte since the rest of the camera_metadata
    code doesn't support enums directly yet.
  """
  return 'TYPE_%s' %(what.upper())


# Calculate a java type name from an entry with a Typedef node
def _jtypedef_type(entry):
  typedef = entry.typedef
  additional = ''

  # Hacky way to deal with arrays. Assume that if we have
  # size 'Constant x N' the Constant is part of the Typedef size.
  # So something sized just 'Constant', 'Constant1 x Constant2', etc
  # is not treated as a real java array.
  if entry.container == 'array':
    has_variable_size = False
    for size in entry.container_sizes:
      try:
        size_int = int(size)
      except ValueError:
        has_variable_size = True

    if has_variable_size:
      additional = '[]'

  try:
    name = typedef.languages['java']

    return "%s%s" %(name, additional)
  except KeyError:
    return None

# Box if primitive. Otherwise leave unboxed.
def _jtype_box(type_name):
  mapping = {
    'boolean': 'Boolean',
    'byte': 'Byte',
    'int': 'Integer',
    'float': 'Float',
    'double': 'Double',
    'long': 'Long'
  }

  return mapping.get(type_name, type_name)

def jtype_unboxed(entry):
  """
  Calculate the Java type from an entry type string, to be used whenever we
  need the regular type in Java. It's not boxed, so it can't be used as a
  generic type argument when the entry type happens to resolve to a primitive.

  Remarks:
    Since Java generics cannot be instantiated with primitives, this version
    is not applicable in that case. Use jtype_boxed instead for that.

  Returns:
    The string representing the Java type.
  """
  if not isinstance(entry, metadata_model.Entry):
    raise ValueError("Expected entry to be an instance of Entry")

  metadata_type = entry.type

  java_type = None

  if entry.typedef:
    typedef_name = _jtypedef_type(entry)
    if typedef_name:
      java_type = typedef_name # already takes into account arrays

  if not java_type:
    if not java_type and entry.enum and metadata_type == 'byte':
      # Always map byte enums to Java ints, unless there's a typedef override
      base_type = 'int'

    else:
      mapping = {
        'int32': 'int',
        'int64': 'long',
        'float': 'float',
        'double': 'double',
        'byte': 'byte',
        'rational': 'Rational'
      }

      base_type = mapping[metadata_type]

    # Convert to array (enums, basic types)
    if entry.container == 'array':
      additional = '[]'
    else:
      additional = ''

    java_type = '%s%s' %(base_type, additional)

  # Now box this sucker.
  return java_type

def jtype_boxed(entry):
  """
  Calculate the Java type from an entry type string, to be used as a generic
  type argument in Java. The type is guaranteed to inherit from Object.

  It will only box when absolutely necessary, i.e. int -> Integer[], but
  int[] -> int[].

  Remarks:
    Since Java generics cannot be instantiated with primitives, this version
    will use boxed types when absolutely required.

  Returns:
    The string representing the boxed Java type.
  """
  unboxed_type = jtype_unboxed(entry)
  return _jtype_box(unboxed_type)

def _jtype_primitive(what):
  """
  Calculate the Java type from an entry type string.

  Remarks:
    Makes a special exception for Rational, since it's a primitive in terms of
    the C-library camera_metadata type system.

  Returns:
    The string representing the primitive type
  """
  mapping = {
    'int32': 'int',
    'int64': 'long',
    'float': 'float',
    'double': 'double',
    'byte': 'byte',
    'rational': 'Rational'
  }

  try:
    return mapping[what]
  except KeyError as e:
    raise ValueError("Can't map '%s' to a primitive, not supported" %what)

def jclass(entry):
  """
  Calculate the java Class reference string for an entry.

  Args:
    entry: an Entry node

  Example:
    <entry name="some_int" type="int32"/>
    <entry name="some_int_array" type="int32" container='array'/>

    jclass(some_int) == 'int.class'
    jclass(some_int_array) == 'int[].class'

  Returns:
    The ClassName.class string
  """

  return "%s.class" %jtype_unboxed(entry)

def jidentifier(what):
  """
  Convert the input string into a valid Java identifier.

  Args:
    what: any identifier string

  Returns:
    String with added underscores if necessary.
  """
  if re.match("\d", what):
    return "_%s" %what
  else:
    return what

def enum_calculate_value_string(enum_value):
  """
  Calculate the value of the enum, even if it does not have one explicitly
  defined.

  This looks back for the first enum value that has a predefined value and then
  applies addition until we get the right value, using C-enum semantics.

  Args:
    enum_value: an EnumValue node with a valid Enum parent

  Example:
    <enum>
      <value>X</value>
      <value id="5">Y</value>
      <value>Z</value>
    </enum>

    enum_calculate_value_string(X) == '0'
    enum_calculate_Value_string(Y) == '5'
    enum_calculate_value_string(Z) == '6'

  Returns:
    String that represents the enum value as an integer literal.
  """

  enum_value_siblings = list(enum_value.parent.values)
  this_index = enum_value_siblings.index(enum_value)

  def is_hex_string(instr):
    return bool(re.match('0x[a-f0-9]+$', instr, re.IGNORECASE))

  base_value = 0
  base_offset = 0
  emit_as_hex = False

  this_id = enum_value_siblings[this_index].id
  while this_index != 0 and not this_id:
    this_index -= 1
    base_offset += 1
    this_id = enum_value_siblings[this_index].id

  if this_id:
    base_value = int(this_id, 0)  # guess base
    emit_as_hex = is_hex_string(this_id)

  if emit_as_hex:
    return "0x%X" %(base_value + base_offset)
  else:
    return "%d" %(base_value + base_offset)

def enumerate_with_last(iterable):
  """
  Enumerate a sequence of iterable, while knowing if this element is the last in
  the sequence or not.

  Args:
    iterable: an Iterable of some sequence

  Yields:
    (element, bool) where the bool is True iff the element is last in the seq.
  """
  it = (i for i in iterable)

  first = next(it)  # OK: raises exception if it is empty

  second = first  # for when we have only 1 element in iterable

  try:
    while True:
      second = next(it)
      # more elements remaining.
      yield (first, False)
      first = second
  except StopIteration:
    # last element. no more elements left
    yield (second, True)

def pascal_case(what):
  """
  Convert the first letter of a string to uppercase, to make the identifier
  conform to PascalCase.

  If there are dots, remove the dots, and capitalize the letter following
  where the dot was. Letters that weren't following dots are left unchanged,
  except for the first letter of the string (which is made upper-case).

  Args:
    what: a string representing some identifier

  Returns:
    String with first letter capitalized

  Example:
    pascal_case("helloWorld") == "HelloWorld"
    pascal_case("foo") == "Foo"
    pascal_case("hello.world") = "HelloWorld"
    pascal_case("fooBar.fooBar") = "FooBarFooBar"
  """
  return "".join([s[0:1].upper() + s[1:] for s in what.split('.')])

def jkey_identifier(what):
  """
  Return a Java identifier from a property name.

  Args:
    what: a string representing a property name.

  Returns:
    Java identifier corresponding to the property name. May need to be
    prepended with the appropriate Java class name by the caller of this
    function. Note that the outer namespace is stripped from the property
    name.

  Example:
    jkey_identifier("android.lens.facing") == "LENS_FACING"
  """
  return csym(what[what.find('.') + 1:])

def jenum_value(enum_entry, enum_value):
  """
  Calculate the Java name for an integer enum value

  Args:
    enum: An enum-typed Entry node
    value: An EnumValue node for the enum

  Returns:
    String representing the Java symbol
  """

  cname = csym(enum_entry.name)
  return cname[cname.find('_') + 1:] + '_' + enum_value.name

def javadoc(metadata, indent = 4):
  """
  Returns a function to format a markdown syntax text block as a
  javadoc comment section, given a set of metadata

  Args:
    metadata: A Metadata instance, representing the the top-level root
      of the metadata for cross-referencing
    indent: baseline level of indentation for javadoc block
  Returns:
    A function that transforms a String text block as follows:
    - Indent and * for insertion into a Javadoc comment block
    - Trailing whitespace removed
    - Entire body rendered via markdown to generate HTML
    - All tag names converted to appropriate Javadoc {@link} with @see
      for each tag

  Example:
    "This is a comment for Javadoc\n" +
    "     with multiple lines, that should be   \n" +
    "     formatted better\n" +
    "\n" +
    "    That covers multiple lines as well\n"
    "    And references android.control.mode\n"

    transforms to
    "    * <p>This is a comment for Javadoc\n" +
    "    * with multiple lines, that should be\n" +
    "    * formatted better</p>\n" +
    "    * <p>That covers multiple lines as well</p>\n" +
    "    * and references {@link CaptureRequest#CONTROL_MODE android.control.mode}\n" +
    "    *\n" +
    "    * @see CaptureRequest#CONTROL_MODE\n"
  """
  def javadoc_formatter(text):
    comment_prefix = " " * indent + " * ";

    # render with markdown => HTML
    javatext = md(text, JAVADOC_IMAGE_SRC_METADATA)

    # Crossref tag names
    kind_mapping = {
        'static': 'CameraCharacteristics',
        'dynamic': 'CaptureResult',
        'controls': 'CaptureRequest' }

    # Convert metadata entry "android.x.y.z" to form
    # "{@link CaptureRequest#X_Y_Z android.x.y.z}"
    def javadoc_crossref_filter(node):
      if node.applied_visibility == 'public':
        return '{@link %s#%s %s}' % (kind_mapping[node.kind],
                                     jkey_identifier(node.name),
                                     node.name)
      else:
        return node.name

    # For each public tag "android.x.y.z" referenced, add a
    # "@see CaptureRequest#X_Y_Z"
    def javadoc_see_filter(node_set):
      node_set = (x for x in node_set if x.applied_visibility == 'public')

      text = '\n'
      for node in node_set:
        text = text + '\n@see %s#%s' % (kind_mapping[node.kind],
                                      jkey_identifier(node.name))

      return text if text != '\n' else ''

    javatext = filter_tags(javatext, metadata, javadoc_crossref_filter, javadoc_see_filter)

    def line_filter(line):
      # Indent each line
      # Add ' * ' to it for stylistic reasons
      # Strip right side of trailing whitespace
      return (comment_prefix + line.lstrip()).rstrip()

    # Process each line with above filter
    javatext = "\n".join(line_filter(i) for i in javatext.split("\n")) + "\n"

    return javatext

  return javadoc_formatter

def dedent(text):
  """
  Remove all common indentation from every line but the 0th.
  This will avoid getting <code> blocks when rendering text via markdown.
  Ignoring the 0th line will also allow the 0th line not to be aligned.

  Args:
    text: A string of text to dedent.

  Returns:
    String dedented by above rules.

  For example:
    assertEquals("bar\nline1\nline2",   dedent("bar\n  line1\n  line2"))
    assertEquals("bar\nline1\nline2",   dedent(" bar\n  line1\n  line2"))
    assertEquals("bar\n  line1\nline2", dedent(" bar\n    line1\n  line2"))
  """
  text = textwrap.dedent(text)
  text_lines = text.split('\n')
  text_not_first = "\n".join(text_lines[1:])
  text_not_first = textwrap.dedent(text_not_first)
  text = text_lines[0] + "\n" + text_not_first

  return text

def md(text, img_src_prefix=""):
    """
    Run text through markdown to produce HTML.

    This also removes all common indentation from every line but the 0th.
    This will avoid getting <code> blocks in markdown.
    Ignoring the 0th line will also allow the 0th line not to be aligned.

    Args:
      text: A markdown-syntax using block of text to format.
      img_src_prefix: An optional string to prepend to each <img src="target"/>

    Returns:
      String rendered by markdown and other rules applied (see above).

    For example, this avoids the following situation:

      <!-- Input -->

      <!--- can't use dedent directly since 'foo' has no indent -->
      <notes>foo
          bar
          bar
      </notes>

      <!-- Bad Output -- >
      <!-- if no dedent is done generated code looks like -->
      <p>foo
        <code><pre>
          bar
          bar</pre></code>
      </p>

    Instead we get the more natural expected result:

      <!-- Good Output -->
      <p>foo
      bar
      bar</p>

    """
    text = dedent(text)

    # full list of extensions at http://pythonhosted.org/Markdown/extensions/
    md_extensions = ['tables'] # make <table> with ASCII |_| tables
    # render with markdown
    text = markdown.markdown(text, md_extensions)

    # prepend a prefix to each <img src="foo"> -> <img src="${prefix}foo">
    text = re.sub(r'src="([^"]*)"', 'src="' + img_src_prefix + r'\1"', text)
    return text

def filter_tags(text, metadata, filter_function, summary_function = None):
    """
    Find all references to tags in the form outer_namespace.xxx.yyy[.zzz] in
    the provided text, and pass them through filter_function and summary_function.

    Used to linkify entry names in HMTL, javadoc output.

    Args:
      text: A string representing a block of text destined for output
      metadata: A Metadata instance, the root of the metadata properties tree
      filter_function: A Node->string function to apply to each node
        when found in text; the string returned replaces the tag name in text.
      summary_function: A Node list->string function that is provided the list of
        unique tag nodes found in text, and which must return a string that is
        then appended to the end of the text. The list is sorted alphabetically
        by node name.
    """

    tag_set = set()
    def name_match(name):
      return lambda node: node.name == name

    # Match outer_namespace.x.y or outer_namespace.x.y.z, making sure
    # to grab .z and not just outer_namespace.x.y.  (sloppy, but since we
    # check for validity, a few false positives don't hurt)
    for outer_namespace in metadata.outer_namespaces:

      tag_match = outer_namespace.name + \
        r"\.([a-zA-Z0-9\n]+)\.([a-zA-Z0-9\n]+)(\.[a-zA-Z0-9\n]+)?([/]?)"

      def filter_sub(match):
        whole_match = match.group(0)
        section1 = match.group(1)
        section2 = match.group(2)
        section3 = match.group(3)
        end_slash = match.group(4)

        # Don't linkify things ending in slash (urls, for example)
        if end_slash:
          return whole_match

        candidate = ""

        # First try a two-level match
        candidate2 = "%s.%s.%s" % (outer_namespace.name, section1, section2)
        got_two_level = False

        node = metadata.find_first(name_match(candidate2.replace('\n','')))
        if not node and '\n' in section2:
          # Linefeeds are ambiguous - was the intent to add a space,
          # or continue a lengthy name? Try the former now.
          candidate2b = "%s.%s.%s" % (outer_namespace.name, section1, section2[:section2.find('\n')])
          node = metadata.find_first(name_match(candidate2b))
          if node:
            candidate2 = candidate2b

        if node:
          # Have two-level match
          got_two_level = True
          candidate = candidate2
        elif section3:
          # Try three-level match
          candidate3 = "%s%s" % (candidate2, section3)
          node = metadata.find_first(name_match(candidate3.replace('\n','')))

          if not node and '\n' in section3:
            # Linefeeds are ambiguous - was the intent to add a space,
            # or continue a lengthy name? Try the former now.
            candidate3b = "%s%s" % (candidate2, section3[:section3.find('\n')])
            node = metadata.find_first(name_match(candidate3b))
            if node:
              candidate3 = candidate3b

          if node:
            # Have 3-level match
            candidate = candidate3

        # Replace match with crossref or complain if a likely match couldn't be matched

        if node:
          tag_set.add(node)
          return whole_match.replace(candidate,filter_function(node))
        else:
          print >> sys.stderr,\
            "  WARNING: Could not crossref likely reference {%s}" % (match.group(0))
          return whole_match

      text = re.sub(tag_match, filter_sub, text)

    if summary_function is not None:
      return text + summary_function(sorted(tag_set, key=lambda x: x.name))
    else:
      return text

def any_visible(section, kind_name, visibilities):
  """
  Determine if entries in this section have an applied visibility that's in
  the list of given visibilities.

  Args:
    section: A section of metadata
    kind_name: A name of the kind, i.e. 'dynamic' or 'static' or 'controls'
    visibilities: An iterable of visibilities to match against

  Returns:
    True if the section has any entries with any of the given visibilities. False otherwise.
  """

  for inner_namespace in get_children_by_filtering_kind(section, kind_name,
                                                        'namespaces'):
    if any(filter_visibility(inner_namespace.merged_entries, visibilities)):
      return True

  return any(filter_visibility(get_children_by_filtering_kind(section, kind_name,
                                                              'merged_entries'),
                               visibilities))


def filter_visibility(entries, visibilities):
  """
  Remove entries whose applied visibility is not in the supplied visibilities.

  Args:
    entries: An iterable of Entry nodes
    visibilities: An iterable of visibilities to filter against

  Yields:
    An iterable of Entry nodes
  """
  return (e for e in entries if e.applied_visibility in visibilities)

def remove_synthetic(entries):
  """
  Filter the given entries by removing those that are synthetic.

  Args:
    entries: An iterable of Entry nodes

  Yields:
    An iterable of Entry nodes
  """
  return (e for e in entries if not e.synthetic)

def wbr(text):
  """
  Insert word break hints for the browser in the form of <wbr> HTML tags.

  Word breaks are inserted inside an HTML node only, so the nodes themselves
  will not be changed. Attributes are also left unchanged.

  The following rules apply to insert word breaks:
  - For characters in [ '.', '/', '_' ]
  - For uppercase letters inside a multi-word X.Y.Z (at least 3 parts)

  Args:
    text: A string of text containing HTML content.

  Returns:
    A string with <wbr> inserted by the above rules.
  """
  SPLIT_CHARS_LIST = ['.', '_', '/']
  SPLIT_CHARS = r'([.|/|_/,]+)' # split by these characters
  CAP_LETTER_MIN = 3 # at least 3 components split by above chars, i.e. x.y.z
  def wbr_filter(text):
      new_txt = text

      # for johnyOrange.appleCider.redGuardian also insert wbr before the caps
      # => johny<wbr>Orange.apple<wbr>Cider.red<wbr>Guardian
      for words in text.split(" "):
        for char in SPLIT_CHARS_LIST:
          # match at least x.y.z, don't match x or x.y
          if len(words.split(char)) >= CAP_LETTER_MIN:
            new_word = re.sub(r"([a-z])([A-Z])", r"\1<wbr>\2", words)
            new_txt = new_txt.replace(words, new_word)

      # e.g. X/Y/Z -> X/<wbr>Y/<wbr>/Z. also for X.Y.Z, X_Y_Z.
      new_txt = re.sub(SPLIT_CHARS, r"\1<wbr>", new_txt)

      return new_txt

  # Do not mangle HTML when doing the replace by using BeatifulSoup
  # - Use the 'html.parser' to avoid inserting <html><body> when decoding
  soup = bs4.BeautifulSoup(text, features='html.parser')
  wbr_tag = lambda: soup.new_tag('wbr') # must generate new tag every time

  for navigable_string in soup.findAll(text=True):
      parent = navigable_string.parent

      # Insert each '$text<wbr>$foo' before the old '$text$foo'
      split_by_wbr_list = wbr_filter(navigable_string).split("<wbr>")
      for (split_string, last) in enumerate_with_last(split_by_wbr_list):
          navigable_string.insert_before(split_string)

          if not last:
            # Note that 'insert' will move existing tags to this spot
            # so make a new tag instead
            navigable_string.insert_before(wbr_tag())

      # Remove the old unmodified text
      navigable_string.extract()

  return soup.decode()