toc.py revision 6516b99bb74dfb7187a08f7090bf7ca22a006f15
"""
Table of Contents Extension for Python-Markdown
* * *

(c) 2008 [Jack Miller](http://codezen.org)

Dependencies:
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)

"""
import markdown
from markdown import etree
import re


class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """Build a table of contents from the header elements of a document.

    ``run`` reads its settings from ``self.config`` (assigned by
    ``TocExtension.extendMarkdown``), a dict whose values are
    ``[value, description]`` lists for the keys ``"marker"``, ``"slugify"``,
    ``"title"`` and ``"anchorlink"``.
    """

    # Matches the start of a header tag name: h1-h6 in either case.
    header_rgx = re.compile("[Hh][123456]")

    def _descendants(self, root):
        """Return an iterator over ``root`` and all of its descendants."""
        # Element.getiterator() was removed in Python 3.9, while older
        # ElementTree releases do not provide Element.iter(); use whichever
        # the element offers.
        walk = getattr(root, "iter", None) or root.getiterator
        return walk()

    # Iterator wrapper to get parent and child all at once
    def iterparent(self, root):
        """Yield a ``(parent, child)`` pair for every element below ``root``."""
        for parent in self._descendants(root):
            for child in parent:
                yield parent, child

    def run(self, doc):
        """Insert a table of contents into ``doc`` (modified in place).

        Every element whose text contains the configured marker (headers
        excepted) is replaced by a ``<div class="toc">``.  Every header with
        text gets an ``id`` attribute if it has none, and a linked ``<li>``
        entry in the TOC.  When the ``anchorlink`` setting is non-zero the
        header text is additionally wrapped in a self link.

        Returns None.
        """
        div = etree.Element("div")
        div.attrib["class"] = "toc"

        # Add title to the div
        title = self.config["title"][0]
        if title:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = title

        marker = self.config["marker"][0]
        slugify = self.config["slugify"][0]
        anchorlink = int(self.config["anchorlink"][0])

        # Collect the ids already present so generated ones never clash.
        used_ids = set(el.attrib["id"] for el in self._descendants(doc)
                       if "id" in el.attrib)

        # Lists currently open in the TOC, outermost first.  Each entry is
        # [header level the list represents, <ul> element, last <li> added].
        open_lists = []

        # Take a snapshot of the tree before changing it: the loop below
        # inserts the TOC div and anchor links into the document, and those
        # generated elements must not be visited themselves.
        for parent, child in list(self.iterparent(doc)):
            text = child.text
            if not text:
                continue

            is_header = self.header_rgx.match(child.tag)

            # To keep the output from screwing up the validation by putting
            # a <div> inside of a <p> we actually replace the <p> in its
            # entirety.  The marker is not honoured inside a header, as that
            # would place a new TOC inside the previously generated TOC.
            if marker in text and not is_header:
                for index, sibling in enumerate(parent):
                    if sibling is child:
                        parent[index] = div
                        break

            if not is_header:
                continue

            tag_level = int(child.tag[-1])

            # Close every list whose enclosing list can still hold this
            # header, i.e. the enclosing list's level is not above it.
            while len(open_lists) > 1 and open_lists[-2][0] >= tag_level:
                open_lists.pop()

            if not open_lists:
                # First header: open the top-level list at whatever level
                # the document happens to start with.
                open_lists.append(
                    [tag_level, etree.SubElement(div, "ul"), None])
            elif tag_level > open_lists[-1][0]:
                # Deeper header: nest exactly one list below the previous
                # entry, however many levels were skipped.
                nested = etree.SubElement(open_lists[-1][2], "ul")
                open_lists.append([tag_level, nested, None])
            else:
                # Same level, or shallower than the innermost list that is
                # left: add a sibling and let the list take this level.
                open_lists[-1][0] = tag_level

            # Do not override pre-existing ids
            if "id" in child.attrib:
                header_id = child.attrib["id"]
            else:
                header_id = slugify(text)
                if header_id in used_ids:
                    ctr = 1
                    while "%s_%d" % (header_id, ctr) in used_ids:
                        ctr += 1
                    header_id = "%s_%d" % (header_id, ctr)
                used_ids.add(header_id)
                child.attrib["id"] = header_id

            # List item link, inserted into the innermost open list
            item = etree.SubElement(open_lists[-1][1], "li")
            link = etree.SubElement(item, "a")
            link.text = text
            link.attrib["href"] = "#" + header_id
            open_lists[-1][2] = item

            if anchorlink:
                # Move the header text into a link pointing at the header.
                anchor = etree.SubElement(child, "a")
                anchor.text = text
                anchor.attrib["href"] = "#" + header_id
                anchor.attrib["class"] = "toclink"
                child.text = ""


class TocExtension(markdown.Extension):
    """Markdown extension that registers ``TocTreeprocessor``."""

    def __init__(self, configs):
        """Set the default settings, then apply the overrides in ``configs``.

        ``configs`` may be a sequence of ``(key, value)`` pairs, a mapping,
        or None / empty for no overrides.
        """
        self.config = { "marker" : ["[TOC]",
                            "Text to find and replace with Table of Contents -"
                            "Defaults to \"[TOC]\""],
                        "slugify" : [self.slugify,
                            "Function to generate anchors based on header text-"
                            "Defaults to a built in slugify function."],
                        "title" : [None,
                            "Title to insert into TOC <div> - "
                            "Defaults to None"],
                        "anchorlink" : [0,
                            "1 if header should be a self link"
                            "Defaults to 0"]}

        if hasattr(configs, "items"):
            # A mapping iterates over its keys only; use its pairs instead.
            configs = configs.items()

        for key, value in configs or ():
            self.setConfig(key, value)

    # This is modelled on Django's slugify
    def slugify(self, value):
        """ Slugify a string, to make it URL friendly.

        ``value`` must be a text (unicode) string.  Characters that have no
        ASCII form after NFKD normalisation are dropped, everything except
        word characters, whitespace and hyphens is removed, the result is
        stripped and lower-cased, and each run of whitespace/hyphens is
        collapsed into a single hyphen.
        """
        import unicodedata
        ascii_bytes = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
        # Decode straight back to text so the regexes below receive a text
        # string on both Python 2 and Python 3.
        value = ascii_bytes.decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return re.sub(r'[-\s]+', '-', value)

    def extendMarkdown(self, md, md_globals):
        """Add a ``TocTreeprocessor`` sharing this extension's settings to ``md``."""
        tocext = TocTreeprocessor(md)
        tocext.config = self.config
        md.treeprocessors.add("toc", tocext, "_begin")


def makeExtension(configs=None):
    """Return a ``TocExtension`` built from ``configs`` (no overrides if None)."""
    if configs is None:
        configs = {}
    return TocExtension(configs=configs)