1#!/usr/bin/env php
2/*
3 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 *
5 *  Use of this source code is governed by a BSD-style license
6 *  that can be found in the LICENSE file in the root of the source
7 *  tree. An additional intellectual property rights grant can be found
8 *  in the file PATENTS.  All contributing project authors may
9 *  be found in the AUTHORS file in the root of the source tree.
10 */
11
12
13<?php
14
15/* This script converts markdown to doxygen htmlonly syntax, nesting the
16 * content inside a \page. It expects input on stdin and outputs on stdout.
17 *
18 * Usage: gen_example_doxy.php <page_identifier> "<page description>"
19 */
20
21
// Directory holding GeSHi's language files, located next to this script
$geshi_path = dirname($argv[0]) . "/includes/geshi/geshi/";
// Placeholder text used for temporary substitutions
$tmp_token  = '<!-- I wanna rock you, Chaka Khan -->';

// Include prerequisites or exit. Each entry maps a library file to the
// message printed when that file cannot be included.
$prerequisites = array(
  'includes/PHP-Markdown-Extra-1.2.3/markdown.php' => "Cannot load Markdown transformer.\n",
  'includes/PHP-SmartyPants-1.5.1e/smartypants.php' => "Cannot load SmartyPants transformer.\n",
  'includes/geshi/geshi.php' => "Cannot load GeSHi transformer.\n",
);
foreach ($prerequisites as $prerequisite_file => $prerequisite_error)
{
  if (!include_once($prerequisite_file))
    die($prerequisite_error);
}
unset($prerequisites, $prerequisite_file, $prerequisite_error);  // Clean up
32// ASCIIMathPHP?
33// HTML::Toc?
34// Tidy?
35// Prince?
36
37/**
38 *  Generate XHTML body
39 *
40 */
41
// Read the complete Markdown document from standard input
$markdown_src = file_get_contents('php://stdin');

// Convert every ASCIIMath expression, written between double square
// brackets, to MathML
$markdown_src = preg_replace_callback('/\[\[(.*?)\]\]/', 'ASCIIMathPHPCallback', $markdown_src);

// Fix ASCIIMathPHP's output
$markdown_src = fix_asciiMath($markdown_src);

// Markdown doesn't wrap block-style <math> elements in <p>, so do it here
$block_math   = '/\n(<math.*<\/math>)\n/';
$markdown_src = preg_replace($block_math, '<p class="eq_para">$1</p>', $markdown_src);

// Transform the body text to HTML
$page_body = Markdown($markdown_src);
unset($markdown_src, $block_math);  // Clean up
56
// Preprocess code blocks
// Markdown has already entity-encoded the contents of every code block,
// which GeSHi doesn't anticipate. Decode the entities and re-wrap each
// block in <div class="codeblock"> so the highlighting pass can find it.
$regexp = '|<pre><code>(.*?)<\/code><\/pre>|si';
while (preg_match($regexp, $page_body, $matches) > 0)
{
  // Replace 1st match with token
  $page_body = preg_replace($regexp, $tmp_token, $page_body, 1);
  // Un-encode ampersand entities
  $block_new = decode_markdown($matches[1]);
  // Replace token with revised string. This must be a literal substitution:
  // a preg_replace() replacement string would interpret sequences such as
  // \0, \\ or $1 inside the source code as backreferences and corrupt it.
  $page_body = str_replace($tmp_token, '<div class="codeblock">'.$block_new.'</div>', $page_body);
}
71
// Run GeSHi over code blocks
$regexp   = '|<div class="codeblock">(.*?)<\/div>|si';
$language = 'c';

// PREG_OFFSET_CAPTURE turns every match entry into array(text, byte offset),
// which lets the highlighted markup be spliced in literally further down.
while (preg_match($regexp, $page_body, $matches, PREG_OFFSET_CAPTURE))
{
  $block_old = $matches[0][0];  // Whole <div class="codeblock">...</div>
  $block_pos = $matches[0][1];  // Where it starts in $page_body

  $geshi = new GeSHi($matches[1][0], $language);
  $geshi->set_language_path($geshi_path);
  $block_new = $geshi->parse_code();
  // Strip annoying final newline
  $block_new = preg_replace('|\n&nbsp;<\/pre>|', '</pre>' , $block_new);
  // Remove style attribute (TODO: Research this in GeSHi)
  $block_new = preg_replace('| style="font-family:monospace;"|', '' , $block_new);
  // Splice the result in by position instead of using it as a preg_replace()
  // replacement string, where \0, \\ or $1 in the highlighted code would be
  // treated as backreferences.
  $page_body = substr_replace($page_body, $block_new, $block_pos, strlen($block_old));
  unset($geshi);    // Clean up
}
unset($block_new, $block_old, $block_pos);  // Clean up
89
// Apply typographic flourishes
$page_body = SmartyPants($page_body);


/**
 *  Generate Doxygen Body
 *
 *  Wraps the rendered HTML in a Doxygen \page comment. The page identifier
 *  and description come from the first two command line arguments and
 *  default to empty strings when absent.
 */
$page_id   = '';
$page_desc = '';
if (isset($argv[1])) $page_id   = $argv[1];
if (isset($argv[2])) $page_desc = $argv[2];

echo "/*!\\page {$page_id} {$page_desc}\n\\htmlonly\n";
echo $page_body;
echo "\\endhtmlonly\n*/\n";
103
104// ---------------------------------------------------------
105
/**
 * decode_markdown()
 *
 * Markdown encodes '&', '<' and '>' in detected code
 * blocks, as a convenience. This will restore the
 * encoded entities to ordinary characters, since a
 * downstream transformer (like GeSHi) may not
 * anticipate this.
 *
 * Each entity is decoded exactly once, so text that was
 * literally "&lt;" before Markdown encoded it (and is
 * therefore "&amp;lt;" on input) comes back as "&lt;",
 * not "<".
 *
 * @PARAM input str - Entity-encoded contents of a code
 *   block.
 *
 * Returns the input with '&amp;', '&lt;' and '&gt;'
 * replaced by '&', '<' and '>'.
 *
 **********************************************************/

function decode_markdown($input)
{
  // strtr() with a map scans the input once and never re-examines text it
  // has already substituted, unlike a chain of sequential replacements.
  $decode_map = array ('&amp;' => '&'
                      ,'&lt;'  => '<'
                      ,'&gt;'  => '>'
                      );

  return strtr($input, $decode_map);
}
134
135
/**
 * ASCIIMathML parser
 * http://tinyurl.com/ASCIIMathPHP
 *
 * Callback for preg_replace_callback(): converts one
 * ASCIIMath expression to MathML. A single parser object
 * is created on the first call and reused afterwards.
 *
 * @PARAM mtch_arr array - Array of ASCIIMath expressions
 *   as returned by preg_replace_callback([pattern]). First
 *   dimension is the full matched string (with delimiter);
 *   2nd dimension is the undelimited contents (typically
 *   a capture group).
 *
 * Returns the MathML produced by the parser for the
 * trimmed expression.
 *
 **********************************************************/

function ASCIIMathPHPCallback($mtch_arr)
{
  static $asciimath;

  // Load the configuration and class only when the shared parser is first
  // built, rather than re-reading the configuration file for every match.
  if (!isset($asciimath))
  {
    // NOTE(review): the configuration file is presumably what defines
    // $symbol_arr in this scope -- nothing else here assigns it.
    include('includes/ASCIIMathPHP-2.0/ASCIIMathPHP-2.0.cfg.php');
    require_once('includes/ASCIIMathPHP-2.0/ASCIIMathPHP-2.0.class.php');

    $asciimath = new ASCIIMathPHP($symbol_arr);
  }

  $math_attr_arr = array('displaystyle' => 'true');

  $asciimath->setExpr(trim($mtch_arr[1]));
  $asciimath->genMathML($math_attr_arr);

  return($asciimath->getMathML());
}
166
/**
 * fix_asciiMath()
 *
 * ASCIIMath pretty-prints its output, with linefeeds
 * and tabs. Causes unexpected behavior in some renderers.
 * This flattens <math> blocks by deleting the whitespace
 * that directly follows the listed MathML tags.
 *
 * @PARAM page_body str - The <body> element of an
 * XHTML page to transform.
 *
 * Returns the result of preg_replace() on page_body.
 *
 **********************************************************/

function fix_asciiMath($page_body)
{
  // Elements whose opening and closing tags both get trailing whitespace
  // stripped, in the order the patterns are applied.
  $tag_names = array('mstyle', 'mrow', 'mo', 'mi', 'mn', 'mtext', 'msqrt', 'mfrac');

  // Only the opening <math> tag is handled; there is no </math> pattern.
  $patterns = array('/(<math.*?>)\n*\s*/');
  foreach ($tag_names as $tag)
  {
    $patterns[] = '/(<'.$tag.'.*?>)\n*\s*/';
    $patterns[] = '/(<\/'.$tag.'>)\n*\s*/';
  }

  // Keep the captured tag itself and drop the whitespace that followed it
  return preg_replace($patterns, '$1', $page_body);
}
225