1: <?php
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18: class Markdown_Parser {
19:
20:
21:
// Repetition count used by the constructor when it builds $nested_brackets_re.
public $nested_brackets_depth = 6;
// Regex fragment for nested square brackets; assigned in the constructor.
public $nested_brackets_re;

// Repetition count used by the constructor when it builds $nested_url_parenthesis_re.
public $nested_url_parenthesis_depth = 4;
// Regex fragment for nested parentheses inside a URL; assigned in the constructor.
public $nested_url_parenthesis_re;


// Characters that parseSpan() recognises after a backslash.
public $escape_chars = '\`*_{}[]()>#+-.!';
// Character class built from $escape_chars with preg_quote() in the constructor.
public $escape_chars_re;


// Text appended when emitting <hr, <br and <img elements.
public $empty_element_suffix = ' />';
// Tab stop width; read by detab(), outdent() and the block-level regexes.
public $tab_width = 4;


// When true, hashHTMLBlocks() returns its input unchanged and parseSpan()
// leaves the inline-HTML alternatives out of its tokenizer regex.
public $no_markup = false;
// When true, encodeAmpsAndAngles() takes its unconditional str_replace()
// branch instead of the entity-aware preg_replace() branch.
public $no_entities = false;


// Link URLs and titles keyed by link id; copied into $urls / $titles by setup().
public $predef_urls = array();
public $predef_titles = array();
43:
public function __construct() {
    // Prepare the helpers that the transformation methods depend on.
    $this->_initDetab();
    $this->prepareItalicsAndBold();

    // Nested [...] matcher: the opening fragment repeated N times followed
    // by the closing fragment repeated N times.
    $bracket_depth = $this->nested_brackets_depth;
    $bracket_open  = str_repeat('(?>[^\[\]]+|\[', $bracket_depth);
    $bracket_close = str_repeat('\])*', $bracket_depth);
    $this->nested_brackets_re = $bracket_open . $bracket_close;

    // Same construction for parentheses inside URLs.
    $paren_depth = $this->nested_url_parenthesis_depth;
    $paren_open  = str_repeat('(?>[^()\s]+|\(', $paren_depth);
    $paren_close = str_repeat('(?>\)))*', $paren_depth);
    $this->nested_url_parenthesis_re = $paren_open . $paren_close;

    // Character class of every escapable character.
    $quoted_escapes = preg_quote($this->escape_chars);
    $this->escape_chars_re = '[' . $quoted_escapes . ']';

    // Order each gamut by its priority value, keeping the method-name keys.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
66:
67:
68:
// Link URLs and titles keyed by lower-cased link id. Seeded from the
// predefined lists by setup() and filled by _stripLinkDefinitions_callback().
public $urls = array();
public $titles = array();
// Placeholder key => original text; written by hashPart(), read by unhash().
public $html_hashes = array();


// True while doAnchors() is running; doAnchors() returns its input untouched
// when it is entered with this flag already set.
public $in_anchor = false;
75:
76:
/**
 * Reset the per-document state before a transformation starts.
 *
 * Link URLs and titles are seeded from the predefined lists, the placeholder
 * table is emptied and the anchor re-entrancy flag is cleared.
 */
public function setup() {
    $this->urls = $this->predef_urls;
    $this->titles = $this->predef_titles;
    $this->html_hashes = array();

    // Must be the property: a bare "$in_anchor = false" only creates a
    // throw-away local and leaves a stale flag on the instance.
    $this->in_anchor = false;
}
89:
/**
 * Release the per-document state once a transformation is finished.
 */
public function teardown() {
    // PHP arrays are values, so one chained assignment empties all three.
    $this->urls = $this->titles = $this->html_hashes = array();
}
99:
100:
/**
 * Run the complete document pipeline on $text and return the result.
 *
 * The input is normalised, block-level HTML is hashed, and then every method
 * named in $document_gamut is applied in order.
 */
public function transform($text) {
    $this->setup();

    // Drop a leading UTF-8 BOM and every \x1A byte; hashPart() uses \x1A
    // inside its placeholder keys.
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    // Turn CRLF / CR line endings into LF and end the text with two newlines.
    $text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

    // Expand tabs, then replace block-level HTML with placeholders.
    $text = $this->hashHTMLBlocks($this->detab($text));

    // Empty out lines that contain nothing but spaces.
    $text = preg_replace('/^[ ]+$/m', '', $text);

    // The gamut keys are method names.
    foreach (array_keys($this->document_gamut) as $step) {
        $text = $this->$step($text);
    }

    $this->teardown();

    return $text . "\n";
}
139:
// Document-level steps: method name => priority. The constructor asort()s
// this table and transform() calls each key as a method, in that order.
public $document_gamut = array(

"stripLinkDefinitions" => 20,

"runBasicBlockGamut" => 30,
);
146:
147:
/**
 * Remove link definitions of the form `[id]: url "title"` from $text.
 *
 * Every match is passed to _stripLinkDefinitions_callback(), which records
 * the URL and title and returns an empty replacement.
 *
 * @param string $text
 * @return string $text without the matched definitions
 */
public function stripLinkDefinitions($text) {
// A definition may be indented by at most tab_width - 1 spaces.
$less_than_tab = $this->tab_width - 1;


$text = preg_replace_callback('{
^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
[ ]*
\n? # maybe *one* newline
[ ]*
(?:
<(.+?)> # url = $2
|
(\S+?) # url = $3
)
[ ]*
\n? # maybe one newline
[ ]*
(?:
(?<=\s) # lookbehind for whitespace
["(]
(.*?) # title = $4
[")]
[ ]*
)? # title is optional
(?:\n+|\Z)
}xm',
array(&$this, '_stripLinkDefinitions_callback'),
$text);
return $text;
}
/**
 * Record one matched link definition and remove it from the text.
 *
 * @param array $matches Groups from stripLinkDefinitions(): 1 = id,
 *                       2 = <url>, 3 = bare url, 4 = optional title.
 * @return string Always the empty string.
 */
public function _stripLinkDefinitions_callback($matches) {
    $id = strtolower($matches[1]);

    // The angle-bracket form wins when it captured something.
    if ($matches[2] == '') {
        $target = $matches[3];
    } else {
        $target = $matches[2];
    }

    $this->urls[$id] = $target;
    // Group 4 is absent when no title was written; store null so that
    // isset() on the title stays false.
    $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

    return '';
}
189:
190:
/**
 * Replace block-level HTML in $text with placeholders.
 *
 * The regex recognises five shapes: the tags in $block_tags_b_re, the tags in
 * $block_tags_a_re, <hr>, HTML comments, and <? ... ?> / <% ... %> blocks.
 * Each match goes through _hashHTMLBlocks_callback().
 *
 * @param string $text
 * @return string $text unchanged when $no_markup is set, otherwise $text with
 *                the matched blocks replaced.
 */
public function hashHTMLBlocks($text) {
if ($this->no_markup) return $text;

// Blocks may be indented by at most tab_width - 1 spaces.
$less_than_tab = $this->tab_width - 1;

// Group "a" needs a newline right after the opening tag (second
// alternative of the main regex); group "b" does not.
$block_tags_a_re = 'ins|del';
$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
'script|noscript|form|fieldset|iframe|math';


// Number of times the nested-tag fragment below is repeated.
$nested_tags_level = 4;
// Sub-pattern for the optional attribute list of a tag.
$attr = '
(?> # optional tag attributes
\s # starts with whitespace
(?>
[^>"/]+ # text outside quotes
|
/+(?!>) # slash not followed by ">"
|
"[^"]*" # text inside double quotes (tolerate ">")
|
\'[^\']*\' # text inside single quotes (tolerate ">")
)*
)?
';
// Sub-pattern for tag content: the opening fragment repeated
// $nested_tags_level times, a lazy ".*?", then the closing fragment repeated
// the same number of times. \2 is the tag name captured by the main regex.
$content =
str_repeat('
(?>
[^<]+ # content without tag
|
<\2 # nested opening tag
'.$attr.' # attributes
(?>
/>
|
>', $nested_tags_level).
'.*?'.
str_repeat('
</\2\s*> # closing nested tag
)
|
<(?!/\2\s*> # other tags with a different name
)
)*',
$nested_tags_level);
// Same pattern with the back-reference moved to group 3, for the group "a"
// alternative.
$content2 = str_replace('\2', '\3', $content);

$text = preg_replace_callback('{(?>
(?>
(?<=\n\n) # Starting after a blank line
| # or
\A\n? # the beginning of the doc
)
( # save in $1

# Match from `\n<tag>` to `</tag>\n`, handling nested tags
# in between.

[ ]{0,'.$less_than_tab.'}
<('.$block_tags_b_re.')# start tag = $2
'.$attr.'> # attributes followed by > and \n
'.$content.' # content, support nesting
</\2> # the matching end tag
[ ]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document

| # Special version for tags of group a.

[ ]{0,'.$less_than_tab.'}
<('.$block_tags_a_re.')# start tag = $3
'.$attr.'>[ ]*\n # attributes followed by >
'.$content2.' # content, support nesting
</\3> # the matching end tag
[ ]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document

| # Special case just for <hr />. It was easier to make a special
# case than to make the other regex more complicated.

[ ]{0,'.$less_than_tab.'}
<(hr) # start tag = $2
'.$attr.' # attributes
/?> # the matching end tag
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document

| # Special case for standalone HTML comments:

[ ]{0,'.$less_than_tab.'}
(?s:
<!-- .*? -->
)
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document

| # PHP and ASP-style processor instructions (<? and <%)

[ ]{0,'.$less_than_tab.'}
(?s:
<([?%]) # $2
.*?
\2>
)
[ ]*
(?=\n{2,}|\Z) # followed by a blank line or end of document

)
)}Sxmi',
array(&$this, '_hashHTMLBlocks_callback'),
$text);

return $text;
}
/**
 * Swap one matched HTML block for its placeholder.
 *
 * @param array $matches Group 1 holds the HTML block.
 * @return string The placeholder key with two newlines on each side.
 */
public function _hashHTMLBlocks_callback($matches) {
    $placeholder = $this->hashBlock($matches[1]);
    return "\n\n" . $placeholder . "\n\n";
}
333:
334:
/**
 * Store $text in the placeholder table and return the key that stands for it.
 *
 * Keys look like <boundary>\x1A<counter><boundary>. The counter is a static
 * local, so it keeps increasing across calls.
 *
 * @param string $text     Text to protect from further processing.
 * @param string $boundary Character placed on both ends of the key.
 * @return string The generated key.
 */
public function hashPart($text, $boundary = 'X') {
    static $counter = 0;

    // Expand placeholders already present in $text before storing it.
    $expanded = $this->unhash($text);

    ++$counter;
    $key = $boundary . "\x1A" . $counter . $boundary;
    $this->html_hashes[$key] = $expanded;

    return $key;
}
356:
357:
/**
 * Shortcut for hashPart() with the 'B' boundary, which formParagraphs() uses
 * to recognise block placeholders.
 *
 * @param string $text
 * @return string The generated key.
 */
public function hashBlock($text) {
    $block_boundary = 'B';
    return $this->hashPart($text, $block_boundary);
}
364:
365:
// Block-level steps: method name => priority. Sorted with asort() in the
// constructor; runBasicBlockGamut() calls each key as a method, in that order.
public $block_gamut = array(




"doHeaders" => 10,
"doHorizontalRules" => 20,

"doLists" => 40,
"doCodeBlocks" => 50,
"doBlockQuotes" => 60,
);
378:
/**
 * Hash block-level HTML in $text, then apply the block gamut to it.
 *
 * @param string $text
 * @return string
 */
public function runBlockGamut($text) {
    $hashed = $this->hashHTMLBlocks($text);
    return $this->runBasicBlockGamut($hashed);
}
392:
/**
 * Apply every method named in $block_gamut, then form paragraphs.
 *
 * Unlike runBlockGamut() this does not hash HTML blocks first.
 *
 * @param string $text
 * @return string
 */
public function runBasicBlockGamut($text) {
    // The gamut keys are method names.
    foreach (array_keys($this->block_gamut) as $step) {
        $text = $this->$step($text);
    }

    return $this->formParagraphs($text);
}
408:
409:
/**
 * Replace horizontal-rule lines (three or more -, * or _ markers) with a
 * hashed <hr> element.
 *
 * hashBlock() is called once, before preg_replace() runs, so every rule in
 * $text is replaced by the same placeholder key.
 *
 * @param string $text
 * @return string
 */
public function doHorizontalRules($text) {

return preg_replace(
'{
^[ ]{0,3} # Leading space
([-*_]) # $1: First marker
(?> # Repeated marker group
[ ]{0,2} # Zero, one, or two spaces.
\1 # Marker character
){2,} # Group repeated at least twice
[ ]* # Tailing spaces
$ # End of line.
}mx',
"\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
$text);
}
426:
427:
// Span-level steps: method name => priority. Sorted with asort() in the
// constructor; runSpanGamut() calls each key as a method, in that order.
public $span_gamut = array(






"parseSpan" => -30,



"doImages" => 10,
"doAnchors" => 20,




"doAutoLinks" => 30,
"encodeAmpsAndAngles" => 40,

"doItalicsAndBold" => 50,
"doHardBreaks" => 60,
);
451:
/**
 * Apply every method named in $span_gamut to $text, in order.
 *
 * @param string $text
 * @return string
 */
public function runSpanGamut($text) {
    // The gamut keys are method names.
    foreach (array_keys($this->span_gamut) as $step) {
        $text = $this->$step($text);
    }

    return $text;
}
462:
463:
/**
 * Replace "two or more spaces followed by a newline" with a hard break.
 *
 * @param string $text
 * @return string
 */
public function doHardBreaks($text) {
    $on_break = array($this, '_doHardBreaks_callback');
    return preg_replace_callback('/ {2,}\n/', $on_break, $text);
}
/**
 * Produce the hashed <br> element for one hard break.
 *
 * @param array $matches Unused.
 * @return string Placeholder key.
 */
public function _doHardBreaks_callback($matches) {
    $br = '<br' . $this->empty_element_suffix . "\n";
    return $this->hashPart($br);
}
472:
473:
/**
 * Turn Markdown link syntax in $text into hashed <a> elements.
 *
 * Three passes run in this order: reference links `[text] [id]`, inline
 * links `[text](url "title")`, and shortcut references `[text]`.
 *
 * @param string $text
 * @return string
 */
public function doAnchors($text) {
// Re-entrancy guard: the callbacks run the span gamut on the link text,
// which reaches this method again; that nested call must do nothing.
if ($this->in_anchor) return $text;
$this->in_anchor = true;


// Pass 1: reference-style links.
$text = preg_replace_callback('{
( # wrap whole match in $1
\[
('.$this->nested_brackets_re.') # link text = $2
\]

[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces

\[
(.*?) # id = $3
\]
)
}xs',
array(&$this, '_doAnchors_reference_callback'), $text);


// Pass 2: inline-style links.
$text = preg_replace_callback('{
( # wrap whole match in $1
\[
('.$this->nested_brackets_re.') # link text = $2
\]
\( # literal paren
[ \n]*
(?:
<(.+?)> # href = $3
|
('.$this->nested_url_parenthesis_re.') # href = $4
)
[ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # Title = $7
\6 # matching quote
[ \n]* # ignore any spaces/tabs between closing quote and )
)? # title is optional
\)
)
}xs',
array(&$this, '_doAnchors_inline_callback'), $text);


// Pass 3: shortcut references. There is no group 3 here, so the reference
// callback falls back to the link text as the id.
$text = preg_replace_callback('{
( # wrap whole match in $1
\[
([^\[\]]+) # link text = $2; can\'t contain [ or ]
\]
)
}xs',
array(&$this, '_doAnchors_reference_callback'), $text);

$this->in_anchor = false;
return $text;
}
/**
 * Build the <a> element for a reference-style or shortcut link.
 *
 * @param array $matches 1 = whole match, 2 = link text, 3 = link id (absent
 *                       for shortcut links).
 * @return string Placeholder key for the anchor, or the whole match unchanged
 *                when the id has no known URL.
 */
public function _doAnchors_reference_callback($matches) {
    $label = $matches[2];
    $id    = isset($matches[3]) ? $matches[3] : '';

    // An empty or missing id means the link text doubles as the id.
    if ($id == "") {
        $id = $label;
    }

    // Lower-case the id and fold a line break (with one optional preceding
    // space) into a single space.
    $id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

    if (!isset($this->urls[$id])) {
        return $matches[1];
    }

    $href = $this->encodeAttribute($this->urls[$id]);
    $html = "<a href=\"$href\"";

    if (isset($this->titles[$id])) {
        $tooltip = $this->encodeAttribute($this->titles[$id]);
        $html .= " title=\"$tooltip\"";
    }

    $label = $this->runSpanGamut($label);
    $html .= ">$label</a>";

    return $this->hashPart($html);
}
/**
 * Build the <a> element for an inline link.
 *
 * @param array $matches 2 = link text, 3 = <url>, 4 = bare url,
 *                       7 = optional title.
 * @return string Placeholder key for the anchor.
 */
public function _doAnchors_inline_callback($matches) {
    $label = $this->runSpanGamut($matches[2]);

    // The angle-bracket form wins when it captured something.
    if ($matches[3] == '') {
        $href = $matches[4];
    } else {
        $href = $matches[3];
    }
    $href = $this->encodeAttribute($href);

    $html = "<a href=\"$href\"";
    // Group 7 only exists when the optional title part matched.
    if (isset($matches[7])) {
        $tooltip = $this->encodeAttribute($matches[7]);
        $html .= " title=\"$tooltip\"";
    }

    // NOTE(review): the link text goes through the span gamut a second time
    // here, as it did before this rewrite; kept to preserve behaviour.
    $label = $this->runSpanGamut($label);
    $html .= ">$label</a>";

    return $this->hashPart($html);
}
597:
598:
/**
 * Turn Markdown image syntax in $text into hashed <img> elements.
 *
 * Reference-style images `![alt][id]` are handled first, then inline images
 * `![alt](url "title")`.
 *
 * @param string $text
 * @return string
 */
public function doImages($text) {
// Pass 1: reference-style images.
$text = preg_replace_callback('{
( # wrap whole match in $1
!\[
('.$this->nested_brackets_re.') # alt text = $2
\]

[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces

\[
(.*?) # id = $3
\]

)
}xs',
array(&$this, '_doImages_reference_callback'), $text);


// Pass 2: inline-style images.
$text = preg_replace_callback('{
( # wrap whole match in $1
!\[
('.$this->nested_brackets_re.') # alt text = $2
\]
\s? # One optional whitespace character
\( # literal paren
[ \n]*
(?:
<(\S*)> # src url = $3
|
('.$this->nested_url_parenthesis_re.') # src url = $4
)
[ \n]*
( # $5
([\'"]) # quote char = $6
(.*?) # title = $7
\6 # matching quote
[ \n]*
)? # title is optional
\)
)
}xs',
array(&$this, '_doImages_inline_callback'), $text);

return $text;
}
/**
 * Build the <img> element for a reference-style image.
 *
 * @param array $matches 1 = whole match, 2 = alt text, 3 = link id.
 * @return string Placeholder key for the image, or the whole match unchanged
 *                when the id has no known URL.
 */
public function _doImages_reference_callback($matches) {
    $alt = $matches[2];
    $id  = strtolower($matches[3]);

    // An empty id means the alt text doubles as the id.
    if ($id == "") {
        $id = strtolower($alt);
    }

    $alt = $this->encodeAttribute($alt);

    if (!isset($this->urls[$id])) {
        return $matches[1];
    }

    $src  = $this->encodeAttribute($this->urls[$id]);
    $html = "<img src=\"$src\" alt=\"$alt\"";

    if (isset($this->titles[$id])) {
        $tooltip = $this->encodeAttribute($this->titles[$id]);
        $html .= " title=\"$tooltip\"";
    }

    $html .= $this->empty_element_suffix;

    return $this->hashPart($html);
}
/**
 * Build the <img> element for an inline image.
 *
 * @param array $matches 2 = alt text, 3 = <url>, 4 = bare url,
 *                       7 = optional title.
 * @return string Placeholder key for the image.
 */
public function _doImages_inline_callback($matches) {
    // The angle-bracket form wins when it captured something.
    if ($matches[3] == '') {
        $src = $matches[4];
    } else {
        $src = $matches[3];
    }

    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeAttribute($src);

    $html = "<img src=\"$src\" alt=\"$alt\"";
    // Group 7 only exists when the optional title part matched.
    if (isset($matches[7])) {
        $tooltip = $this->encodeAttribute($matches[7]);
        $html .= " title=\"$tooltip\"";
    }
    $html .= $this->empty_element_suffix;

    return $this->hashPart($html);
}
699:
700:
/**
 * Convert setext-style (underlined with = or -) and atx-style (leading #)
 * headers in $text.
 *
 * The method name is the key used for this step in $block_gamut; the
 * declaration previously had no name at all, which is a parse error.
 *
 * @param string $text
 * @return string
 */
public function doHeaders($text) {
    // Setext: a text line followed by a line made only of "=" or "-".
    $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
        array(&$this, '_doHeaders_callback_setext'), $text);

    // Atx: one to six "#" characters, the header text, optional closing "#"s.
    $text = preg_replace_callback('{
^(\#{1,6}) # $1 = string of #\'s
[ ]*
(.+?) # $2 = Header text
[ ]*
\#* # optional closing #\'s (not counted)
\n+
}xm',
        array(&$this, '_doHeaders_callback_atx'), $text);

    return $text;
}
/**
 * Build the hashed <h1>/<h2> element for one setext-style header.
 *
 * The method name is the one the setext preg_replace_callback() call refers
 * to; the declaration previously had no name at all.
 *
 * @param array $matches 1 = header text, 2 = underline ("=" or "-" run).
 * @return string The whole match unchanged when it looks like a list item,
 *                otherwise the placeholder key surrounded by newlines.
 */
public function _doHeaders_callback_setext($matches) {
    // A single "-" under a line that itself starts with "- " (or is just "-")
    // is left alone rather than turned into a header.
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
        return $matches[0];

    // Square-bracket offset: the curly-brace form ($s{0}) is gone in PHP 8.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
/**
 * Build the hashed <hN> element for one atx-style header.
 *
 * The method name is the one the atx preg_replace_callback() call refers to;
 * the declaration previously had no name at all.
 *
 * @param array $matches 1 = run of "#" characters, 2 = header text.
 * @return string Placeholder key surrounded by newlines.
 */
public function _doHeaders_callback_atx($matches) {
    // The number of "#" characters is the header level.
    $level = strlen($matches[1]);
    $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
745:
746:
/**
 * Find whole lists in $text and hand each one to _doLists_callback().
 *
 * The search runs twice, once for bullet markers and once for numbered
 * markers; in each run the other marker kind ends the list when it appears
 * at the same indentation.
 *
 * @param string $text
 * @return string
 */
public function doLists($text) {
// List markers may be indented by at most tab_width - 1 spaces.
$less_than_tab = $this->tab_width - 1;


$marker_ul_re = '[*+-]';
$marker_ol_re = '\d+[\.]';
$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";

// marker kind being matched => the other kind.
$markers_relist = array(
$marker_ul_re => $marker_ol_re,
$marker_ol_re => $marker_ul_re,
);

foreach ($markers_relist as $marker_re => $other_marker_re) {

$whole_list_re = '
( # $1 = whole list
( # $2
([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
('.$marker_re.') # $4 = first list item marker
[ ]+
)
(?s:.+?)
( # $5
\z
|
\n{2,}
(?=\S)
(?! # Negative lookahead for another list item marker
[ ]*
'.$marker_re.'[ ]+
)
|
(?= # Lookahead for another kind of list
\n
\3 # Must have the same indentation
'.$other_marker_re.'[ ]+
)
)
)
';


// $list_level is non-zero while processListItems() is running, i.e. for
// nested lists; those may start at any line start. At the top level the
// list must follow a blank line or the start of the text.
if ($this->list_level) {
$text = preg_replace_callback('{
^
'.$whole_list_re.'
}mx',
array(&$this, '_doLists_callback'), $text);
}
else {
$text = preg_replace_callback('{
(?:(?<=\n)\n|\A\n?) # Must eat the newline
'.$whole_list_re.'
}mx',
array(&$this, '_doLists_callback'), $text);
}
}

return $text;
}
813:
/**
 * Convert one matched list into a hashed <ul> or <ol> element.
 *
 * @param array $matches 1 = whole list, 4 = first list item marker.
 * @return string Placeholder key surrounded by newlines.
 */
public function _doLists_callback($matches) {
    $bullet_re = '[*+-]';
    $number_re = '\d+[\.]';

    // The first marker decides the list type and which marker pattern
    // separates the items.
    if (preg_match("/$bullet_re/", $matches[4])) {
        $tag = "ul";
        $item_marker_re = $bullet_re;
    } else {
        $tag = "ol";
        $item_marker_re = $number_re;
    }

    $items  = $this->processListItems($matches[1] . "\n", $item_marker_re);
    $hashed = $this->hashBlock("<$tag>\n" . $items . "</$tag>");

    return "\n" . $hashed . "\n\n";
}
831:
// Nesting depth of processListItems(): incremented on entry, decremented on
// exit. doLists() picks its anchoring rule from it. Declared "public" like
// every other property of the class instead of the legacy "var".
public $list_level = 0;
833:
/**
 * Split the text of one list into items and turn each into an <li> element
 * via _processListItems_callback().
 *
 * @param string $list_str      Text of the whole list.
 * @param string $marker_any_re Regex fragment matching this list's markers.
 * @return string The list text with every item replaced by its <li>.
 */
public function processListItems($list_str, $marker_any_re) {
// Track nesting depth for doLists().
$this->list_level++;


// Collapse two or more trailing newlines into one.
$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

$list_str = preg_replace_callback('{
(\n)? # leading line = $1
(^[ ]*) # leading whitespace = $2
('.$marker_any_re.' # list marker and space = $3
(?:[ ]+|(?=\n)) # space only required if item is not empty
)
((?s:.*?)) # list item text = $4
(?:(\n+(?=\n))|\n) # tailing blank line = $5
(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
}xm',
array(&$this, '_processListItems_callback'), $list_str);

$this->list_level--;
return $list_str;
}
/**
 * Convert one list item into an <li> element.
 *
 * @param array $matches 1 = leading blank line, 2 = leading whitespace,
 *                       3 = marker and space, 4 = item text,
 *                       5 = trailing blank line.
 * @return string "<li>...</li>\n"
 */
public function _processListItems_callback($matches) {
    $body = $matches[4];

    // Blank lines around or inside the item send it through the block gamut;
    // otherwise it gets nested lists plus the span gamut.
    $is_block_item = !empty($matches[1])
        || !empty($matches[5])
        || preg_match('/\n{2,}/', $body);

    if ($is_block_item) {
        // Stand in for the marker with spaces of the same width so the first
        // line is outdented together with the rest.
        $marker_padding = str_repeat(' ', strlen($matches[3]));
        $body = $matches[2] . $marker_padding . $body;
        $body = $this->runBlockGamut($this->outdent($body) . "\n");
    } else {
        $body = $this->doLists($this->outdent($body));
        $body = preg_replace('/\n+$/', '', $body);
        $body = $this->runSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
}
903:
904:
/**
 * Find runs of lines indented by tab_width spaces and hand each run to
 * _doCodeBlocks_callback().
 *
 * @param string $text
 * @return string
 */
public function doCodeBlocks($text) {
$text = preg_replace_callback('{
(?:\n\n|\A\n?)
( # $1 = the code block -- one or more lines, starting with a space/tab
(?>
[ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces
.*\n+
)+
)
((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
}xm',
array(&$this, '_doCodeBlocks_callback'), $text);

return $text;
}
/**
 * Convert one indented code block into a hashed <pre><code> element.
 *
 * @param array $matches Group 1 holds the indented lines.
 * @return string Placeholder key with two newlines on each side.
 */
public function _doCodeBlocks_callback($matches) {
    // Remove one level of indentation, then escape &, < and >.
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    // Trim leading and trailing newlines.
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = "<pre><code>$code\n</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
935:
936:
/**
 * Wrap $code in a hashed <code> element.
 *
 * @param string $code Raw code span content.
 * @return string Placeholder key.
 */
public function makeCodeSpan($code) {
    $trimmed = trim($code);
    $escaped = htmlspecialchars($trimmed, ENT_NOQUOTES);
    return $this->hashPart("<code>$escaped</code>");
}
944:
945:
// Token regexes for emphasis, keyed by the currently open emphasis token:
// '' is used when nothing is open (matches an opening token), '*' / '_' when
// that token is open (matches its closing counterpart).
public $em_relist = array(
'' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
'_' => '(?<=\S|^)(?<!_)_(?!_)',
);
// Same table for strong emphasis (double markers).
public $strong_relist = array(
'' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
'__' => '(?<=\S|^)(?<!_)__(?!_)',
);
// Same table for combined emphasis (triple markers).
public $em_strong_relist = array(
'' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
'___' => '(?<=\S|^)(?<!_)___(?!_)',
);
// Complete token regex per "$em$strong" state; filled by prepareItalicsAndBold().
public $em_strong_prepared_relist;
962:
/**
 * Pre-build the token regex for every combination of open emphasis / strong
 * state and store it in $em_strong_prepared_relist under the key "$em$strong".
 */
public function prepareItalicsAndBold() {
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            $state = "$em$strong";

            // The combined (triple-marker) pattern goes first when one
            // exists for this state.
            if (isset($this->em_strong_relist[$state])) {
                $alternatives = array(
                    $this->em_strong_relist[$state],
                    $em_re,
                    $strong_re,
                );
            } else {
                $alternatives = array($em_re, $strong_re);
            }

            $this->em_strong_prepared_relist[$state] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
984:
/**
 * Convert *em*, **strong** and ***both*** markers in $text into hashed
 * <em> / <strong> elements.
 *
 * The text is consumed token by token. $token_stack holds the markers that
 * are currently open and $text_stack the text collected since each of them;
 * index 0 is always the innermost level.
 *
 * The only code change from the previous version is the string-offset
 * syntax: `$token{0}` was removed in PHP 8 and is now `$token[0]`.
 *
 * @param string $text
 * @return string
 */
public function doItalicsAndBold($text) {
    $token_stack = array('');
    $text_stack = array('');
    $em = '';
    $strong = '';
    // True right after a triple marker opened em and strong together.
    $tree_char_em = false;

    while (1) {
        // Pick the token regex that fits the current open/closed state.
        $token_re = $this->em_strong_prepared_relist["$em$strong"];

        // Split off the text before the next token, the token itself and the
        // remainder.
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text_stack[0] .= $parts[0];
        $token =& $parts[1];
        $text =& $parts[2];

        if (empty($token)) {
            // No token left: put every still-open marker back as literal
            // text and stop.
            while ($token_stack[0]) {
                $text_stack[1] .= array_shift($token_stack);
                $text_stack[0] .= array_shift($text_stack);
            }
            break;
        }

        $token_len = strlen($token);
        if ($tree_char_em) {
            // The previous token was an opening triple marker.
            if ($token_len == 3) {
                // Closing triple marker: emit <strong><em>.
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong><em>$span</em></strong>";
                $text_stack[0] .= $this->hashPart($span);
                $em = '';
                $strong = '';
            } else {
                // A shorter marker closes one of the two; the rest of the
                // triple marker stays open on the stack.
                $token_stack[0] = str_repeat($token[0], 3-$token_len);
                $tag = $token_len == 2 ? "strong" : "em";
                $span = $text_stack[0];
                $span = $this->runSpanGamut($span);
                $span = "<$tag>$span</$tag>";
                $text_stack[0] = $this->hashPart($span);
                $$tag = ''; // clears $em or $strong, whichever was closed
            }
            $tree_char_em = false;
        } else if ($token_len == 3) {
            if ($em) {
                // Triple marker while em and strong are open separately:
                // close both, innermost first.
                for ($i = 0; $i < 2; ++$i) {
                    $shifted_token = array_shift($token_stack);
                    $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<$tag>$span</$tag>";
                    $text_stack[0] .= $this->hashPart($span);
                    $$tag = ''; // clears $em or $strong
                }
            } else {
                // Triple marker opens em and strong at once.
                $em = $token[0];
                $strong = "$em$em";
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $tree_char_em = true;
            }
        } else if ($token_len == 2) {
            if ($strong) {
                // An em opened inside this strong and never closed becomes
                // literal text again.
                if (strlen($token_stack[0]) == 1) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                // Close the strong.
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong>$span</strong>";
                $text_stack[0] .= $this->hashPart($span);
                $strong = '';
            } else {
                // Open a strong.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $strong = $token;
            }
        } else {
            // Single-character marker.
            if ($em) {
                if (strlen($token_stack[0]) == 1) {
                    // Close the em.
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<em>$span</em>";
                    $text_stack[0] .= $this->hashPart($span);
                    $em = '';
                } else {
                    // Innermost open marker is not an em: keep as text.
                    $text_stack[0] .= $token;
                }
            } else {
                // Open an em.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $em = $token;
            }
        }
    }
    return $text_stack[0];
}
1106:
1107:
/**
 * Find block quotes (paragraphs whose first line starts with ">") and hand
 * each one to _doBlockQuotes_callback().
 *
 * @param string $text
 * @return string
 */
public function doBlockQuotes($text) {
$text = preg_replace_callback('/
( # Wrap whole match in $1
(?>
^[ ]*>[ ]? # ">" at the start of a line
.+\n # rest of the first line
(.+\n)* # subsequent consecutive lines
\n* # blanks
)+
)
/xm',
array(&$this, '_doBlockQuotes_callback'), $text);

return $text;
}
1123:
/**
 * Convert one matched block quote into a hashed <blockquote> element.
 *
 * @param array $matches Group 1 holds the quoted lines.
 * @return string Placeholder key surrounded by newlines.
 */
public function _doBlockQuotes_callback($matches) {
    // Remove the ">" prefix of each line and empty out space-only lines.
    $quote = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

    // The content of a block quote is itself block-level Markdown.
    $quote = $this->runBlockGamut($quote);

    // Indent every line of the result...
    $quote = preg_replace('/^/m', " ", $quote);

    // ...and take the indentation back out of <pre> content.
    $quote = preg_replace_callback(
        '{(\s*<pre>.+?</pre>)}sx',
        array($this, '_doBlockQuotes_callback2'),
        $quote
    );

    $html = "<blockquote>\n$quote\n</blockquote>";
    return "\n" . $this->hashBlock($html) . "\n\n";
}
1138:
/**
 * Strip the leading space that _doBlockQuotes_callback() put in front of
 * every line of a <pre> block.
 *
 * @param array $matches Group 1 holds the <pre> block.
 * @return string
 */
public function _doBlockQuotes_callback2($matches) {
    return preg_replace('/^ /m', '', $matches[1]);
}
1144:
1145:
/**
 * Wrap each blank-line-separated chunk of $text in <p> tags, except chunks
 * that consist of exactly one block placeholder.
 *
 * @param string $text
 * @return string Chunks joined by blank lines.
 */
public function formParagraphs($text) {
    // Trim leading and trailing newlines, then split on blank lines.
    $trimmed = preg_replace('/\A\n+|\n+\z/', '', $text);
    $grafs = preg_split('/\n{2,}/', $trimmed, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($grafs as $key => $value) {
        // A chunk that is exactly one "B" placeholder is a hashed block:
        // put the stored text back as it is.
        if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
            $grafs[$key] = $this->html_hashes[$value];
            continue;
        }

        // Everything else is an ordinary paragraph; "<p>" replaces any
        // leading spaces.
        $para = $this->runSpanGamut($value);
        $para = preg_replace('/^([ ]*)/', "<p>", $para) . "</p>";
        $grafs[$key] = $this->unhash($para);
    }

    return implode("\n\n", $grafs);
}
1214:
1215:
/**
 * Encode $text for use inside a double-quoted HTML attribute value.
 *
 * Ampersands and "<" are handled by encodeAmpsAndAngles(); double quotes are
 * then turned into &quot;. The previous code replaced '"' with '"', a no-op
 * that let a quote in a URL or title close the attribute early.
 *
 * @param string $text
 * @return string
 */
public function encodeAttribute($text) {
    $text = $this->encodeAmpsAndAngles($text);
    $text = str_replace('"', '&quot;', $text);
    return $text;
}
1225:
1226:
/**
 * Encode "&" and "<" in $text as HTML entities.
 *
 * With $no_entities set, every "&" becomes &amp;. Otherwise an "&" that
 * already starts something shaped like an entity (&name; / &#123; / &#x1F;)
 * is left alone. "<" always becomes &lt;.
 *
 * The previous code used the raw character as the replacement in all three
 * places ('&' => '&', '<' => '<'), so nothing was ever encoded.
 *
 * @param string $text
 * @return string
 */
public function encodeAmpsAndAngles($text) {
    if ($this->no_entities) {
        $text = str_replace('&', '&amp;', $text);
    } else {
        // Negative lookahead: skip ampersands followed by an entity body
        // and a semicolon.
        $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;', $text);
    }

    $text = str_replace('<', '&lt;', $text);

    return $text;
}
1246:
1247:
/**
 * Convert <scheme:...> and <user@host> automatic links into hashed anchors.
 *
 * @param string $text
 * @return string
 */
public function doAutoLinks($text) {
// URLs with an http, https, ftp or dict scheme.
$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
array(&$this, '_doAutoLinks_url_callback'), $text);


// E-mail addresses, with an optional "mailto:" prefix that is not captured.
$text = preg_replace_callback('{
<
(?:mailto:)?
(
(?:
[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
|
".*?"
)
\@
(?:
[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
|
\[[\d.a-fA-F:]+\] # IPv4 & IPv6
)
)
>
}xi',
array(&$this, '_doAutoLinks_email_callback'), $text);

return $text;
}
/**
 * Build the hashed anchor for one automatic URL link; the encoded URL is
 * used as both href and link text.
 *
 * @param array $matches Group 1 holds the URL.
 * @return string Placeholder key.
 */
public function _doAutoLinks_url_callback($matches) {
    $href = $this->encodeAttribute($matches[1]);
    return $this->hashPart("<a href=\"$href\">$href</a>");
}
/**
 * Build the hashed anchor for one automatic e-mail link.
 *
 * @param array $matches Group 1 holds the address.
 * @return string Placeholder key.
 */
public function _doAutoLinks_email_callback($matches) {
    $anchor = $this->encodeEmailAddress($matches[1]);
    return $this->hashPart($anchor);
}
1285:
1286:
/**
 * Build a mailto: anchor for $addr in which most ASCII characters are
 * written as decimal or hexadecimal character references.
 *
 * The choice per character is derived from a CRC32 of the address, so the
 * same address always gives the same output.
 *
 * @param string $addr E-mail address without the "mailto:" prefix.
 * @return string "<a href=...>...</a>"
 */
public function encodeEmailAddress($addr) {
    $addr = "mailto:" . $addr;

    // Split between every pair of characters.
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed  = (int)abs(crc32($addr) / strlen($addr));

    foreach ($chars as $key => $char) {
        $ord = ord($char);

        // Bytes of 128 and above are left as they are.
        if ($ord >= 128) {
            continue;
        }

        $r = ($seed * (1 + $key)) % 100;

        // About one in ten stays in the clear, but never the "@".
        if ($r > 90 && $char != '@') {
            continue;
        }

        if ($r < 45) {
            $chars[$key] = '&#x' . dechex($ord) . ';';
        } else {
            $chars[$key] = '&#' . $ord . ';';
        }
    }

    $href = implode('', $chars);
    // The visible text skips the first 7 entries, i.e. "mailto:".
    $label = implode('', array_slice($chars, 7));

    return "<a href=\"$href\">$label</a>";
}
1326:
1327:
/**
 * Walk through $str and pass every span-level token (backslash escape,
 * backtick run and, unless $no_markup is set, inline HTML) to
 * handleSpanToken().
 *
 * @param string $str
 * @return string The text with tokens replaced by what handleSpanToken()
 *                returned for them.
 */
public function parseSpan($str) {
$output = '';

// The HTML alternatives are only part of the pattern when markup is allowed.
$span_re = '{
(
\\\\'.$this->escape_chars_re.'
|
(?<![`\\\\])
`+ # code span marker
'.( $this->no_markup ? '' : '
|
<!-- .*? --> # comment
|
<\?.*?\?> | <%.*?%> # processing instruction
|
<[/!$]?[-a-zA-Z0-9:_]+ # regular tags
(?>
\s
(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
)?
>
').'
)
}xs';

while (1) {
// Split into: text before the token, the token, the text after it.
$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);


if ($parts[0] != "") {
$output .= $parts[0];
}


if (isset($parts[1])) {
// $parts[2] is passed by reference; handleSpanToken() may consume part of
// it (it does so for code spans).
$output .= $this->handleSpanToken($parts[1], $parts[2]);
$str = $parts[2];
}
else {
// No token found: everything has been copied to $output.
break;
}
}

return $output;
}
1382:
1383:
/**
 * Handle one token found by parseSpan().
 *
 * String offsets now use square brackets; the curly-brace form (`$token{0}`)
 * was removed in PHP 8.
 *
 * @param string $token The token text.
 * @param string $str   Text following the token, by reference; shortened
 *                      when a code span consumes part of it.
 * @return string Replacement for the token.
 */
public function handleSpanToken($token, &$str) {
    switch ($token[0]) {
        case "\\":
            // Backslash escape: hash the escaped character as a decimal
            // character reference.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            // Backtick run: look for a closing run of the same length that
            // is not followed by another backtick.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            // No closing run: the backticks are literal text.
            return $token;
        default:
            // Anything else (inline HTML) is hashed untouched.
            return $this->hashPart($token);
    }
}
1406:
1407:
/**
 * Remove one level of indentation (a tab, or up to tab_width spaces) from
 * the start of every line in $text.
 *
 * @param string $text
 * @return string
 */
public function outdent($text) {
    $one_indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
    return preg_replace($one_indent_re, '', $text);
}
1414:
1415:
1416:
1417:
// Callable that _detab_callback() uses to measure a line. _initDetab()
// replaces it with a fallback when no function of this name exists.
public $utf8_strlen = 'mb_strlen';
1419:
/**
 * Expand the tabs in $text to spaces; only lines that contain a tab are
 * passed to _detab_callback().
 *
 * @param string $text
 * @return string
 */
public function detab($text) {
    $expand_line = array($this, '_detab_callback');
    return preg_replace_callback('/^.*\t.*$/m', $expand_line, $text);
}
/**
 * Expand the tabs of one line, padding each to the next multiple of
 * tab_width.
 *
 * @param array $matches Index 0 holds the line.
 * @return string The line without tabs.
 */
public function _detab_callback($matches) {
    $measure = $this->utf8_strlen;

    // The pieces between the tabs; the first one starts the output line.
    $segments = explode("\t", $matches[0]);
    $line = array_shift($segments);

    foreach ($segments as $segment) {
        // Spaces needed to reach the next tab stop from the current length.
        $pad = $this->tab_width -
            $measure($line, 'UTF-8') % $this->tab_width;
        $line .= str_repeat(" ", $pad) . $segment;
    }

    return $line;
}
/**
 * Make sure $utf8_strlen holds something callable.
 *
 * When it names a function that does not exist, it is replaced with a
 * regex-based counter. The fallback used to be built with create_function(),
 * which was removed in PHP 8.0; it is now a closure using the same pattern.
 */
public function _initDetab() {
    $current = $this->utf8_strlen;

    // A function name is kept when that function exists; anything else
    // (such as a closure installed by an earlier call) when it is callable.
    $usable = is_string($current)
        ? function_exists($current)
        : is_callable($current);
    if ($usable) return;

    // preg_match_all() returns the number of matches. _detab_callback()
    // passes an encoding as second argument, which this closure ignores.
    $this->utf8_strlen = function ($text) {
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1462:
1463:
/**
 * Replace every placeholder key in $text with the text stored for it.
 *
 * A key is any character, \x1A, digits, and the same character again.
 *
 * @param string $text
 * @return string
 */
public function unhash($text) {
    $lookup = array($this, '_unhash_callback');
    return preg_replace_callback('/(.)\x1A[0-9]+\1/', $lookup, $text);
}
/**
 * Look up the stored text for one placeholder key.
 *
 * @param array $matches Index 0 holds the key.
 * @return string
 */
public function _unhash_callback($matches) {
    $key = $matches[0];
    return $this->html_hashes[$key];
}
1474:
1475: }
1476:
1477:
1478:
1479:
1480:
1481:
1482: class extends Markdown_Parser {
1483:
1484:
// NOTE(review): presumably a prefix for generated footnote ids; the code that
// reads it is outside this view -- confirm.
public $fn_id_prefix = "";


// NOTE(review): presumably title attributes for footnote links and
// backlinks; usage is outside this view -- confirm.
public $fn_link_title = '';
public $fn_backlink_title = '';


// NOTE(review): presumably class attributes for footnote links and
// backlinks; usage is outside this view -- confirm.
public $fn_link_class = '';
public $fn_backlink_class = '';


// Abbreviation word => description. setup() copies these into
// $abbr_desciptions and builds $abbr_word_re from the words.
public $predef_abbr = array();
1497:
1498:
/**
 * Register the additional escape characters and gamut steps, then let the
 * parent constructor build the regexes and sort the gamuts.
 */
public function __construct() {
    // Must be done before the parent builds $escape_chars_re.
    $this->escape_chars = $this->escape_chars . ':|';

    // Array union keeps entries that already exist and adds the new ones;
    // the parent constructor sorts each table by priority afterwards.
    $extra_document_steps = array(
        "doFencedCodeBlocks" => 5,
        "stripFootnotes" => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes" => 50,
    );
    $extra_block_steps = array(
        "doFencedCodeBlocks" => 5,
        "doTables" => 15,
        "doDefLists" => 45,
    );
    $extra_span_steps = array(
        "doFootnotes" => 5,
        "doAbbreviations" => 70,
    );

    $this->document_gamut = $this->document_gamut + $extra_document_steps;
    $this->block_gamut    = $this->block_gamut + $extra_block_steps;
    $this->span_gamut     = $this->span_gamut + $extra_span_steps;

    parent::__construct();
}
1527:
1528:
1529:
// Footnote definitions collected from the document, keyed by note id.
public $footnotes = array();
// Footnotes actually referenced, in order of first reference.
public $footnotes_ordered = array();
// Abbreviation word => description. The misspelled name is kept
// because other methods and external code address it by this name.
public $abbr_desciptions = array();
// Alternation of all quoted abbreviation words ('' when there are none).
public $abbr_word_re = '';


// Number assigned to the next footnote reference.
public $footnote_counter = 1;
1537:
1538:
/**
 * Reset per-document state before a transformation and load the
 * predefined abbreviations.
 *
 * @return void
 */
public function setup() {
    parent::setup();

    $this->footnote_counter = 1;
    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';

    foreach ($this->predef_abbr as $word => $description) {
        // Join words with "|" once the expression is non-empty.
        $separator = $this->abbr_word_re ? '|' : '';
        $this->abbr_word_re .= $separator . preg_quote($word);
        $this->abbr_desciptions[$word] = trim($description);
    }
}
1558:
/**
 * Drop the per-document footnote and abbreviation state, then let the
 * parent class clear its own.
 *
 * @return void
 */
public function teardown() {
    $this->abbr_word_re = '';
    $this->abbr_desciptions = array();
    $this->footnotes_ordered = array();
    $this->footnotes = array();

    parent::teardown();
}
1570:
1571:
1572:
1573:
1574:
// Tags that are always handled as HTML blocks.
public $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';


// Tags handled as blocks only when they start at a block boundary and
// are followed by a newline.
public $context_block_tags_re = 'script|noscript|math|ins|del';


// Tags whose content is parsed in span mode when Markdown is enabled
// inside them and the mode is not explicitly "block".
public $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';



// Tags hashed through hashClean(); the `markdown` attribute is not
// honoured inside them.
public $clean_tags_re = 'script|math';


// Tags that never change the nesting depth while scanning an HTML block.
public $auto_close_tags_re = 'hr|img';
1589:
1590:
/**
 * Replace HTML blocks in $text with hash tokens.
 *
 * Delegates to _hashHTMLBlocks_inMarkdown() and keeps only the parsed
 * text it returns (the first element of the returned pair).
 *
 * @param string $text Markdown source.
 * @return string Text with HTML blocks hashed.
 */
public function hashHTMLBlocks($text) {
    $result = $this->_hashHTMLBlocks_inMarkdown($text);
    return $result[0];
}
/**
 * Scan Markdown text and hash the HTML blocks found in it.
 *
 * Code spans, fenced code blocks and indented code blocks are copied
 * through untouched so that tags inside them are not treated as HTML.
 * Block-level and "clean" tags are handed to _hashHTMLBlocks_inHTML().
 *
 * @param string $text             Text to scan.
 * @param int    $indent           Indentation, in spaces, of the enclosing
 *                                 tag; used for code block detection.
 * @param string $enclosing_tag_re Name of the enclosing tag ('' if none).
 *                                 Scanning stops at its unmatched
 *                                 closing tag.
 * @param bool   $span             True when parsing span-level content.
 * @return array Pair: (parsed text, remaining unparsed text).
 */
public function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag_re = '', $span = false)
{
    if ($text === '') return array('', '');

    // Tests whether the text parsed so far ends on a block boundary.
    $newline_before_re = '/(?:^\n?|\n\n)*$/';
    // Tests whether the text after a tag continues on a new line.
    $newline_after_re =
        '{
^ # Start of text following the tag.
(?>[ ]*<!--.*?-->)? # Optional comment.
[ ]*\n # Must be followed by newline.
}xs';

    // Matches the next construct of interest: a tag, comment, processing
    // instruction, CDATA block, code span marker and (outside span mode)
    // an indented code block or a fenced code block marker.
    $block_tag_re =
        '{
( # $2: Capture hole tag.
</? # Any opening or closing tag.
(?> # Tag name.
'.$this->block_tags_re.' |
'.$this->context_block_tags_re.' |
'.$this->clean_tags_re.' |
(?!\s)'.$enclosing_tag_re.'
)
(?:
(?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
(?>
".*?" | # Double quotes (can contain `>`)
\'.*?\' | # Single quotes (can contain `>`)
.+? # Anything but quotes and `>`.
)*?
)?
> # End of tag.
|
<!-- .*? --> # HTML Comment
|
<\?.*?\?> | <%.*?%> # Processing instruction
|
<!\[CDATA\[.*?\]\]> # CData Block
|
# Code span marker
`+
'. ( !$span ? ' # If not in span.
|
# Indented code block
(?: ^[ ]*\n | ^ | \n[ ]*\n )
[ ]{'.($indent+4).'}[^\n]* \n
(?>
(?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
)*
|
# Fenced code block marker
(?> ^ | \n )
[ ]{0,'.($indent).'}~~~+[ ]*\n
' : '' ). ' # End (if not is span).
)
}xs';

    $depth = 0;    // Nesting depth of the enclosing tag.
    $parsed = "";  // Output accumulated so far.

    do {
        // Split into: text before the match, the match, the rest.
        $parts = preg_split($block_tag_re, $text, 2,
            PREG_SPLIT_DELIM_CAPTURE);

        // In span mode, surround the plain text and each of its newlines
        // with an empty hash token.
        if ($span) {
            $void = $this->hashPart("", ':');
            $newline = "$void\n";
            $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
        }

        $parsed .= $parts[0];

        // Nothing matched: everything has been consumed.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag = $parts[1];
        $text = $parts[2];

        // Code span marker.
        if ($tag[0] == "`") {
            // Look for the matching closing marker.
            $tag_re = preg_quote($tag);
            if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
                $text, $matches))
            {
                // Copy the whole code span through unchanged.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                // No closing marker: keep the backticks as plain text.
                $parsed .= $tag;
            }
        }

        // Fenced code block marker.
        else if (preg_match('{^\n?[ ]{0,'.($indent+3).'}~}', $tag)) {
            // Look for the closing fence.
            $tag_re = preg_quote(trim($tag));
            if (preg_match('{^(?>.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text,
                $matches))
            {
                // Copy the whole fenced block through unchanged.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                // No closing fence: keep the marker as plain text.
                $parsed .= $tag;
            }
        }

        // Indented code block: copy through unchanged.
        else if ($tag[0] == "\n" || $tag[0] == " ") {
            $parsed .= $tag;
        }

        // Opening block tag, or opening context-block tag sitting alone
        // between block boundaries.
        else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
            ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
                preg_match($newline_before_re, $parsed) &&
                preg_match($newline_after_re, $text) )
            )
        {
            // Parse the HTML block; the `markdown` attribute is honoured.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            // Keep the hashed block separated from its surroundings.
            $parsed .= "\n\n$block_text\n\n";
        }

        // Clean tag, comment, CDATA or processing instruction.
        else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            // Hash as-is; the `markdown` attribute is ignored.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }

        // Opening or closing occurrence of the enclosing tag.
        else if ($enclosing_tag_re !== '' &&
            preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
        {
            // Closing tags decrease the depth; opening tags that are not
            // self-closing increase it.
            if ($tag[1] == '/') $depth--;
            else if ($tag[strlen($tag)-2] != '/') $depth++;

            if ($depth < 0) {
                // This closes the enclosing tag: hand it back to the
                // caller together with the rest of the text.
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
/**
 * Parse and hash one HTML block starting at the beginning of $text.
 *
 * Tags are consumed until the tag that opened the block is balanced.
 * When $md_attr is true, a tag carrying a `markdown` attribute has its
 * content parsed as Markdown by _hashHTMLBlocks_inMarkdown().
 *
 * @param string $text        Text starting with the opening tag.
 * @param string $hash_method Name of the method used to hash the HTML
 *                            (callers in this class pass "hashBlock" or
 *                            "hashClean").
 * @param bool   $md_attr     Whether to honour the `markdown` attribute.
 * @return array Pair: (hashed block text, remaining unparsed text).
 */
public function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    if ($text === '') return array('', '');

    // Matches a `markdown` attribute together with its value.
    $markdown_attr_re = '
{
\s* # Eat whitespace before the `markdown` attribute
markdown
\s*=\s*
(?>
(["\']) # $1: quote delimiter
(.*?) # $2: attribute value
\1 # matching delimiter
|
([^\s>]*) # $3: unquoted attribute value
)
() # $4: make $3 always defined (avoid warnings)
}xs';

    // Matches any tag, comment, processing instruction or CDATA block.
    $tag_re = '{
( # $2: Capture hole tag.
</? # Any opening or closing tag.
[\w:$]+ # Tag name.
(?:
(?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
(?>
".*?" | # Double quotes (can contain `>`)
\'.*?\' | # Single quotes (can contain `>`)
.+? # Anything but quotes and `>`.
)*?
)?
> # End of tag.
|
<!-- .*? --> # HTML Comment
|
<\?.*?\?> | <%.*?%> # Processing instruction
|
<!\[CDATA\[.*?\]\]> # CData Block
)
}xs';

    $original_text = $text;  // Kept in case the block turns out malformed.

    $depth = 0;        // Nesting depth of the base tag.
    $block_text = "";  // HTML collected but not yet hashed.
    $parsed = "";      // Output accumulated so far.

    // Name of the tag that opens the block. Initialised so it is always
    // defined, even if the text does not start with a tag name.
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    do {
        // Split into: text before the tag, the tag, the rest.
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            // Ran out of text before the block was balanced. Return only
            // the first character as parsed so the caller resumes right
            // after it.
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0];
        $tag = $parts[1];
        $text = $parts[2];

        // Auto-closing tags, comments, CDATA and processing instructions
        // never affect nesting.
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            $block_text .= $tag;
        }
        else {
            // Track nesting of the base tag only; self-closing tags do
            // not open a level.
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] == '/') $depth--;
                else if ($tag[strlen($tag)-2] != '/') $depth++;
            }

            // NOTE(review): the alternation below is not grouped, so it
            // accepts any value starting with "1", containing "block", or
            // ending in "span". Left unchanged to preserve behaviour.
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
            {
                // Strip the `markdown` attribute from the emitted tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                // Span mode applies when requested explicitly, or by
                // default for tags that can only contain spans.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

                // Indentation of the line holding the tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                // Hash everything up to and including the opening tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                // Name of this tag, so the Markdown parser knows where
                // its content ends.
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                // Parse the content as Markdown.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                // Remove the tag's own indentation from the content.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                        $block_text);
                }

                // Block content is set apart by blank lines.
                if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
                else $parsed .= "$block_text";

                // Start collecting HTML afresh.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    // Hash whatever HTML is left in the buffer.
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
2016:
2017:
/**
 * Hash $text with the boundary character 'C'.
 *
 * @param string $text Text to hide behind a hash token.
 * @return string Whatever hashPart() returns for the text.
 */
public function hashClean($text) {
    $boundary = 'C';
    return $this->hashPart($text, $boundary);
}
2026:
2027:
/**
 * Convert setext-style and atx-style headers, each with an optional
 * trailing `{#id}` attribute.
 *
 * @param string $text Block-level Markdown text.
 * @return string Text with headers replaced by the callbacks' output.
 */
public function doHeaders($text) {
    // Setext-style: a line of text underlined with "=" or "-".
    $text = preg_replace_callback(
        '{
(^.+?) # $1: Header text
(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute
[ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
}mx',
        array($this, '_doHeaders_callback_setext'), $text);

    // Atx-style: one to six leading "#" characters.
    $text = preg_replace_callback('{
^(\#{1,6}) # $1 = string of #\'s
[ ]*
(.+?) # $2 = Header text
[ ]*
\#* # optional closing #\'s (not counted)
(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
[ ]*
\n+
}xm',
        array($this, '_doHeaders_callback_atx'), $text);

    return $text;
}
/**
 * Build the id attribute for a header tag.
 *
 * @param string|null $attr Id captured from `{#id}`, or empty/null.
 * @return string ' id="..."' with a leading space, or '' when no id
 *                was given. The id "0" is valid and is kept.
 */
public function _doHeaders_attr($attr) {
    // empty() alone would also discard the legitimate id "0".
    if (empty($attr) && $attr !== '0') return "";
    return " id=\"$attr\"";
}
/**
 * Build the hashed <h1>/<h2> block for a setext-style header.
 *
 * @param array $matches [1] header text, [2] optional id, [3] underline.
 * @return string Hashed header block surrounded by newlines, or the
 *                original text when the match is really a list item.
 */
public function _doHeaders_callback_setext($matches) {
    // "- text" over a single dash is left alone.
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
        return $matches[0];

    // "=" underline gives level 1, "-" gives level 2.
    $level = $matches[3][0] == '=' ? 1 : 2;
    $id = isset($matches[2]) ? $matches[2] : '';
    $attr = $this->_doHeaders_attr($id);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
/**
 * Build the hashed <hN> block for an atx-style header.
 *
 * @param array $matches [1] leading "#" run, [2] header text,
 *                       [3] optional id (absent when not given).
 * @return string Hashed header block surrounded by newlines.
 */
public function _doHeaders_callback_atx($matches) {
    // The number of "#" characters is the header level.
    $level = strlen($matches[1]);
    $id = isset($matches[3]) ? $matches[3] : '';
    $attr = $this->_doHeaders_attr($id);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
2086:
2087:
/**
 * Convert pipe-delimited tables to HTML tables.
 *
 * Two layouts are recognised: rows that start with a pipe, and rows
 * that merely contain one. Both need a header row followed by an
 * underline row.
 *
 * @param string $text Block-level Markdown text.
 * @return string Text with tables replaced by hashed HTML.
 */
public function doTables($text) {
    $less_than_tab = $this->tab_width - 1;

    // Tables whose rows start with a pipe.
    $text = preg_replace_callback('
{
^ # Start of a line
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
[|] # Optional leading pipe (present)
(.+) \n # $1: Header row (at least one pipe)

[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
[|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline

( # $3: Cells
(?>
[ ]* # Allowed whitespace.
[|] .* \n # Row content.
)*
)
(?=\n|\Z) # Stop at final double newline.
}xm',
        array($this, '_doTable_leadingPipe_callback'), $text);

    // Tables whose rows do not start with a pipe.
    $text = preg_replace_callback('
{
^ # Start of a line
[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
(\S.*[|].*) \n # $1: Header row (at least one pipe)

[ ]{0,'.$less_than_tab.'} # Allowed whitespace.
([-:]+[ ]*[|][-| :]*) \n # $2: Header underline

( # $3: Cells
(?>
.* [|] .* \n # Row content
)*
)
(?=\n|\Z) # Stop at final double newline.
}xm',
        // Spelled exactly like the method definition.
        array($this, '_doTable_callback'), $text);

    return $text;
}
/**
 * Handle a table whose rows start with a pipe: strip the leading pipe
 * from every body row, then delegate to _doTable_callback().
 *
 * @param array $matches [1] header row, [2] underline, [3] body rows.
 * @return string Whatever _doTable_callback() returns.
 */
public function _doTable_leadingPipe_callback($matches) {
    $rows_without_pipe = preg_replace('/^ *[|]/m', '', $matches[3]);

    $normalized = array(
        $matches[0],
        $matches[1],
        $matches[2],
        $rows_without_pipe,
    );
    return $this->_doTable_callback($normalized);
}
/**
 * Render one table as hashed HTML.
 *
 * @param array $matches [1] header row, [2] underline row, [3] body rows.
 * @return string Hashed <table> block followed by a newline.
 */
public function _doTable_callback($matches) {
    $head = $matches[1];
    $underline = $matches[2];
    $content = $matches[3];

    // Remove any trailing pipe from each line.
    $head = preg_replace('/[|] *$/m', '', $head);
    $underline = preg_replace('/[|] *$/m', '', $underline);
    $content = preg_replace('/[|] *$/m', '', $content);

    // Column alignment comes from the colons in the underline.
    $separators = preg_split('/ *[|] */', $underline);
    $attr = array();
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
        else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
        else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
        else $attr[$n] = '';
    }

    // Span-parse the header first so pipes inside spans do not split
    // cells, then split it into columns.
    $head = $this->parseSpan($head);
    $headers = preg_split('/ *[|] */', $head);
    $col_count = count($headers);

    // The header may have more columns than the underline; give those
    // columns an empty alignment rather than reading an undefined index.
    $attr = array_pad($attr, $col_count, '');

    // Header row.
    $text = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header)
        $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    // Body rows.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        $row = $this->parseSpan($row);

        // Limit and pad each row to exactly $col_count cells.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell)
            $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
2216:
2217:
/**
 * Find definition lists and replace each with its rendered HTML.
 *
 * A list starts at the beginning of the text or after a blank line,
 * with one or more term lines followed by a ":"-prefixed definition.
 *
 * @param string $text Block-level Markdown text.
 * @return string Text with definition lists replaced.
 */
public function doDefLists($text) {
    $max_indent = $this->tab_width - 1;

    // Sub-pattern matching one complete definition list.
    $whole_list_re = '(?>
( # $1 = whole list
( # $2
[ ]{0,'.$max_indent.'}
((?>.*\S.*\n)+) # $3 = defined term
\n?
[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
)
(?s:.+?)
( # $4
\z
|
\n{2,}
(?=\S)
(?! # Negative lookahead for another term
[ ]{0,'.$max_indent.'}
(?: \S.*\n )+? # defined term
\n?
[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
)
(?! # Negative lookahead for another definition
[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
)
)
)
)';

    $pattern = '{
(?>\A\n?|(?<=\n\n))
'.$whole_list_re.'
}mx';

    return preg_replace_callback(
        $pattern, array($this, '_doDefLists_callback'), $text);
}
/**
 * Wrap the processed items of one definition list in <dl> and hash it.
 *
 * @param array $matches [1] is the whole list text.
 * @return string Hashed <dl> block followed by a blank line.
 */
public function _doDefLists_callback($matches) {
    $items = trim($this->processDefListItems($matches[1]));
    $html = "<dl>\n" . $items . "\n</dl>";

    return $this->hashBlock($html) . "\n\n";
}
2270:
2271:
/**
 * Turn the body of a definition list into <dt> and <dd> elements.
 *
 * Terms are converted first; definitions are converted afterwards and
 * rely on the "<dt>" markers produced by the first pass to find where
 * a definition ends.
 *
 * @param string $list_str Text of one definition list.
 * @return string The list with terms and definitions rendered.
 */
public function processDefListItems($list_str) {
    $less_than_tab = $this->tab_width - 1;

    // Collapse trailing blank lines into a single newline.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    // Pass 1: terms. The final lookahead uses the same indentation
    // limit as every other pattern here instead of a hard-coded 3.
    $list_str = preg_replace_callback('{
(?>\A\n?|\n\n+) # leading line
( # definition terms = $1
[ ]{0,'.$less_than_tab.'} # leading whitespace
(?![:][ ]|[ ]) # negative lookahead for a definition
# mark (colon) or more whitespace.
(?> \S.* \n)+? # actual term (not whitespace).
)
(?=\n?[ ]{0,'.$less_than_tab.'}:[ ]) # lookahead for following line feed
# with a definition mark.
}xm',
        array($this, '_processDefListItems_callback_dt'), $list_str);

    // Pass 2: definitions.
    $list_str = preg_replace_callback('{
\n(\n+)? # leading line = $1
( # marker space = $2
[ ]{0,'.$less_than_tab.'} # whitespace before colon
[:][ ]+ # definition mark (colon)
)
((?s:.+?)) # definition text = $3
(?= \n+ # stop at next definition mark,
(?: # next term or end of text
[ ]{0,'.$less_than_tab.'} [:][ ] |
<dt> | \z
)
)
}xm',
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
/**
 * Render each term line of a definition list as a <dt> element.
 *
 * @param array $matches [1] holds one or more term lines.
 * @return string One "\n<dt>…</dt>" per term, followed by a newline.
 */
public function _processDefListItems_callback_dt($matches) {
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }
    return $html . "\n";
}
/**
 * Render one definition as a <dd> element.
 *
 * A definition preceded by a blank line, or containing one, is run
 * through the block gamut; any other is treated as inline text.
 *
 * @param array $matches [1] extra leading newlines, [2] marker with its
 *                       surrounding spaces, [3] definition text.
 * @return string "\n<dd>…</dd>\n"
 */
public function _processDefListItems_callback_dd($matches) {
    $is_block = $matches[1] || preg_match('/\n{2,}/', $matches[3]);

    if (!$is_block) {
        $inline = $this->runSpanGamut($this->outdent(rtrim($matches[3])));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    // Replace the marker by spaces of equal width so the first line is
    // indented like the following ones before outdenting.
    $indented = str_repeat(' ', strlen($matches[2])) . $matches[3];
    $block = $this->runBlockGamut($this->outdent($indented . "\n\n"));

    return "\n<dd>" . "\n" . $block . "\n" . "</dd>\n";
}
2342:
2343:
/**
 * Convert fenced code blocks: content enclosed between two identical
 * lines made of three or more tildes.
 *
 * @param string $text Markdown text.
 * @return string Text with fenced blocks replaced by hashed HTML.
 */
public function doFencedCodeBlocks($text) {
    $text = preg_replace_callback('{
(?:\n|\A)
# 1: Opening marker
(
~{3,} # Marker: three tilde or more.
)
[ ]* \n # Whitespace and newline following marker.

# 2: Content
(
(?>
(?!\1 [ ]* \n) # Not a closing marker.
.*\n+
)+
)

# Closing marker.
\1 [ ]* \n
}xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
/**
 * Render the content of one fenced code block.
 *
 * @param array $matches [2] is the raw code between the fences.
 * @return string Hashed <pre><code> block surrounded by blank lines.
 */
public function _doFencedCodeBlocks_callback($matches) {
    // Escape HTML special characters but leave quotes alone.
    $escaped = htmlspecialchars($matches[2], ENT_NOQUOTES);

    // Leading newlines are turned into explicit line breaks.
    $escaped = preg_replace_callback(
        '/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'),
        $escaped);

    $html = '<pre><code>' . $escaped . '</code></pre>';
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
/**
 * Replace a run of newlines with the same number of <br> tags.
 *
 * @param array $matches [0] is the run of newline characters.
 * @return string One "<br" + empty_element_suffix per newline.
 */
public function _doFencedCodeBlocks_newlines($matches) {
    $line_break = '<br' . $this->empty_element_suffix;
    $count = strlen($matches[0]);

    return str_repeat($line_break, $count);
}
2389:
2390:
2391:
2392:
2393:
2394:
// Emphasis marker patterns, keyed by marker. The '' entry matches an
// opening "*" or "_" that is followed by non-space text; the other
// entries match the corresponding closing marker. An underscore next
// to a letter, digit or underscore is not accepted as a marker.
// NOTE(review): presumably consumed by prepareItalicsAndBold() in the
// parent class — confirm.
public $em_relist = array(
'' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
'_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
);
// Same scheme for strong emphasis ("**" and "__").
public $strong_relist = array(
'' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
'__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
);
// Same scheme for combined emphasis ("***" and "___").
public $em_strong_relist = array(
'' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
'___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
);
2410:
2411:
/**
 * Wrap paragraphs in <p> tags and restore hashed content.
 *
 * The text is split on blank lines. Each chunk goes through the span
 * gamut and is wrapped in <p> unless it matches the block/clean hash
 * token pattern. Hash tokens are then expanded with unhash().
 *
 * @param string $text Block-level text.
 * @return string HTML paragraphs separated by blank lines.
 */
public function formParagraphs($text) {
    // Strip leading and trailing newlines.
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $chunk) {
        $html = trim($this->runSpanGamut($chunk));

        // Hashed blocks are already complete HTML and get no <p>.
        if (!preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html)) {
            $html = "<p>$html</p>";
        }
        $rendered[] = $html;
    }

    return $this->unhash(implode("\n\n", $rendered));
}
2446:
2447:
2448:
2449:
/**
 * Remove footnote definitions (`[^id]: text`) from the text and store
 * them through _stripFootnotes_callback().
 *
 * @param string $text Document text.
 * @return string Text without the footnote definitions.
 */
public function stripFootnotes($text) {
    $less_than_tab = $this->tab_width - 1;

    $text = preg_replace_callback('{
^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
[ ]*
\n? # maybe *one* newline
( # text = $2 (no blank lines allowed)
(?:
.+ # actual text
|
\n # newlines but
(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
# by non-indented content
)*
)
}xm',
        array($this, '_stripFootnotes_callback'),
        $text);
    return $text;
}
/**
 * Store one footnote definition and remove it from the document.
 *
 * @param array $matches [1] note id, [2] footnote text.
 * @return string Always '' so the definition disappears from the text.
 */
public function _stripFootnotes_callback($matches) {
    $note_id = $this->fn_id_prefix . $matches[1];
    $this->footnotes[$note_id] = $this->outdent($matches[2]);
    return '';
}
2482:
2483:
/**
 * Replace footnote references (`[^id]`) with a placeholder that
 * appendFootnotes() resolves later.
 *
 * @param string $text Span-level text.
 * @return string Text with references replaced; unchanged while
 *                $this->in_anchor is set.
 */
public function doFootnotes($text) {
    if (!$this->in_anchor) {
        $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
    }
    return $text;
}
2494:
2495:
/**
 * Resolve footnote placeholders and append the footnote list.
 *
 * Placeholders are replaced by _appendFootnotes_callback(), which also
 * queues each referenced footnote in $this->footnotes_ordered. The
 * queue is then rendered as an ordered list inside
 * <div class="footnotes">. Footnotes referenced from inside other
 * footnotes join the queue while it is being processed.
 *
 * @param string $text Document HTML.
 * @return string HTML with references resolved and the list appended.
 */
public function appendFootnotes($text) {
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        // Backlink attributes, built once as a template. "%%" stands
        // for the footnote number and is filled in per footnote.
        $attr_template = " rev=\"footnote\"";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr_template .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr_template .= " title=\"$title\"";
        }
        $num = 0;

        while (!empty($this->footnotes_ordered)) {
            // Take the first queued footnote.
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);

            $footnote .= "\n";
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            // Substitute from the template every time: reusing the
            // previous result would leave no "%%" after the first note.
            $attr = str_replace("%%", (string) ++$num, $attr_template);
            $note_id = $this->encodeAttribute($note_id);

            // Put the backlink inside the last paragraph when there is
            // one, otherwise in a paragraph of its own.
            $backlink = "<a href=\"#fnref:$note_id\"$attr>↩</a>";
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . " $backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
/**
 * Turn one footnote placeholder into a numbered reference link.
 *
 * @param array $matches [1] is the note id as written in the document.
 * @return string <sup> reference markup when the footnote is defined;
 *                otherwise the original "[^id]" text.
 */
public function _appendFootnotes_callback($matches) {
    $node_id = $this->fn_id_prefix . $matches[1];

    // Only the first reference to a defined footnote becomes a link:
    // the definition is moved out of $this->footnotes right here.
    if (isset($this->footnotes[$node_id])) {
        // Queue the footnote for output.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        unset($this->footnotes[$node_id]);

        $num = $this->footnote_counter++;
        $attr = " rel=\"footnote\"";
        if ($this->fn_link_class != "") {
            $class = $this->fn_link_class;
            $class = $this->encodeAttribute($class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_link_title != "") {
            $title = $this->fn_link_title;
            $title = $this->encodeAttribute($title);
            $attr .= " title=\"$title\"";
        }

        // "%%" in the class/title is replaced by the footnote number.
        $attr = str_replace("%%", $num, $attr);
        $node_id = $this->encodeAttribute($node_id);

        return
            "<sup id=\"fnref:$node_id\">".
            "<a href=\"#fn:$node_id\"$attr>$num</a>".
            "</sup>";
    }

    return "[^".$matches[1]."]";
}
2587:
2588:
2589:
2590:
/**
 * Remove abbreviation definitions (`*[word]: description`) from the
 * text and record them through _stripAbbreviations_callback().
 *
 * @param string $text Document text.
 * @return string Text without the abbreviation definitions.
 */
public function stripAbbreviations($text) {
    $max_indent = $this->tab_width - 1;

    $definition_re = '{
^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
(.*) # text = $2 (no blank lines allowed)
}xm';

    return preg_replace_callback(
        $definition_re,
        array($this, '_stripAbbreviations_callback'),
        $text);
}
/**
 * Record one abbreviation definition and remove it from the document.
 *
 * @param array $matches [1] abbreviated word, [2] its description.
 * @return string Always '' so the definition disappears from the text.
 */
public function _stripAbbreviations_callback($matches) {
    $word = $matches[1];

    // Join words with "|" once the expression is non-empty.
    $separator = $this->abbr_word_re ? '|' : '';
    $this->abbr_word_re .= $separator . preg_quote($word);
    $this->abbr_desciptions[$word] = trim($matches[2]);

    return '';
}
2615:
2616:
/**
 * Wrap known abbreviations found in $text in <abbr> tags.
 *
 * A word only matches when it is not adjacent to a word character or
 * to the 0x1A byte used in hash tokens.
 *
 * @param string $text Span-level text.
 * @return string Text with abbreviations marked up; unchanged when no
 *                abbreviation is defined.
 */
public function doAbbreviations($text) {
    if (!$this->abbr_word_re) {
        return $text;
    }

    $pattern = '{(?<![\w\x1A])(?:' . $this->abbr_word_re . ')(?![\w\x1A])}';

    return preg_replace_callback(
        $pattern, array($this, '_doAbbreviations_callback'), $text);
}
/**
 * Render one matched abbreviation.
 *
 * @param array $matches [0] is the matched word.
 * @return string Hashed <abbr> element, with a title attribute when a
 *                description exists; the word itself when it has no
 *                entry in $this->abbr_desciptions.
 */
public function _doAbbreviations_callback($matches) {
    $abbr = $matches[0];

    if (!isset($this->abbr_desciptions[$abbr])) {
        return $abbr;
    }

    $desc = $this->abbr_desciptions[$abbr];
    if (empty($desc)) {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    $title = $this->encodeAttribute($desc);
    return $this->hashPart("<abbr title=\"$title\">$abbr</abbr>");
}
2647:
2648: }
2649: