Markdown.pm (6845B)
#!/usr/bin/env perl

# LSG::Markdown - markdown preprocessor for the LSG
# Written by lumidify <nobody@lumidify.org>
#
# To the extent possible under law, the author has dedicated
# all copyright and related and neighboring rights to this
# software to the public domain worldwide. This software is
# distributed without any warranty.
#
# You should have received a copy of the CC0 Public Domain
# Dedication along with this software. If not, see
# <http://creativecommons.org/publicdomain/zero/1.0/>.

package LSG::Markdown;
use strict;
use warnings;
use utf8;
use open qw< :encoding(UTF-8) >;
binmode STDIN, ":encoding(UTF-8)";
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
use File::Spec::Functions;
use Text::Markdown qw(markdown);
use LSG::Misc;
use LSG::Config qw($config);

# Call the user-defined function $fnc_name registered under
# $config->{"funcs"}. The argument string is split on single spaces and
# passed on after the page id and language. Dies if no function of that
# name is registered. Returns whatever the registered function returns.
sub handle_fnc {
    my ($pageid, $lang, $line, $file, $fnc_name, $arg_str) = @_;
    my @fnc_args = split(/ /, $arg_str);
    unless (exists($config->{"funcs"}->{$fnc_name})) {
        die "ERROR: $file: undefined function \"$fnc_name\":\n$line\n";
    }
    return $config->{"funcs"}->{$fnc_name}->($pageid, $lang, @fnc_args);
}

# Resolve a reference into the static directory.
#   "@rest" -> the page's configured "basename" followed by "rest"
#   "#rest" -> "rest" as given
# Returns (path on disk, relative URL) for these two forms and an empty
# list for anything else.
sub _static_target {
    my ($pageid, $lang, $ref) = @_;
    my $sigil = substr($ref, 0, 1);
    my $name;
    if ($sigil eq "@") {
        $name = $config->{"metadata"}->{$pageid}->{"basename"} . substr($ref, 1);
    } elsif ($sigil eq "#") {
        $name = substr($ref, 1);
    } else {
        return;
    }
    my $path = catfile("site", "static", $name);
    my $url = LSG::Misc::gen_relative_link("$lang/$pageid", "static/$name");
    return ($path, $url);
}

# Turn a link with text $txt and target $lnk into an HTML anchor.
# A leading "=" on the target adds the "download" attribute. After that,
# "@" and "#" point into the static directory, "$" points at another page
# (given as "name.<two-letter language>"), and anything else is used as
# the URL unchanged. Warns when a local target is not an existing file.
sub handle_lnk {
    my ($pageid, $lang, $line, $file, $txt, $lnk) = @_;
    my $lnk_path = "";
    my $url = "";
    my $download = "";

    if (substr($lnk, 0, 1) eq "=") {
        $download = " download";
        $lnk = substr($lnk, 1);
    }

    my @static = _static_target($pageid, $lang, $lnk);
    if (@static) {
        ($lnk_path, $url) = @static;
    } elsif (substr($lnk, 0, 1) eq "\$") {
        my $lnk_file = substr($lnk, 1);
        $lnk_path = catfile("pages", $lnk_file);
        # Convert to /lang/page format
        my $page_lang = substr($lnk_file, -2);
        my $page_name = substr($lnk_file, 0, length($lnk_file) - 3);
        my $lnk_abs = $page_lang . "/" . $page_name . ".html";
        $url = LSG::Misc::gen_relative_link("$lang/$pageid", $lnk_abs);
    } else {
        $url = $lnk;
    }

    if ($lnk_path && !(-f $lnk_path)) {
        warn "WARNING: $file: linked file $lnk_path does not exist:\n$line\n";
    }
    # Note: This is a hack. Ideally this would return markdown, but it
    # returns HTML directly, which is inconsistent with handle_img.
    return "<a href=\"$url\"$download>$txt</a>";
}

# Turn an image with alt text $txt and source $img into a markdown image.
# "@" and "#" sources point into the static directory; anything else is
# used as the source unchanged. Warns when a local source is not an
# existing file.
sub handle_img {
    my ($pageid, $lang, $line, $file, $txt, $img) = @_;
    my $img_path = "";
    my $src = "";

    my @static = _static_target($pageid, $lang, $img);
    if (@static) {
        ($img_path, $src) = @static;
    } else {
        $src = $img;
    }

    if ($img_path && !(-f $img_path)) {
        warn "WARNING: $file: image file $img_path does not exist:\n$line\n";
    }

    return "![$txt]($src)";
}

# Attach a fresh, empty node of the given type as the "child" of $parent
# (replacing any previous child) and return a reference to the new node.
sub add_child {
    my ($parent, $type) = @_;
    my %node = (
        type   => $type,
        txt    => "",
        url    => "",
        parent => $parent,
        child  => {},
    );
    $parent->{"child"} = \%node;

    return \%node;
}

# Render a completed node with the handler matching its type and append
# the result to the parent's text. Returns the parent node followed by
# any additional values a "fnc" handler returned after its first one.
sub finish_child {
    my ($child, $pageid, $lang, $line, $file) = @_;
    my $parent = $child->{"parent"};
    my $type = $child->{"type"};
    my @handler_args = ($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});

    my ($html, @extra_pages);
    if ($type eq "img") {
        $html = handle_img(@handler_args);
    } elsif ($type eq "lnk") {
        $html = handle_lnk(@handler_args);
    } elsif ($type eq "fnc") {
        ($html, @extra_pages) = handle_fnc(@handler_args);
    }
    $parent->{"txt"} .= $html;

    return ($parent, @extra_pages);
}

# FIXME: This whole "parser" is extremely primitive and will break on a lot of input.
# The whole thing should be replaced with something proper sometime.
# parse_md($lang, $pageid, $inpath)
#
# Read the page source at $inpath, skip its leading "key:value" metadata
# lines (and the first line that does not look like metadata), expand the
# LSG-specific constructs and run the result through markdown().
#
# Recognised constructs:
#   [text](target)   link     -> finish_child() with type "lnk"
#   ![text](target)  image    -> finish_child() with type "img"
#   {name}(args)     function -> finish_child() with type "fnc"
# A backslash makes the following character literal; "\\" yields a single
# backslash.
#
# Returns the markdown() output followed by any extra pages collected from
# finish_child(). Dies if $inpath cannot be opened.
#
# Parser rules that callers may notice:
#   * "\\" is written to the output instead of being dropped.
#   * A "!" that is not directly followed by "[" is ordinary text in every
#     context ("!!", "!]", "!)", "!" at end of file).
#   * "(" only starts the target part once the text part has been closed,
#     so parentheses inside link text are kept as text.
sub parse_md {
    my ($lang, $pageid, $inpath) = @_;
    open(my $in, "<", $inpath) or die "ERROR: Can't open $inpath for reading: $!";

    # Skip metadata. The first non-matching line is consumed as well; the
    # defined() test ends the loop cleanly if the file ends inside the
    # metadata block.
    while (defined(my $meta = <$in>)) {
        last if $meta !~ /^([^:]*):(.*)$/;
    }

    # Bit flags describing the construct on top of the state stack.
    my $IN_IMG = 1;
    my $IN_LNK = 2;
    my $IN_FNC = 4;
    my $IN_TXT = 8;     # inside [...] or {...}
    my $IN_URL = 16;    # inside (...)
    my $IN_ANY = $IN_IMG | $IN_LNK | $IN_FNC;

    my %structure = (txt => "", child => {});
    my $cur_child_ref = \%structure;
    my @states = (0);
    my @extra_pages;
    my $escaped = 0;        # previous character was an unescaped backslash
    my $pending_bang = 0;   # previous character was "!", may start an image

    # Append literal text to whatever is currently being collected. Text
    # that falls between the closing "]"/"}" and the opening "(" of a
    # construct belongs nowhere and is discarded.
    my $emit = sub {
        my $text = shift;
        if ($states[-1] & $IN_TXT) {
            $cur_child_ref->{"txt"} .= $text;
        } elsif ($states[-1] & $IN_URL) {
            $cur_child_ref->{"url"} .= $text;
        } elsif (!($states[-1] & $IN_ANY)) {
            $structure{"txt"} .= $text;
        }
    };

    while (defined(my $line = <$in>)) {
        foreach my $char (split //, $line) {
            if ($pending_bang) {
                $pending_bang = 0;
                if ($char eq "[") {
                    push(@states, $IN_IMG | $IN_TXT);
                    $cur_child_ref = add_child($cur_child_ref, "img");
                    next;
                }
                # Not an image after all: the "!" was plain text. The
                # current character is then handled normally below.
                $emit->("!");
            }

            if ($escaped) {
                # Covers both "\x" (literal x) and "\\" (literal backslash).
                $emit->($char);
                $escaped = 0;
            } elsif ($char eq "\\") {
                $escaped = 1;
            } elsif ($char eq "!") {
                $pending_bang = 1;
            } elsif ($char eq "[") {
                push(@states, $IN_LNK | $IN_TXT);
                $cur_child_ref = add_child($cur_child_ref, "lnk");
            } elsif ($char eq "{") {
                $cur_child_ref = add_child($cur_child_ref, "fnc");
                push(@states, $IN_FNC | $IN_TXT);
            } elsif ($char eq "]" && ($states[-1] & ($IN_IMG | $IN_LNK)) && ($states[-1] & $IN_TXT)) {
                $states[-1] &= ~$IN_TXT;
            } elsif ($char eq "}" && ($states[-1] & $IN_FNC) && ($states[-1] & $IN_TXT)) {
                $states[-1] &= ~$IN_TXT;
            } elsif ($char eq "(" && ($states[-1] & $IN_ANY) && !($states[-1] & $IN_TXT)) {
                $states[-1] |= $IN_URL;
            } elsif ($char eq ")" && ($states[-1] & $IN_URL)) {
                pop(@states);
                my @tmp_pages;
                ($cur_child_ref, @tmp_pages) = finish_child($cur_child_ref, $pageid, $lang, $line, $inpath);
                push(@extra_pages, @tmp_pages);
            } else {
                $emit->($char);
            }
        }
    }
    # A "!" as the very last character of the file is still text.
    $emit->("!") if $pending_bang;
    close($in);

    return (markdown($structure{"txt"}), @extra_pages);
}

1;