lsg

Lumidify Site Generator
git clone git://lumidify.org/lsg.git (fast, but not encrypted)
git clone https://lumidify.org/git/lsg.git (encrypted, but very slow)
Log | Files | Refs | README | LICENSE

commit 901aadb474e06693524e3fc6f2092f4d94bb1d07
parent 60cba53ab7684b466185fd3886fb19d22b688e53
Author: lumidify <nobody@lumidify.org>
Date:   Wed, 27 Dec 2023 15:26:40 +0100

Improve book sorting

Diffstat:
M LSG/Config.pm | 2 ++
M LSG/Generate.pm | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M LSG/Markdown.pm | 17 +++++++++++------
M LSG/UserFuncs.pm | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M lsg.pl | 24 ++++++++++++------------
5 files changed, 257 insertions(+), 61 deletions(-)

diff --git a/LSG/Config.pm b/LSG/Config.pm @@ -25,6 +25,7 @@ use Exporter qw(import); our @EXPORT_OK = qw($config); # Yes, I know this isn't just used for real config +# FIXME: separate fields from config.ini from other parts to avoid conflicts our $config; sub read_modified_dates { @@ -81,6 +82,7 @@ sub read_config { $section = $1; next; } + # FIXME: report errors properly my ($key, $value) = split("=", $_, 2); if ($value =~ /:/) { my @value = split(":", $value); diff --git a/LSG/Generate.pm b/LSG/Generate.pm @@ -26,32 +26,60 @@ use File::Path qw(make_path); use LSG::Markdown; use LSG::Config qw($config); -sub gen_files() { - foreach my $pageid (keys %{$config->{"metadata"}}) { - foreach my $lang (keys %{$config->{"langs"}}) { - my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html"; - if ( - exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) && - exists($config->{"modified_dates"}->{"templates"}->{$template}) && - $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} && - $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} && - (!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} || - $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true") - ) { - next; - } - print("Processing $pageid.$lang\n"); - my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"}); - make_path($html_dir); - my $fullname = catfile("pages", "$pageid.$lang"); - my $html = LSG::Markdown::parse_md($lang, $pageid, $fullname); - my $final_html = LSG::Template::render_template($html, $lang, $pageid); - my $html_file = catfile("site", $lang, $pageid) . 
".html"; - open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n"; - print $in $final_html; - close($in); - } +sub gen_page { + my ($pageid, $html_pages) = @_; + my @ret_pages; + foreach my $lang (keys %{$config->{"langs"}}) { + my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html"; + # FIXME: also check if the html file actually exists + # -> maybe compare with modified date of html instead of markdown + if ( + exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) && + exists($config->{"modified_dates"}->{"templates"}->{$template}) && + $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} && + $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} && + (!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} || + $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true") + ) { + next; + } + #print("Processing $pageid.$lang\n"); + my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"}); + make_path($html_dir); + my $html; + if (defined($html_pages)) { + if (!exists($html_pages->{$lang})) { + die "Page $pageid does not exist for language $lang\n"; + } + $html = $html_pages->{$lang}; + } else { + my $fullname = catfile("pages", "$pageid.$lang"); + my @tmp_pages; + ($html, @tmp_pages) = LSG::Markdown::parse_md($lang, $pageid, $fullname); + push(@ret_pages, @tmp_pages); + } + my $final_html = LSG::Template::render_template($html, $lang, $pageid); + my $html_file = catfile("site", $lang, $pageid) . 
".html"; + open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n"; + print $in $final_html; + close($in); + } + return @ret_pages; +} + +sub gen_files { + my %extra_pages; + for my $pageid (keys %{$config->{"metadata"}}) { + for my $page (gen_page($pageid)) { + if (exists $extra_pages{$page->[0]}->{$page->[1]}) { + die "Duplicate page $page->[0] for language $page->[1]\n"; + } + $extra_pages{$page->[0]}->{$page->[1]} = $page->[2]; + } } + for my $pageid (keys %extra_pages) { + gen_page($pageid, $extra_pages{$pageid}); + } } sub delete_obsolete_recurse { @@ -74,6 +102,13 @@ sub delete_obsolete_recurse { } } closedir($dh); + opendir($dh, $dir) or die "Unable to open directory \"" . getcwd() . "/$dir\": $!\n"; + if (scalar(grep { $_ ne "." && $_ ne ".." } readdir($dh)) == 0) { + print("Deleting old directory \"" . getcwd() . "/$dir\".\n"); + rmdir($dir); + } + closedir($dh); + # FIXME: remove empty dirs foreach (@dirs) { delete_obsolete_recurse($_); } diff --git a/LSG/Markdown.pm b/LSG/Markdown.pm @@ -128,15 +128,17 @@ sub finish_child { my $file = shift; my $parent = $child->{"parent"}; + my ($html, @extra_pages); if ($child->{"type"} eq "img") { - $parent->{"txt"} .= handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"}); + $html = handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"}); } elsif ($child->{"type"} eq "lnk") { - $parent->{"txt"} .= handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"}); + $html = handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"}); } elsif ($child->{"type"} eq "fnc") { - $parent->{"txt"} .= handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"}); + ($html, @extra_pages) = handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"}); } + $parent->{"txt"} .= $html; - return $parent; + return ($parent, @extra_pages); } # FIXME: This whole "parser" is extremely primitive and will break on a lot of 
input. @@ -160,6 +162,7 @@ sub parse_md { my %structure = (txt => "", child => {}); my $cur_child_ref = \%structure; my @states = (0); + my @extra_pages; foreach (<$in>) { foreach my $char (split //, $_) { if ($char eq "\\") { @@ -196,7 +199,9 @@ sub parse_md { $states[-1] |= $IN_URL; } elsif ($char eq ")" && ($states[-1] & $IN_URL)) { pop(@states); - $cur_child_ref = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath); + my @tmp_pages; + ($cur_child_ref, @tmp_pages) = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath); + push(@extra_pages, @tmp_pages); } else { if ($states[-1] & $IN_IMG_START) { pop(@states); @@ -213,7 +218,7 @@ sub parse_md { } } - return markdown($structure{"txt"}); + return (markdown($structure{"txt"}), @extra_pages); } 1; diff --git a/LSG/UserFuncs.pm b/LSG/UserFuncs.pm @@ -31,6 +31,14 @@ use LSG::Misc; # 2: page language # 3-: other args (e.g. for func call) +# Return value: +# Usually just the html text. +# Optionally, a list of array references of the form [$pageid, $lang, $html] +# defining further pages, together with the complete body html text of the +# page. The returned text is always taken verbatim as the html code of the +# page body, there is no option to interpret it as markdown. +# When called from templates, the extra pages are ignored. + # Yeah, this is extremely inefficient, but it's # not like we're comparing billions of books. 
sub sort_numeric { @@ -58,33 +66,176 @@ sub sort_numeric { } sub sort_books { - my $pageid = shift; - my $lang = shift; - my $sort_by = shift; - my $create_subheadings = shift; - if (!$sort_by) {die "ERROR: not enough arguments to function call in $pageid\n"} - my $output = ""; - my %tmp_md = (); + # FIXME: 'list' currently doesn't make much sense - the + # sorting should be changed to just be alphabetical by + # title when 'list' is used + + # $mode == list: just list books + # $mode == combined: create subheadings for @sort_by + # $mode == separate: create separate pages for @sort_by + # $dir: directory to search for pages to sort + # (new pages are also created in this directory) + # @sort_by: list of metadata attributes to sort by + # (this is a hierarchical sorting, i.e. if the second + # category in @sort_by is the same for two pages, + # the first category must also be the same, and so + # on, otherwise there will probably be an error at + # some point, or the result will just be weird) + my ($pageid, $lang, $dir, $mode, @sort_by) = @_; + if (!defined($dir) || !defined($mode)) { + die "ERROR: Too few arguments to sort_by.\n"; + } + if ($mode eq "list") { + $mode = 0; + } elsif ($mode eq "combined") { + $mode = 1; + } elsif ($mode eq "separate") { + $mode = 2; + } else { + die "ERROR: Invalid mode $mode for sort_books.\n"; + } + my %tmp_md; foreach my $id (keys %{$config->{"metadata"}}) { - if ($config->{"metadata"}->{$id}->{"dirname"} eq "books") { + # pages generated by sort_books need to be skipped so when this + # function is called again for other languages, it doesn't try + # to sort all the generated pages (yes, this is really ugly) + + # prevent autovivification of $config->{"metadata"}->{$id}->{$lang} + next if (exists($config->{"metadata"}->{$id}->{$lang}) && + $config->{"metadata"}->{$id}->{$lang}->{"generated:sort_books"}); + if ($config->{"metadata"}->{$id}->{"dirname"} eq $dir) { $tmp_md{$id} = $config->{"metadata"}->{$id}; - if 
(!exists($config->{"metadata"}->{$id}->{$lang}->{$sort_by})) { - die "ERROR: $pageid: can't sort by \"$sort_by\"\n"; + my $found = 0; + for my $sb (@sort_by) { + if (!exists($config->{"metadata"}->{$id}->{$lang}) || + !exists($config->{"metadata"}->{$id}->{$lang}->{$sb})) { + $found = 1; + } else { + if ($found) { + # there can't be any "undef gaps" - as soon as one sort key + # is undef, all the ones afterwards are ignored (in the + # final output, the page is located on the same "level" as + # the category of the first undef sort key) + die "ERROR: $pageid: metadata $sb defined but previous " . + "sort key already undef.\n"; + } + my $val = $config->{"metadata"}->{$id}->{$lang}->{$sb}; + if (!exists($config->{"$sb:$lang"}->{$val})) { + die "No display value configured for sort key $sb=$val (language $lang).\n"; + } + } } } } - my $current = ""; - foreach my $id (sort {$tmp_md{$a}->{$lang}->{$sort_by} cmp $tmp_md{$b}->{$lang}->{$sort_by} or - sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"})} (keys %tmp_md)) { - if ($create_subheadings && $create_subheadings eq "true" && $current ne $tmp_md{$id}->{$lang}->{$sort_by}) { - $current = $tmp_md{$id}->{$lang}->{$sort_by}; - $output .= "<h3>$current</h3>\n"; + # I could do a Schwartzian transform here, but I won't because I'm too lazy. + my @sorted = sort { + for my $sb (@sort_by) { + # if a sort_by value is undef, use the title of the page instead + # so entries on the same level are sorted properly even if some + # are actual pages and other are categories + my $sort_a = exists($tmp_md{$a}->{$lang}->{$sb}) ? + $config->{"$sb:$lang"}->{$tmp_md{$a}->{$lang}->{$sb}} : + $tmp_md{$a}->{$lang}->{"title"}; + my $sort_b = exists($tmp_md{$b}->{$lang}->{$sb}) ? 
+ $config->{"$sb:$lang"}->{$tmp_md{$b}->{$lang}->{$sb}} : + $tmp_md{$b}->{$lang}->{"title"}; + if ((my $ret = sort_numeric($sort_a, $sort_b))) { + return $ret; + } } + return sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"}); + } (keys %tmp_md); + my $output = ""; + my %current; + my @extra_pages; + my @page_stack = ([$pageid, $lang, ""]); + my $margin_dir = $config->{"lang_dirs"}->{$lang} eq "rtl" ? "right" : "left"; + foreach my $id (@sorted) { my $rel_lnk = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$id.html"); - $output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n"; + if ($mode == 1 || $mode == 2) { + my $indent = 0; + my $found_unequal = 0; + for my $i (0..$#sort_by) { + my $sb = $sort_by[$i]; + # Note: it would be possible to uses exists instead of + # defined here, but using defined makes the code a bit simpler + if (defined($current{$sb}) != defined($tmp_md{$id}->{$lang}->{$sb}) || + (defined($current{$sb}) && $current{$sb} ne $tmp_md{$id}->{$lang}->{$sb})) { + $found_unequal = 1; + $current{$sb} = $tmp_md{$id}->{$lang}->{$sb}; + for my $j ($indent+1..$#page_stack) { + push(@extra_pages, pop(@page_stack)); + } + if (defined($current{$sb})) { + my $name = $config->{"$sb:$lang"}->{$current{$sb}}; + # This is currently hard-coded. Up to four heading sizes are + # used (starting at <h3>), then they just stay the same + if ($mode == 1) { + my $h_sz = $indent + 3 > 6 ? 6 : $indent + 3; + $output .= "<h$h_sz style=\"margin-$margin_dir: " . + ($indent * 15). 
"pt;\">$name</h$h_sz>\n"; + } else { + my $new_id = "$dir/$sb/$current{$sb}"; + if (exists $config->{"metadata"}->{$new_id}->{$lang}) { + die "ERROR: Duplicate page $new_id (lang $lang).\n"; + } + my $cat_lnk = LSG::Misc::gen_relative_link( + "$lang/$page_stack[-1]->[0]", "$lang/$new_id.html" + ); + $page_stack[-1]->[2] .= "<p><a href=\"$cat_lnk\">$name</a></p>\n"; + push(@page_stack, [ + $new_id, + $lang, + "<h3>$name</h3>\n" + ]); + $config->{"metadata"}->{$new_id}->{$lang} = { + title => $name, + "generated:sort_books" => 1 + }; + # FIXME: maybe check if these overwrite a different value + $config->{"metadata"}->{$new_id}->{"template"} = $config->{"metadata"}->{$pageid}->{"template"}; + $config->{"metadata"}->{$new_id}->{"dirname"} = "$dir/$sb"; + $config->{"metadata"}->{$new_id}->{"basename"} = $current{$sb}; + } + } + } elsif ($found_unequal && defined($current{$sb})) { + die "ERROR: $sb same as previous page in list for page $id, but higher-level category different (lang $lang).\n"; + } + if (!defined($current{$sb})) { + # as soon as one sort key is undef, the other ones should + # also be undef for it to make sense + for my $j ($i+1..$#sort_by) { + if (defined($tmp_md{$id}->{$lang}->{$sort_by[$j]})) { + die "ERROR: $sort_by[$j] set for page $id, but $sb unset (lang $lang).\n"; + } + $current{$sort_by[$j]} = undef; + } + last; + } + $indent++; + } + if ($mode == 1) { + $output .= "<p style=\"margin-$margin_dir: " . ($indent * 15) . "pt;\">" . + "<a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n"; + } else { + $rel_lnk = LSG::Misc::gen_relative_link("$lang/$page_stack[-1]->[0]", "$lang/$id.html"); + $page_stack[-1]->[2] .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n"; + } + } else { + $output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . 
"</a></p>\n"; + } } - return $output; + if ($mode == 2) { + for my $i (1..$#page_stack) { + push(@extra_pages, pop(@page_stack)); + } + $output = $page_stack[0]->[2]; + shift @page_stack; + return ($output, @extra_pages); + } else { + return $output; + } } sub gen_lang_selector { @@ -115,6 +266,9 @@ sub gen_nav { #} foreach my $nav_page (@nav) { my $title = $config->{"metadata"}->{$nav_page}->{$lang}->{"title"}; + if (!defined($title)) { + die "Unable to find title for navigation page \"$nav_page\"\n"; + } my $url = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$nav_page.html"); $output .= "<li><a href=\"$url\">$title</a></li>\n"; } diff --git a/lsg.pl b/lsg.pl @@ -222,22 +222,22 @@ B<sort_books> B<Parameters:> -- attribute to sort by +- directory to take books from -- create heading when attribute changes or not +- mode -B<Purpose:> - -Generate sorted list of all books, first by the given attribute, which can be anything -in the metadata, then by the titles. The second attribute can be used to create, for -instance, category titles. This does not make sense though when the attribute is just -the title which changes every time anyways. If the second argument is left out, it -defaults to "false". The attribute to be sorted by (obviously) needs to be defined for -each book. +- attribute(s) to sort by -B<Example:> +B<Purpose:> -C<{sort_books}(category false)> +Generate sorted list of all pages in the given directory, first by the given attribute(s), +which can be anything in the metadata, then by the titles. The mode argument can be used +to either just list all books ("list"), list all books with subheadings for the different +sorting attributes ("combined"), or generate different pages for the different values of +the sorting attributes. Note that the display names for the attributes need to be defined +in the configuration file config.ini. 
For instance, if a page contains metadata +"category:stuff", config.ini must contain a section "[category:$lang]" for each language +that contains a line "stuff=Display Name". This function was created for a book site, but it could probably be used for articles as well.