commit 901aadb474e06693524e3fc6f2092f4d94bb1d07
parent 60cba53ab7684b466185fd3886fb19d22b688e53
Author: lumidify <nobody@lumidify.org>
Date: Wed, 27 Dec 2023 15:26:40 +0100
Improve book sorting
Diffstat:
5 files changed, 257 insertions(+), 61 deletions(-)
diff --git a/LSG/Config.pm b/LSG/Config.pm
@@ -25,6 +25,7 @@ use Exporter qw(import);
our @EXPORT_OK = qw($config);
# Yes, I know this isn't just used for real config
+# FIXME: separate fields from config.ini from other parts to avoid conflicts
our $config;
sub read_modified_dates {
@@ -81,6 +82,7 @@ sub read_config {
$section = $1;
next;
}
+ # FIXME: report errors properly
my ($key, $value) = split("=", $_, 2);
if ($value =~ /:/) {
my @value = split(":", $value);
diff --git a/LSG/Generate.pm b/LSG/Generate.pm
@@ -26,32 +26,60 @@ use File::Path qw(make_path);
use LSG::Markdown;
use LSG::Config qw($config);
-sub gen_files() {
- foreach my $pageid (keys %{$config->{"metadata"}}) {
- foreach my $lang (keys %{$config->{"langs"}}) {
- my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html";
- if (
- exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) &&
- exists($config->{"modified_dates"}->{"templates"}->{$template}) &&
- $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} &&
- $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} &&
- (!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ||
- $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true")
- ) {
- next;
- }
- print("Processing $pageid.$lang\n");
- my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"});
- make_path($html_dir);
- my $fullname = catfile("pages", "$pageid.$lang");
- my $html = LSG::Markdown::parse_md($lang, $pageid, $fullname);
- my $final_html = LSG::Template::render_template($html, $lang, $pageid);
- my $html_file = catfile("site", $lang, $pageid) . ".html";
- open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n";
- print $in $final_html;
- close($in);
- }
+sub gen_page {
+ my ($pageid, $html_pages) = @_;
+ my @ret_pages;
+ foreach my $lang (keys %{$config->{"langs"}}) {
+ my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html";
+ # FIXME: also check if the html file actually exists
+ # -> maybe compare with modified date of html instead of markdown
+ if (
+ exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) &&
+ exists($config->{"modified_dates"}->{"templates"}->{$template}) &&
+ $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} &&
+ $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} &&
+ (!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ||
+ $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true")
+ ) {
+ next;
+ }
+ #print("Processing $pageid.$lang\n");
+ my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"});
+ make_path($html_dir);
+ my $html;
+ if (defined($html_pages)) {
+ if (!exists($html_pages->{$lang})) {
+ die "Page $pageid does not exist for language $lang\n";
+ }
+ $html = $html_pages->{$lang};
+ } else {
+ my $fullname = catfile("pages", "$pageid.$lang");
+ my @tmp_pages;
+ ($html, @tmp_pages) = LSG::Markdown::parse_md($lang, $pageid, $fullname);
+ push(@ret_pages, @tmp_pages);
+ }
+ my $final_html = LSG::Template::render_template($html, $lang, $pageid);
+ my $html_file = catfile("site", $lang, $pageid) . ".html";
+ open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n";
+ print $in $final_html;
+ close($in);
+ }
+ return @ret_pages;
+}
+
+sub gen_files {
+ my %extra_pages;
+ for my $pageid (keys %{$config->{"metadata"}}) {
+ for my $page (gen_page($pageid)) {
+ if (exists $extra_pages{$page->[0]}->{$page->[1]}) {
+ die "Duplicate page $page->[0] for language $page->[1]\n";
+ }
+ $extra_pages{$page->[0]}->{$page->[1]} = $page->[2];
+ }
}
+ for my $pageid (keys %extra_pages) {
+ gen_page($pageid, $extra_pages{$pageid});
+ }
}
sub delete_obsolete_recurse {
@@ -74,6 +102,13 @@ sub delete_obsolete_recurse {
}
}
closedir($dh);
+ opendir($dh, $dir) or die "Unable to open directory \"" . getcwd() . "/$dir\": $!\n";
+ if (scalar(grep { $_ ne "." && $_ ne ".." } readdir($dh)) == 0) {
+ print("Deleting old directory \"" . getcwd() . "/$dir\".\n");
+ rmdir($dir);
+ }
+ closedir($dh);
+ # FIXME: remove empty dirs
foreach (@dirs) {
delete_obsolete_recurse($_);
}
diff --git a/LSG/Markdown.pm b/LSG/Markdown.pm
@@ -128,15 +128,17 @@ sub finish_child {
my $file = shift;
my $parent = $child->{"parent"};
+ my ($html, @extra_pages);
if ($child->{"type"} eq "img") {
- $parent->{"txt"} .= handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
+ $html = handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
} elsif ($child->{"type"} eq "lnk") {
- $parent->{"txt"} .= handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
+ $html = handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
} elsif ($child->{"type"} eq "fnc") {
- $parent->{"txt"} .= handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
+ ($html, @extra_pages) = handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
}
+ $parent->{"txt"} .= $html;
- return $parent;
+ return ($parent, @extra_pages);
}
# FIXME: This whole "parser" is extremely primitive and will break on a lot of input.
@@ -160,6 +162,7 @@ sub parse_md {
my %structure = (txt => "", child => {});
my $cur_child_ref = \%structure;
my @states = (0);
+ my @extra_pages;
foreach (<$in>) {
foreach my $char (split //, $_) {
if ($char eq "\\") {
@@ -196,7 +199,9 @@ sub parse_md {
$states[-1] |= $IN_URL;
} elsif ($char eq ")" && ($states[-1] & $IN_URL)) {
pop(@states);
- $cur_child_ref = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath);
+ my @tmp_pages;
+ ($cur_child_ref, @tmp_pages) = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath);
+ push(@extra_pages, @tmp_pages);
} else {
if ($states[-1] & $IN_IMG_START) {
pop(@states);
@@ -213,7 +218,7 @@ sub parse_md {
}
}
- return markdown($structure{"txt"});
+ return (markdown($structure{"txt"}), @extra_pages);
}
1;
diff --git a/LSG/UserFuncs.pm b/LSG/UserFuncs.pm
@@ -31,6 +31,14 @@ use LSG::Misc;
# 2: page language
# 3-: other args (e.g. for func call)
+# Return value:
+# Usually just the html text.
+# Optionally, a list of array references of the form [$pageid, $lang, $html]
+# defining further pages, together with the complete body html text of the
+# page. The returned text is always taken verbatim as the html code of the
+# page body, there is no option to interpret it as markdown.
+# When called from templates, the extra pages are ignored.
+
# Yeah, this is extremely inefficient, but it's
# not like we're comparing billions of books.
sub sort_numeric {
@@ -58,33 +66,176 @@ sub sort_numeric {
}
sub sort_books {
- my $pageid = shift;
- my $lang = shift;
- my $sort_by = shift;
- my $create_subheadings = shift;
- if (!$sort_by) {die "ERROR: not enough arguments to function call in $pageid\n"}
- my $output = "";
- my %tmp_md = ();
+ # FIXME: 'list' currently doesn't make much sense - the
+ # sorting should be changed to just be alphabetical by
+ # title when 'list' is used
+
+ # $mode == list: just list books
+ # $mode == combined: create subheadings for @sort_by
+ # $mode == separate: create separate pages for @sort_by
+ # $dir: directory to search for pages to sort
+ # (new pages are also created in this directory)
+ # @sort_by: list of metadata attributes to sort by
+ # (this is a hierarchical sorting, i.e. if the second
+ # category in @sort_by is the same for two pages,
+ # the first category must also be the same, and so
+ # on, otherwise there will probably be an error at
+ # some point, or the result will just be weird)
+ my ($pageid, $lang, $dir, $mode, @sort_by) = @_;
+ if (!defined($dir) || !defined($mode)) {
+ die "ERROR: Too few arguments to sort_by.\n";
+ }
+ if ($mode eq "list") {
+ $mode = 0;
+ } elsif ($mode eq "combined") {
+ $mode = 1;
+ } elsif ($mode eq "separate") {
+ $mode = 2;
+ } else {
+ die "ERROR: Invalid mode $mode for sort_books.\n";
+ }
+ my %tmp_md;
foreach my $id (keys %{$config->{"metadata"}}) {
- if ($config->{"metadata"}->{$id}->{"dirname"} eq "books") {
+ # pages generated by sort_books need to be skipped so when this
+ # function is called again for other languages, it doesn't try
+ # to sort all the generated pages (yes, this is really ugly)
+
+ # prevent autovivification of $config->{"metadata"}->{$id}->{$lang}
+ next if (exists($config->{"metadata"}->{$id}->{$lang}) &&
+ $config->{"metadata"}->{$id}->{$lang}->{"generated:sort_books"});
+ if ($config->{"metadata"}->{$id}->{"dirname"} eq $dir) {
$tmp_md{$id} = $config->{"metadata"}->{$id};
- if (!exists($config->{"metadata"}->{$id}->{$lang}->{$sort_by})) {
- die "ERROR: $pageid: can't sort by \"$sort_by\"\n";
+ my $found = 0;
+ for my $sb (@sort_by) {
+ if (!exists($config->{"metadata"}->{$id}->{$lang}) ||
+ !exists($config->{"metadata"}->{$id}->{$lang}->{$sb})) {
+ $found = 1;
+ } else {
+ if ($found) {
+ # there can't be any "undef gaps" - as soon as one sort key
+ # is undef, all the ones afterwards are ignored (in the
+ # final output, the page is located on the same "level" as
+ # the category of the first undef sort key)
+ die "ERROR: $pageid: metadata $sb defined but previous " .
+ "sort key already undef.\n";
+ }
+ my $val = $config->{"metadata"}->{$id}->{$lang}->{$sb};
+ if (!exists($config->{"$sb:$lang"}->{$val})) {
+ die "No display value configured for sort key $sb=$val (language $lang).\n";
+ }
+ }
}
}
}
- my $current = "";
- foreach my $id (sort {$tmp_md{$a}->{$lang}->{$sort_by} cmp $tmp_md{$b}->{$lang}->{$sort_by} or
- sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"})} (keys %tmp_md)) {
- if ($create_subheadings && $create_subheadings eq "true" && $current ne $tmp_md{$id}->{$lang}->{$sort_by}) {
- $current = $tmp_md{$id}->{$lang}->{$sort_by};
- $output .= "<h3>$current</h3>\n";
+ # I could do a Schwartzian transform here, but I won't because I'm too lazy.
+ my @sorted = sort {
+ for my $sb (@sort_by) {
+ # if a sort_by value is undef, use the title of the page instead
+ # so entries on the same level are sorted properly even if some
+ # are actual pages and others are categories
+ my $sort_a = exists($tmp_md{$a}->{$lang}->{$sb}) ?
+ $config->{"$sb:$lang"}->{$tmp_md{$a}->{$lang}->{$sb}} :
+ $tmp_md{$a}->{$lang}->{"title"};
+ my $sort_b = exists($tmp_md{$b}->{$lang}->{$sb}) ?
+ $config->{"$sb:$lang"}->{$tmp_md{$b}->{$lang}->{$sb}} :
+ $tmp_md{$b}->{$lang}->{"title"};
+ if ((my $ret = sort_numeric($sort_a, $sort_b))) {
+ return $ret;
+ }
}
+ return sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"});
+ } (keys %tmp_md);
+ my $output = "";
+ my %current;
+ my @extra_pages;
+ my @page_stack = ([$pageid, $lang, ""]);
+ my $margin_dir = $config->{"lang_dirs"}->{$lang} eq "rtl" ? "right" : "left";
+ foreach my $id (@sorted) {
my $rel_lnk = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$id.html");
- $output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+ if ($mode == 1 || $mode == 2) {
+ my $indent = 0;
+ my $found_unequal = 0;
+ for my $i (0..$#sort_by) {
+ my $sb = $sort_by[$i];
+ # Note: it would be possible to use exists instead of
+ # defined here, but using defined makes the code a bit simpler
+ if (defined($current{$sb}) != defined($tmp_md{$id}->{$lang}->{$sb}) ||
+ (defined($current{$sb}) && $current{$sb} ne $tmp_md{$id}->{$lang}->{$sb})) {
+ $found_unequal = 1;
+ $current{$sb} = $tmp_md{$id}->{$lang}->{$sb};
+ for my $j ($indent+1..$#page_stack) {
+ push(@extra_pages, pop(@page_stack));
+ }
+ if (defined($current{$sb})) {
+ my $name = $config->{"$sb:$lang"}->{$current{$sb}};
+ # This is currently hard-coded. Up to four heading sizes are
+ # used (starting at <h3>), then they just stay the same
+ if ($mode == 1) {
+ my $h_sz = $indent + 3 > 6 ? 6 : $indent + 3;
+ $output .= "<h$h_sz style=\"margin-$margin_dir: " .
+ ($indent * 15). "pt;\">$name</h$h_sz>\n";
+ } else {
+ my $new_id = "$dir/$sb/$current{$sb}";
+ if (exists $config->{"metadata"}->{$new_id}->{$lang}) {
+ die "ERROR: Duplicate page $new_id (lang $lang).\n";
+ }
+ my $cat_lnk = LSG::Misc::gen_relative_link(
+ "$lang/$page_stack[-1]->[0]", "$lang/$new_id.html"
+ );
+ $page_stack[-1]->[2] .= "<p><a href=\"$cat_lnk\">$name</a></p>\n";
+ push(@page_stack, [
+ $new_id,
+ $lang,
+ "<h3>$name</h3>\n"
+ ]);
+ $config->{"metadata"}->{$new_id}->{$lang} = {
+ title => $name,
+ "generated:sort_books" => 1
+ };
+ # FIXME: maybe check if these overwrite a different value
+ $config->{"metadata"}->{$new_id}->{"template"} = $config->{"metadata"}->{$pageid}->{"template"};
+ $config->{"metadata"}->{$new_id}->{"dirname"} = "$dir/$sb";
+ $config->{"metadata"}->{$new_id}->{"basename"} = $current{$sb};
+ }
+ }
+ } elsif ($found_unequal && defined($current{$sb})) {
+ die "ERROR: $sb same as previous page in list for page $id, but higher-level category different (lang $lang).\n";
+ }
+ if (!defined($current{$sb})) {
+ # as soon as one sort key is undef, the other ones should
+ # also be undef for it to make sense
+ for my $j ($i+1..$#sort_by) {
+ if (defined($tmp_md{$id}->{$lang}->{$sort_by[$j]})) {
+ die "ERROR: $sort_by[$j] set for page $id, but $sb unset (lang $lang).\n";
+ }
+ $current{$sort_by[$j]} = undef;
+ }
+ last;
+ }
+ $indent++;
+ }
+ if ($mode == 1) {
+ $output .= "<p style=\"margin-$margin_dir: " . ($indent * 15) . "pt;\">" .
+ "<a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+ } else {
+ $rel_lnk = LSG::Misc::gen_relative_link("$lang/$page_stack[-1]->[0]", "$lang/$id.html");
+ $page_stack[-1]->[2] .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+ }
+ } else {
+ $output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
+ }
}
- return $output;
+ if ($mode == 2) {
+ for my $i (1..$#page_stack) {
+ push(@extra_pages, pop(@page_stack));
+ }
+ $output = $page_stack[0]->[2];
+ shift @page_stack;
+ return ($output, @extra_pages);
+ } else {
+ return $output;
+ }
}
sub gen_lang_selector {
@@ -115,6 +266,9 @@ sub gen_nav {
#}
foreach my $nav_page (@nav) {
my $title = $config->{"metadata"}->{$nav_page}->{$lang}->{"title"};
+ if (!defined($title)) {
+ die "Unable to find title for navigation page \"$nav_page\"\n";
+ }
my $url = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$nav_page.html");
$output .= "<li><a href=\"$url\">$title</a></li>\n";
}
diff --git a/lsg.pl b/lsg.pl
@@ -222,22 +222,22 @@ B<sort_books>
B<Parameters:>
-- attribute to sort by
+- directory to take books from
-- create heading when attribute changes or not
+- mode
-B<Purpose:>
-
-Generate sorted list of all books, first by the given attribute, which can be anything
-in the metadata, then by the titles. The second attribute can be used to create, for
-instance, category titles. This does not make sense though when the attribute is just
-the title which changes every time anyways. If the second argument is left out, it
-defaults to "false". The attribute to be sorted by (obviously) needs to be defined for
-each book.
+- attribute(s) to sort by
-B<Example:>
+B<Purpose:>
-C<{sort_books}(category false)>
+Generate sorted list of all pages in the given directory, first by the given attribute(s),
+which can be anything in the metadata, then by the titles. The mode argument can be used
+to either just list all books ("list"), list all books with subheadings for the different
+sorting attributes ("combined"), or generate different pages for the different values of
+the sorting attributes ("separate"). Note that the display names for the attributes need to be defined
+in the configuration file config.ini. For instance, if a page contains metadata
+"category:stuff", config.ini must contain a section "[category:$lang]" for each language
+that contains a line "stuff=Display Name".
This function was created for a book site, but it could probably be used for articles
as well.