commit 186074f5c746c6f3452da864722a9fd7da2dd964
parent 1adf70462c0ec838b8611ce505c139862a768e6d
Author: lumidify <nobody@lumidify.org>
Date: Mon, 30 Mar 2020 13:04:56 +0200
Fix 'Add replacement' with words requiring expansions in the GUI; tests currently broken
Diffstat:
8 files changed, 163 insertions(+), 62 deletions(-)
diff --git a/tests/data/endings2.txt b/tests/data/endings2.txt
@@ -0,0 +1,2 @@
+end4 end4_replaced
+end5 end5_replaced
diff --git a/tests/data/words.txt b/tests/data/words.txt
@@ -8,3 +8,4 @@ word6 word6_replaced
word7 word7_replaced
word8 word8_replaced
word9 word9_replaced
+word10 word10_replaced
diff --git a/tests/test4/config b/tests/test4/config
@@ -0,0 +1,18 @@
+split "\s+"
+beforeword "\s"
+afterword "\s"
+
+ignore "../data/ignore.txt"
+table words "../data/words.txt"
+table words1 "../data/words.txt"
+table endings "../data/endings.txt"
+table endings2 "../data/endings2.txt"
+
+expand words endings noroot
+expand words endings2 noroot
+expand words1 endings2 noroot
+
+group beginword endword
+replace words
+replace words1
+endgroup
diff --git a/tests/test4/descr.txt b/tests/test4/descr.txt
@@ -0,0 +1 @@
+matchignore only endword; expand noroot
diff --git a/tests/test4/err.txt b/tests/test4/err.txt
@@ -0,0 +1,3 @@
+Unknown word: "word0"
+Word "word1_replacedend1r1|word1_replacedend1r2" with 2 word choices.
+Unknown word: "-dword9end2"
diff --git a/tests/test4/expected.txt b/tests/test4/expected.txt
@@ -0,0 +1,3 @@
+ignore
+word0 word1_replacedend1r1|word1_replacedend1r2
+-dword9end2 word9_replacedend2r-d
diff --git a/tests/test4/input.txt b/tests/test4/input.txt
@@ -0,0 +1 @@
+word10end1end5 word10end5
diff --git a/transliterate.pl b/transliterate.pl
@@ -561,28 +561,7 @@ sub load_ignore_table {
# 0 - an error occurred
# 1 - everything's fine
sub expand_table {
- my ($cmd, $tables, $config) = @_;
- my $table_name = $cmd->[1]->{"value"};
- my $new_table_name = $cmd->[2]->{"value"};
- my $forms_name = $cmd->[3]->{"value"};
- if ($new_table_name eq "same") {
- $new_table_name = $table_name;
- }
- if (!exists($tables->{$table_name})) {
- warn "expand_table: table \"$table_name\" does not exist.\n";
- return 0;
- }
- if (!exists($tables->{$forms_name})) {
- warn "expand_table: table \"$forms_name\" does not exist.\n";
- return 0;
- }
- my $table = $tables->{$table_name};
- my $forms = $tables->{$forms_name};
-
- my $noroot = 0;
- if ($#$cmd >= 4 && $cmd->[4]->{"value"} eq "noroot") {
- $noroot = 1;
- }
+ my ($table, $forms, $noroot, $config) = @_;
my %new_table;
foreach my $word (keys %$table) {
@@ -603,8 +582,7 @@ sub expand_table {
$new_table{$word} = $table->{$word};
}
}
- $tables->{$new_table_name} = \%new_table;
- return 1;
+ return \%new_table;
}
# Check if the number and types of arguments given to a config command are right
@@ -648,9 +626,19 @@ sub check_args {
# $config_list - the list returned by parse_config
sub interpret_config {
my ($config_list, $args) = @_;
+ my %tables;
my %config;
+ # table_paths stores a list of all table and replacement ids that are
+ # impacted by the path so the replacement can be added on the fly when
+ # a new replacement is added from the GUI
+ # the "replacement id" is just the number of the replacement group,
+ # starting at 0 with the first group in the config
$config{"table_paths"} = {};
- $config{"tables"} = {};
+ # a mapping between the table ids and tables for all tables used as
+ # ending tables in expand statements - so expansions can be done
+ # on the fly when adding a replacement word from the GUI
+ $config{"ending_tables"} = {};
+ # ignore is the path to the ignore file, ignore_words the actual table
$config{"ignore"} = "";
$config{"ignore_words"} = {};
$config{"split"} = "\\s";
@@ -658,12 +646,16 @@ sub interpret_config {
$config{"afterword"} = "\\s";
$config{"tablesep"} = "\t";
$config{"choicesep"} = "\$";
+ # a list of "replacement configs", which specify the type and any
+ # other arguments (this is given to replace_match, etc.
$config{"replacements"} = [];
- my %tmp_table_paths;
+ # these are temporary mappings used while loading the config
+ my %path_to_table;
+ my %table_id_to_path;
my %mandatory_args = (
"ignore" => [$STRING],
"table" => [$ID],
- "expand" => [$ID, $ID, $ID],
+ "expand" => [$ID, $ID],
"match" => [$STRING, $STRING],
"matchignore" => [$STRING],
"replace" => [$ID],
@@ -688,17 +680,42 @@ sub interpret_config {
return undef;
}
if ($cmd->[0]->{"value"} eq "table") {
- my $table = load_table $cmd->[2]->{"value"}, $args, \%config;
- if (!$table) {
- return undef;
+ my $table_path = $cmd->[2]->{"value"};
+ my $table;
+ # add to temporary path-to-table mapping so tables aren't
+ # loaded unnecessarily
+ if (exists $path_to_table{$table_path}) {
+ $table = $path_to_table{$table_path};
+ } else {
+ $table = load_table $table_path, $args, \%config;
+ $path_to_table{$table_path} = $table;
+ return if !$table;
}
- $config{tables}->{$cmd->[1]->{"value"}} = $table;
- $tmp_table_paths{$cmd->[1]->{"value"}} = $cmd->[2]->{"value"};
+ my $table_id = $cmd->[1]->{"value"};
+ $tables{$table_id} = $table;
+ $table_id_to_path{$table_id} = $table_path;
} elsif ($cmd->[0]->{"value"} eq "expand") {
- # FIXME: need to handle table paths when a new table name is used for the expansion
- if (!expand_table($cmd, $config{tables}, \%config)) {
- return undef;
+ my $orig_table_id = $cmd->[1]->{"value"};
+ my $ending_table_id = $cmd->[2]->{"value"};
+ my $noroot = 0;
+ if ($#$cmd >= 3 && $cmd->[3]->{"value"} eq "noroot") {
+ $noroot = 1;
+ }
+ if (!exists $tables{$orig_table_id}) {
+ warn "expand: table \"$orig_table_id\" doesn't exist\n";
+ return;
+ } elsif (!exists $tables{$ending_table_id}) {
+ warn "expand: table \"$ending_table_id\" doesn't exist\n";
+ return
}
+
+ $config{"ending_tables"}->{$ending_table_id} = $tables{$ending_table_id};
+ $config{"expands"}->{$orig_table_id} = [] if !exists $config{"expands"}->{$orig_table_id};
+ push @{$config{"expands"}->{$orig_table_id}}, [$ending_table_id, $noroot];
+
+ my $new_table = expand_table($tables{$orig_table_id}, $tables{$ending_table_id}, $noroot, \%config);
+ return if !defined $new_table;
+ $tables{$orig_table_id} = $new_table;
} elsif ($cmd->[0]->{"value"} eq "group") {
if ($in_group) {
warn "ERROR: New group started without ending last one in config\n";
@@ -725,6 +742,7 @@ sub interpret_config {
"search" => NFD($cmd->[1]->{"value"}),
"replace" => $cmd->[2]->{"value"}};
for (3..$#$cmd) {
+ # add optional arguments as keys in replacement config
$config{"replacements"}->[-1]->{$cmd->[$_]->{"value"}} = 1;
}
} elsif ($cmd->[0]->{"value"} eq "matchignore") {
@@ -742,17 +760,24 @@ sub interpret_config {
return undef;
}
my $table = $cmd->[1]->{"value"};
- if (!exists($config{tables}->{$table})) {
+ if (!exists($tables{$table})) {
warn "ERROR: nonexistent table \"$table\" in replace statement.\n";
return undef;
}
- if (exists($tmp_table_paths{$table})) {
- $config{"table_paths"}->{$tmp_table_paths{$table}} = [$#{$config{"replacements"}}, $table];
- }
+
+ # make a list of all replacements that are affected by this
+ # file so that they can be updated when a word is added
+ # through the gui
+ my $table_path = $table_id_to_path{$table};
+ my $replacement_id = $#{$config{"replacements"}};
+ $config{"table_paths"}->{$table_path} = [] if !exists $config{"table_paths"}->{$table_path};
+ push @{$config{"table_paths"}->{$table_path}}, [$replacement_id, $table];
+
# Note: we don't need to check if $table{"choicesep"} was defined
# here since we can't ever get this far without first having
# loaded a table anyways
- add_to_trie($table, $config{"replacements"}->[-1]->{"words"}, $config{tables}->{$table}, $args, \%config);
+ my $trie_root = $config{"replacements"}->[-1]->{"words"};
+ add_to_trie($table, $trie_root, $tables{$table}, $args, \%config);
} elsif ($cmd->[0]->{"value"} eq "split") {
$config{"split"} = $cmd->[1]->{"value"};
} elsif ($cmd->[0]->{"value"} eq "beforeword") {
@@ -857,9 +882,21 @@ sub handle_unknown_word_action {
}
print($fh $word . $config->{tablesep} . $replace_word . "\n");
close($fh);
- my $replacement_id = $config->{"table_paths"}->{$table_path}->[0];
- my $trie = $config->{"replacements"}->[$replacement_id]->{"words"};
- add_to_trie($config->{"table_paths"}->{$table_path}->[1], $trie, {$word => $replace_word}, $args, $config);
+ # loop over all table ids that are impacted by this file
+ foreach my $replacement (@{$config->{"table_paths"}->{$table_path}}) {
+ my $replacement_id = $replacement->[0];
+ my $table_id = $replacement->[1];
+ my $trie = $config->{"replacements"}->[$replacement_id]->{"words"};
+ my $final_table = {$word => $replace_word};
+ # handle expansions
+ foreach my $expand (@{$config->{"expands"}->{$table_id}}) {
+ my $ending_table_id = $expand->[0];
+ my $noroot = $expand->[1];
+ my $endings_table = $config->{"ending_tables"}->{$ending_table_id};
+ $final_table = expand_table $final_table, $endings_table, $noroot, $config;
+ }
+ add_to_trie($table_id, $trie, $final_table, $args, $config);
+ }
return 1;
} elsif ($action->[0] eq "reload") {
my $new_config = load_config $args;
@@ -1451,18 +1488,15 @@ pressed.
The filtering for which table files are actually shown here is
currently a bit rudimentary. First, all paths that are used in the
-B<table> statements in the config are put into a list. Then, the
-paths corresponding to any tables used for word endings in the
-B<expand> statements are removed from the list, and that is what
-is shown in this window. The reason for removing those paths from
-the list is that it gets somewhat confusing when all the tables
-that are only used for word endings are also in the list, and it
-is very unlikely that an unknown word would need to be written to
-one of those files. If necessary, the word can always be added
-manually and the config reloaded. Note also that only actual
-table paths are shown here, not the tables themselves - this is
-not necessarily a one-to-one mapping since new tables can be
-generated with the B<expand> statements.
+B<table> statements in the config are put into a list. Then, only
+the paths corresponding to table IDs actually used in B<replace>
+statements are added to the list that gets shown in the GUI. The
+reason for not showing all table paths in the list is that it gets
+somewhat confusing when all the tables that are only used for word
+endings are also in the list, and it is very unlikely that an
+unknown word would need to be written to one of those files. If
+necessary, the word can always be added manually and the config
+reloaded.
=item Reload config
@@ -1570,6 +1604,17 @@ cannot be used as part of the replacement string in B<match> statements.
Lookaheads and lookbehinds are fine, though, and could be useful in
certain cases.
+All tables must be loaded before they are used, or there will be an
+error that the table does not exist.
+
+Warning: If a B<replace> statement is located before an B<expand>
+statement that would have impacted the table used, there will be no
+error but the expand statement won't have any impact.
+
+Basic rule of thumb: Always put the B<table> statements before the
+B<expand> statements and the B<expand> statements before the B<replace>
+statements.
+
=over 8
=item B<split> <regex string>
@@ -1643,12 +1688,21 @@ original script first and the replacement word second. The replacement word
can optionally have several parts separated by B<choicesep>, which will cause the
user to be prompted to choose one of the options.
-=item B<expand> <table identifier> <new table identifier> <word ending table> [noroot]
+If, for whatever reason, the same table is needed twice, but with different endings,
+the table can simply be loaded twice with different IDs. If the same path is loaded,
+the table that has already been loaded will be reused.
+
+=item B<expand> <table identifier> <word ending table> [noroot]
Expand the table C<< <table identifier> >>, i.e. generate all the word forms using
the word endings in C<< <word ending table> >>, saving the result as a table with the
-identifier C<< <new table identifier> >>. If C<same> is specified as C<< <new table
-identifier> >>, the original C<< <table identifier> >> is used instead.
+identifier C<< <new table identifier> >>.
+
+Note: There used to be a C<< <new table identifier> >> argument to create a new
+table in case one table had to be expanded with different endings. This has been
+removed because it was a bit ugly, especially since there wasn't a proper mapping
+from table IDs to filenames anymore. If this functionality is needed, the same table
+file can simply be loaded multiple times. See the B<table> section above.
If B<noroot> is set, the root forms of the words are not kept.
@@ -1660,10 +1714,6 @@ Note that each of the root words is also split into its choices (if necessary)
during the expanding, so it is possible to use B<choicesep> in both the endings
and root words.
-Note that the paths of all tables used for C<< <word ending table> >> are removed from the
-list of paths that is later shown in the unknown word window. See L</"UNKNOWN WORD
-WINDOW"> for details.
-
=item B<match> <regex string> <replacement string> [beginword] [endword] [nofinal]
Perform a RegEx match using the given C<< <regex string> >>, replacing it with
@@ -1701,6 +1751,28 @@ End a replacement group.
=back
+=head1 BUGS
+
+Although it may not seem like it, one of the ugliest parts of the program is the
+GUI functionality that allows the user to add a replacement word. The problem is
+that all information about the B<expand> and B<replace> statements has to be kept
+in order to properly handle adding a word to one of the files and simultaneously
+adding it to the currently loaded tables I<without reloading the entire config>.
+The way it currently works, the replacement word is directly written to the file,
+then all B<expand> statements that would have impacted the words from this file
+are redone (just for the newly added word) and the resulting words are added to
+the appropriate tables (or, technically, the appropriate 'trie'). Since a file
+can be mapped to multiple table IDs and a table ID can occur in multiple replace
+statements, this is more complicated than it sounds, and thus it is very likely
+that there are bugs lurking here somewhere. Do note that "Reload config" will
+B<always> reload the entire configuration, so that's safe to do even if the
+on-the-fly replacing doesn't work.
+
+In general, I have tested the GUI code much less than the rest since you can't
+really test it automatically very well.
+
+Tell me if you find any bugs.
+
=head1 SEE ALSO
perlre, perlretut