transliterate

Transliteration engine
git clone git://lumidify.org/transliterate.git
Log | Files | Refs | README | LICENSE

commit 659fb098117bc50efc4cb92d586afa267c727489
parent 9ceb2478425536bef95dcf3d4cc4e4c2d0bd51f4
Author: lumidify <nobody@lumidify.org>
Date:   Mon,  6 Apr 2020 15:43:26 +0200

Fix choiceoverride

Diffstat:
A tests/data/override.txt | 1 +
A tests/test6/README | 2 ++
A tests/test6/config | 16 ++++++++++++++++
A tests/test6/descr.txt | 1 +
A tests/test6/err.txt | 6 ++++++
A tests/test6/expected.txt | 6 ++++++
A tests/test6/input.txt | 6 ++++++
M transliterate.pl | 16 ++++++++++++----
8 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/tests/data/override.txt b/tests/data/override.txt @@ -0,0 +1 @@ +word0_replaced$word0_replaced2 word0_replaced2 diff --git a/tests/test6/README b/tests/test6/README @@ -0,0 +1,2 @@ +This can be used to play around with "choiceoverride", but it won't +work with "--nochoices". diff --git a/tests/test6/config b/tests/test6/config @@ -0,0 +1,16 @@ +split "[ \n]+" +beforeword " " +afterword "[ \n]" + +ignore "../data/ignore.txt" +table words "../data/words.txt" +table endings "../data/endings.txt" +choiceoverride "../data/override.txt" + +expand words endings + +match "\d+" "num_replaced" beginword + +group beginword endword +replace words +endgroup diff --git a/tests/test6/descr.txt b/tests/test6/descr.txt @@ -0,0 +1 @@ +Basic test diff --git a/tests/test6/err.txt b/tests/test6/err.txt @@ -0,0 +1,6 @@ +Unknown word: "word20" +Unknown word: "word01231" +Word "word0_replaced$word0_replaced2" with 2 word choices. +Unknown word: "aword1" +Unknown word: "end3" +Word "word0_replacedend3_replaced$word0_replaced2end3_replaced" with 2 word choices. diff --git a/tests/test6/expected.txt b/tests/test6/expected.txt @@ -0,0 +1,6 @@ +word1_replaced word2_replaced +num_replacedword1_replaced word9_replaced num_replaced word4_replaced +word20 word01231 word0_replaced2 +aword1 +word1_replacedend1_replaced word0_replacedend3_replaced$word0_replaced2end3_replaced end3 +num_replacedword2_replacedend1_replaced diff --git a/tests/test6/input.txt b/tests/test6/input.txt @@ -0,0 +1,6 @@ +word1 word2 +123word1 word9 123 word4 +word20 word01231 word0 +aword1 +word1end1 word0end3 end3 +432word2end1 diff --git a/transliterate.pl b/transliterate.pl @@ -5,6 +5,9 @@ # NOTE: If you're wondering why the error codes used by the functions are so # inconsistent, go ask my former self +# NOTE 2: This codebase has grown as new features were needed, so it's quite +# ugly now, but I haven't had time to clean it up. 
+ use strict; use warnings; use utf8; @@ -525,7 +528,7 @@ sub open_file_rel_abs { # Load a file of replacement words into a hash table sub load_table { - my ($filename, $args, $config, $revert) = @_; + my ($filename, $args, $config, $src_verbatim, $revert) = @_; my $fh = open_file_rel_abs $filename, $args->{"config"}; return if !$fh; my %table; @@ -547,7 +550,7 @@ sub load_table { $word = NFD $words[0]; $replacement = NFD $words[1]; } - my @word_choices = split /\Q$config->{choicesep}\E/, $word; + my @word_choices = $src_verbatim ? ($word) : split /\Q$config->{choicesep}\E/, $word; foreach my $word_choice (@word_choices) { if (exists $table{$word_choice}) { if ($args->{"checkduplicates"}) { @@ -722,7 +725,7 @@ sub interpret_config { if (exists $path_to_table{$table_path}) { $table = $path_to_table{$table_path}; } else { - $table = load_table $table_path, $args, \%config, $table_args{"revert"}; + $table = load_table $table_path, $args, \%config, 0, $table_args{"revert"}; return if !defined $table; $path_to_table{$table_path} = $table; } @@ -737,7 +740,10 @@ sub interpret_config { $config{"display_tables"}->{$table_path} = 1 if !exists $table_args{"nodisplay"}; } elsif ($cmd_name eq "choiceoverride") { my $table_path = $cmd->[1]->{"value"}; - my $table = load_table $table_path, $args, \%config; + # argument $src_verbatim to load_table forces it to take the entire + # source word without splitting it up with "choicesep", since here, + # we explicitly want to replace multiple choices with one + my $table = load_table $table_path, $args, \%config, 1; return if !defined $table; if (exists $config{"choiceoverride"}) { warn "Duplicate specification of \"choiceoverride\" option.\n"; @@ -1825,6 +1831,8 @@ Note that this does not sort the choices before comparison and they have to be matched exactly, so when a new choice is added, that needs to be added to this mapping as well, in exactly the same order. +Like B<targetdiacritics>, this has no effect if B<--nochoices> is set. 
+ To clarify the order in which choices are added (if they are not explicitly specified):