commit 92c2f7acbbef5624803f1d296dce093edbe13dd3
parent f5414cb7c164023b787fa56d4ab59313df8961e1
Author: lumidify <nobody@lumidify.org>
Date: Thu, 2 Apr 2020 10:10:55 +0200
Add targetdiacritics option
Diffstat:
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/transliterate.pl b/transliterate.pl
@@ -379,6 +379,19 @@ sub prompt_choose_word {
my $word = $replacements[$cur_replacement]->[1];
$wordlabel->set_text("Word \"$word\" has multiple replacement options:");
my @choices = split /\Q$config->{choicesep}\E/, $replacements[$cur_replacement]->[1];
+ if (exists $config->{"targetdiacritics"}) {
+ # This nasty bit of code counts how many of the target diacritics occur
+ # in every choice and sorts the choices in descending order based on that
+ my %choice_nums;
+ foreach my $choice (@choices) {
+ $choice_nums{$choice} = 0;
+ foreach my $diacritic (@{$config->{"targetdiacritics"}}) {
+ my @matches = NFD($choice) =~ /$diacritic/;
+ $choice_nums{$choice} += scalar @matches if @matches;
+ }
+ }
+ @choices = sort {$choice_nums{$b} <=> $choice_nums{$a}} @choices;
+ }
foreach my $word_choice (@choices) {
my $button = Gtk2::Button->new($word_choice);
$button->signal_connect(
@@ -698,7 +711,8 @@ sub interpret_config {
"choicesep" => [$STRING],
"group" => [],
"endgroup" => [],
- "diacritics" => [$STRING]
+ "diacritics" => [$STRING],
+ "targetdiacritics" => [$STRING]
);
my $in_group = 0;
foreach my $cmd (@$config_list) {
@@ -825,6 +839,13 @@ sub interpret_config {
foreach (1..$#$cmd) {
push @{$config{"diacritics"}}, $cmd->[$_]->{"value"};
}
+ } elsif ($cmd->[0]->{"value"} eq "targetdiacritics") {
+ if (!exists $config{"targetdiacritics"}) {
+ $config{"targetdiacritics"} = [];
+ }
+ foreach (1..$#$cmd) {
+ push @{$config{"targetdiacritics"}}, $cmd->[$_]->{"value"};
+ }
} elsif ($cmd->[0]->{"value"} eq "split") {
$config{"split"} = $cmd->[1]->{"value"};
} elsif ($cmd->[0]->{"value"} eq "beforeword") {
@@ -1858,6 +1879,18 @@ There are quite advanced Unicode algorithms that could be used to compare words
while ignoring diacritics, but I do not know if it would be possible to use any
of those with the current way this engine works.
+=item B<targetdiacritics> <diacritic> [...]
+
+This was only added to simplify transliteration from Hindi to Urdu with the
+same database. When this is set, the choices in the
+L<word choice window|/"WORD CHOICE WINDOW"> are sorted in descending order
+based on how many of the diacritics from this list are matched in each
+choice (a diacritic that occurs several times in one choice is still only
+counted once). This is so that when transliterating from Hindi to Urdu, the
+choice with the most diacritics is always at the top.
+
+The attentive reader will notice at this point that most of the features
+in this program were added specifically for dealing with Urdu and Hindi.
+
=back
=head1 BUGS