Add targetdiacritics option - transliterate

commit 92c2f7acbbef5624803f1d296dce093edbe13dd3
parent f5414cb7c164023b787fa56d4ab59313df8961e1
Author: lumidify <nobody@lumidify.org>
Date:   Thu,  2 Apr 2020 10:10:55 +0200

Add targetdiacritics option

Diffstat:
M transliterate.pl  | 35 ++++++++++++++++++++++++++++++++++-

1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/transliterate.pl b/transliterate.pl
@@ -379,6 +379,19 @@ sub prompt_choose_word {
 		my $word = $replacements[$cur_replacement]->[1];
 		$wordlabel->set_text("Word \"$word\" has multiple replacement options:");
 		my @choices = split /\Q$config->{choicesep}\E/, $replacements[$cur_replacement]->[1];
+		if (exists $config->{"targetdiacritics"}) {
+			# This nasty bit of code finds the number of diacritics in every
+			# choice and sorts the choices in descending order based on that
+			my %choice_nums;
+			foreach my $choice (@choices) {
+				$choice_nums{$choice} = 0;
+				foreach my $diacritic (@{$config->{"targetdiacritics"}}) {
+					my @matches = NFD($choice) =~ /$diacritic/;
+					$choice_nums{$choice} += scalar @matches if @matches;
+				}
+			}
+			@choices = sort {$choice_nums{$b} <=> $choice_nums{$a}} @choices;
+		}
 		foreach my $word_choice (@choices) {
 			my $button = Gtk2::Button->new($word_choice);
 			$button->signal_connect(
@@ -698,7 +711,8 @@ sub interpret_config {
 		"choicesep" => [$STRING],
 		"group" => [],
 		"endgroup" => [],
-		"diacritics" => [$STRING]
+		"diacritics" => [$STRING],
+		"targetdiacritics" => [$STRING]
 	);
 	my $in_group = 0;
 	foreach my $cmd (@$config_list) {
@@ -825,6 +839,13 @@ sub interpret_config {
 				foreach (1..$#$cmd) {
 					push @{$config{"diacritics"}}, $cmd->[$_]->{"value"};
 				}
+			} elsif ($cmd->[0]->{"value"} eq "targetdiacritics") {
+				if (!exists $config{"targetdiacritics"}) {
+					$config{"targetdiacritics"} = [];
+				}
+				foreach (1..$#$cmd) {
+					push @{$config{"targetdiacritics"}}, $cmd->[$_]->{"value"};
+				}
 			} elsif ($cmd->[0]->{"value"} eq "split") {
 				$config{"split"} = $cmd->[1]->{"value"};
 			} elsif ($cmd->[0]->{"value"} eq "beforeword") {
@@ -1858,6 +1879,18 @@ There are quite advanced Unicode algorithms that could be used to compare words
 while ignoring diacritics, but I do not know if it would be possible to use any
 of those with the current way this engine works.
 
+=item B<targetdiacritics> <diacritic> [...]
+
+This was only added to simplify transliteration from Hindi to Urdu with the
+same database. When this is set, the choices in the
+L<word choice window|/"WORD CHOICE WINDOW"> are sorted in descending order
+by how many of the diacritics in this list match each choice (each counts
+once, however often it occurs). This is so that when transliterating from
+Hindi to Urdu, the choice with the most diacritics is always at the top.
+
+The attentive reader will notice at this point that most of the features
+in this program were added specifically for dealing with Urdu and Hindi.
+
 =back
 
 =head1 BUGS

	transliterate Transliteration engine
	git clone git://lumidify.org/transliterate.git
	Log \| Files \| Refs \| README \| LICENSE