transliterate

Transliteration engine
git clone git://lumidify.org/transliterate.git
Log | Files | Refs | README | LICENSE

commit 05eced686bac4dfb799896f89493c76a3337c602
parent a6c5da06d240b5739b8732bb4e8ffc7f40168b1f
Author: lumidify <nobody@lumidify.org>
Date:   Thu,  2 Apr 2020 14:28:14 +0200

Update documentation for targetdiacritics

Diffstat:
M transliterate.pl | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/transliterate.pl b/transliterate.pl @@ -21,7 +21,7 @@ use Scalar::Util qw(weaken); use File::Basename qw(dirname); use File::Spec::Functions qw(rel2abs file_name_is_absolute); -# takes a string of words separated by '$' and returns a new string in the +# takes a string of words separated by '$config->{choicesep}' and returns a new string in the # same format with duplicates removed sub get_unique_words { my ($word, $config) = @_; @@ -31,7 +31,7 @@ sub get_unique_words { } # Adds all words in $words to $trie -# Automatically combines duplicate words with "$" inbetween +# Automatically combines duplicate words with "$config->{choicesep}" inbetween sub add_to_trie { my ($table_name, $trie, $words, $args, $config) = @_; foreach my $word (keys %$words) { @@ -262,7 +262,7 @@ sub prompt_unknown_word { return $action; } -# Prompt the user when a word has multiple replacement options (separated by $) +# Prompt the user when a word has multiple replacement options (separated by $config->{choicesep}) # $cur_lineno - display string to show the current line number sub prompt_choose_word { my ($substrings, $cur_lineno, $config) = @_; @@ -271,6 +271,10 @@ sub prompt_choose_word { my @replacements; foreach (0..$#$substrings) { if ($substrings->[$_]->[1] =~ /\Q$config->{choicesep}\E/) { + # This ugly bit of code is here as a special case for transliterating + # Hindi to Urdu text - if there are *exactly* two choices and one + # contains diacritics but the other one doesn't, the one with diacritics + # is automatically used instead of prompting the user. if (exists $config->{"targetdiacritics"}) { my @choices = split /\Q$config->{choicesep}\E/, $substrings->[$_]->[1]; my @diacritics = @{$config->{"targetdiacritics"}}; @@ -1904,8 +1908,20 @@ based on the number of diacritics from this list that are matched in each choice. This is so that when transliterating from Hindi to Urdu, the choice with the most diacritics is always at the top. 
+Additionally, if there are I<exactly> two choices for a word and one of +them contains diacritics but the other one doesn't, the one containing +diacritics is automatically taken without ever prompting the user. This +is, admittedly, a very language-specific feature, but I couldn't think of +a simple way of adding it without building it directly into the actual program. + +Note that due to the way this is implemented, it will not have any effect +if B<--nochoices> is enabled. + The attentive reader will notice at this point that most of the features -in this program were added specifically for dealing with Urdu and Hindi. +in this program were added specifically for dealing with Urdu and Hindi, +which does appear to make sense, considering that this program was written +specifically for transliterating Urdu to Hindi and vice versa (although +not quite as much vice versa). =back