commit 229f872be7b4a68a74d1c9924845438968190809
parent 49357ed325d344e830564abdaacb61976e1fca00
Author: lumidify <nobody@lumidify.org>
Date: Tue, 31 Mar 2020 17:40:10 +0200
Modify diacritic handling
Diffstat:
2 files changed, 38 insertions(+), 41 deletions(-)
diff --git a/tests/data/words1.txt b/tests/data/words1.txt
@@ -8,3 +8,4 @@ word6,word6_replaced
word7,word7_replaced
word8,word8_replaced
word9,word9_replaced
+wörd0,word0_replaced|word0_replaced2
diff --git a/transliterate.pl b/transliterate.pl
@@ -162,33 +162,45 @@ sub prompt_unknown_word {
$vbox->pack_start($hbox, FALSE, FALSE, 10);
$hbox->show;
+ # AHHHH! IT BURNS!!! THE CODE IS SO HORRIBLE!
+ # Take note, kids - this is what happens when you keep adding
+ # features without rethinking your basic design.
+
+ # declare this here so it can already be used
+ my $path_list;
if (exists $config->{"diacritics"}) {
$hbox = Gtk2::HBox->new(FALSE, 0);
- my $no_diacritic_label;
- my $stripped;
- my $accept_button = Gtk2::Button->new("Accept?");
+ my $orig_entry;
+ my $repl_entry;
+ my $accept_button = Gtk2::Button->new("Add to table");
$accept_button->signal_connect(
clicked => sub {
- $action = ["replace", @$stripped];
- $window->destroy;
+ if ($path_list->get_active != -1) {
+ $action = ["add", $orig_entry->get_text, $repl_entry->get_text, $path_list->get_active_text];
+ $window->destroy;
+ }
}, $window);
$button = Gtk2::Button->new("Retry without diacritics");
$button->signal_connect(
clicked => sub {
- $stripped = replace_strip_diacritics($config, $word);
- my $tmp = "";
+ my $stripped = replace_strip_diacritics($config, $word);
+ my $repl_text = "";
foreach (@$stripped) {
- $tmp .= $_->[1];
+ $repl_text .= $_->[1];
}
- $no_diacritic_label->set_text($tmp);
+ $repl_entry->set_text($repl_text);
+ $orig_entry->set_text($word);
+ $repl_entry->show;
+ $orig_entry->show;
$accept_button->show;
}, $window);
$hbox->pack_start($button, FALSE, FALSE, 0);
$button->show;
$hbox->pack_start($accept_button, FALSE, FALSE, 0);
- $no_diacritic_label = Gtk2::Label->new("");
- $hbox->pack_start($no_diacritic_label, FALSE, FALSE, 10);
- $no_diacritic_label->show;
+ $orig_entry = Gtk2::Entry->new;
+ $repl_entry = Gtk2::Entry->new;
+ $hbox->pack_start($orig_entry, TRUE, TRUE, 10);
+ $hbox->pack_start($repl_entry, TRUE, TRUE, 10);
$vbox->pack_start($hbox, FALSE, FALSE, 0);
$hbox->show;
}
@@ -197,7 +209,7 @@ sub prompt_unknown_word {
$label = Gtk2::Label->new("Add to list: ");
$hbox->pack_start($label, FALSE, FALSE, 0);
$label->show;
- my $path_list = Gtk2::ComboBox->new_text;
+ $path_list = Gtk2::ComboBox->new_text;
foreach my $path (sort keys %{$config->{"display_tables"}}) {
$path_list->append_text($path);
}
@@ -896,7 +908,7 @@ sub load_config {
# 1 - the current line needs to be re-transliterated with the new config
# 2 - an error occurred while reloading the config
sub handle_unknown_word_action {
- my ($substrings, $index, $action, $config, $args) = @_;
+ my ($action, $config, $args) = @_;
if ($action->[0] eq "ignore") {
$config->{"ignore_words"}->{$action->[2]} = "";
if ($action->[1] eq "permanent") {
@@ -954,10 +966,7 @@ sub handle_unknown_word_action {
} else {
return 2;
}
- } elsif ($action->[0] eq "replace") {
- splice @$substrings, $index, 1, @{$action}[1..$#$action];
}
- return 0;
}
# Split $substrings based on the "split" regex in $config.
@@ -1226,14 +1235,11 @@ sub replace_line {
# $args - the command line args
# $cur_lineno - display string to show the user the current line number
# Returns:
-# -1 - all done
-# anything else - the substrings must be replaced again starting at the returned index
+# 1 - the line needs to be re-transliterated
+# 0 - all done
sub get_unknown_words {
my ($substrings, $config, $args, $cur_lineno) = @_;
- my $i = 0;
- # this is done so $substrings can be modified during the loop
- # (instead of just foreach)
- while ($i <= $#$substrings) {
+ foreach my $i (0 .. $#$substrings) {
my $word = $substrings->[$i];
if (!$word->[0] && !exists($config->{"ignore_words"}->{$word->[1]})) {
my $contextl = "";
@@ -1254,23 +1260,21 @@ sub get_unknown_words {
);
# if $ret == 2, config could not be loaded
# if $ret == 1, line must be redone with new config
- my $ret = handle_unknown_word_action($substrings, $i, $action, $config, $args);
+ my $ret = handle_unknown_word_action($action, $config, $args);
# keep retrying until the user chooses an action which
# didn't throw an error
while ($ret == 2) {
$action = prompt_unknown_word($contextl, $contextl_orig,
$word->[1], $contextr, $contextr_orig,
$config, "$cur_lineno", 1);
- $ret = handle_unknown_word_action($substrings, $i, $action, $config, $args);
+ $ret = handle_unknown_word_action($action, $config, $args);
}
# re-transliterate the line with the new config
- if ($ret == 1) {
- return $i;
- }
+ return 1 if $ret == 1;
}
$i++;
}
- return -1;
+ return 0;
}
# Main replacement function
@@ -1292,18 +1296,8 @@ sub replace {
if (!$args->{"nounknowns"}) {
# re-transliterate the string if the config was reloaded
- my $start_index = 0;
- while ($start_index != -1) {
- $start_index = get_unknown_words($substrings, $config, $args, "$./$total_lines", $start_index);
- if ($start_index != -1) {
- my $str = "";
- foreach ($start_index..$#$substrings) {
- $str .= $substrings->[$_]->[2];
- }
- my $new_substrings = replace_line($config, $str);
- splice @$substrings, $start_index;
- push @$substrings, @$new_substrings;
- }
+ while (get_unknown_words($substrings, $config, $args, "$./$total_lines")) {
+ $substrings = replace_line($config, $nfd_line);
}
} elsif ($args->{"debug"}) {
foreach my $s (@$substrings) {
@@ -1849,6 +1843,8 @@ on-the-fly replacing doesn't work.
In general, I have tested the GUI code much less than the rest since you can't
really test it automatically very well.
+The diacritic handling code is very rudimentary.
+
Tell me if you find any bugs.
=head1 SEE ALSO