commit 229f872be7b4a68a74d1c9924845438968190809
parent 49357ed325d344e830564abdaacb61976e1fca00
Author: lumidify <nobody@lumidify.org>
Date:   Tue, 31 Mar 2020 17:40:10 +0200
Modify diacritic handling
Diffstat:
2 files changed, 38 insertions(+), 41 deletions(-)
diff --git a/tests/data/words1.txt b/tests/data/words1.txt
@@ -8,3 +8,4 @@ word6,word6_replaced
 word7,word7_replaced
 word8,word8_replaced
 word9,word9_replaced
+wörd0,word0_replaced|word0_replaced2
diff --git a/transliterate.pl b/transliterate.pl
@@ -162,33 +162,45 @@ sub prompt_unknown_word {
 	$vbox->pack_start($hbox, FALSE, FALSE, 10);
 	$hbox->show;
 
+	# FIXME: this dialog code has become hard to follow and needs a redesign.
+	# It is the result of adding features one after another without
+	# rethinking the basic design.
+
+	# declared before the diacritics block so its button callbacks can capture it; it is assigned further down
+	my $path_list;
 	if (exists $config->{"diacritics"}) {
 		$hbox = Gtk2::HBox->new(FALSE, 0);
-		my $no_diacritic_label;
-		my $stripped;
-		my $accept_button = Gtk2::Button->new("Accept?");
+		my $orig_entry;
+		my $repl_entry;
+		my $accept_button = Gtk2::Button->new("Add to table");
 		$accept_button->signal_connect(
 			clicked => sub {
-				$action = ["replace", @$stripped];
-				$window->destroy;
+				if ($path_list->get_active != -1) {
+					$action = ["add", $orig_entry->get_text, $repl_entry->get_text, $path_list->get_active_text];
+					$window->destroy;
+				}
 			}, $window);
 		$button = Gtk2::Button->new("Retry without diacritics");
 		$button->signal_connect(
 			clicked => sub {
-				$stripped = replace_strip_diacritics($config, $word);
-				my $tmp = "";
+				my $stripped = replace_strip_diacritics($config, $word);
+				my $repl_text = "";
 				foreach (@$stripped) {
-					$tmp .= $_->[1];
+					$repl_text .= $_->[1];
 				}
-				$no_diacritic_label->set_text($tmp);
+				$repl_entry->set_text($repl_text);
+				$orig_entry->set_text($word);
+				$repl_entry->show;
+				$orig_entry->show;
 				$accept_button->show;
 			}, $window);
 		$hbox->pack_start($button, FALSE, FALSE, 0);
 		$button->show;
 		$hbox->pack_start($accept_button, FALSE, FALSE, 0);
-		$no_diacritic_label = Gtk2::Label->new("");
-		$hbox->pack_start($no_diacritic_label, FALSE, FALSE, 10);
-		$no_diacritic_label->show;
+		$orig_entry = Gtk2::Entry->new;
+		$repl_entry = Gtk2::Entry->new;
+		$hbox->pack_start($orig_entry, TRUE, TRUE, 10);
+		$hbox->pack_start($repl_entry, TRUE, TRUE, 10);
 		$vbox->pack_start($hbox, FALSE, FALSE, 0);
 		$hbox->show;
 	}
@@ -197,7 +209,7 @@ sub prompt_unknown_word {
 	$label = Gtk2::Label->new("Add to list: ");
 	$hbox->pack_start($label, FALSE, FALSE, 0);
 	$label->show;
-	my $path_list = Gtk2::ComboBox->new_text;
+	$path_list = Gtk2::ComboBox->new_text;
 	foreach my $path (sort keys %{$config->{"display_tables"}}) {
 		$path_list->append_text($path);
 	}
@@ -896,7 +908,7 @@ sub load_config {
 # 1 - the current line needs to be re-transliterated with the new config
 # 2 - an error occurred while reloading the config
 sub handle_unknown_word_action {
-	my ($substrings, $index, $action, $config, $args) = @_;
+	my ($action, $config, $args) = @_;
 	if ($action->[0] eq "ignore") {
 		$config->{"ignore_words"}->{$action->[2]} = "";
 		if ($action->[1] eq "permanent") {
@@ -954,10 +966,7 @@ sub handle_unknown_word_action {
 		} else {
 			return 2;
 		}
-	} elsif ($action->[0] eq "replace") {
-		splice @$substrings, $index, 1, @{$action}[1..$#$action];
 	}
-	return 0;
 }
 
 # Split $substrings based on the "split" regex in $config.
@@ -1226,14 +1235,11 @@ sub replace_line {
 # $args - the command line args
 # $cur_lineno - display string to show the user the current line number
 # Returns:
-# -1 - all done
-# anything else - the substrings must be replaced again starting at the returned index
+# 1 - the line needs to be re-transliterated
+# 0 - all done
 sub get_unknown_words {
 	my ($substrings, $config, $args, $cur_lineno) = @_;
-	my $i = 0;
-	# this is done so $substrings can be modified during the loop
-	# (instead of just foreach)
-	while ($i <= $#$substrings) {
+	foreach my $i (0 .. $#$substrings) {
 		my $word = $substrings->[$i];
 		if (!$word->[0] && !exists($config->{"ignore_words"}->{$word->[1]})) {
 			my $contextl = "";
@@ -1254,23 +1260,21 @@ sub get_unknown_words {
 			);
 			# if $ret == 2, config could not be loaded
 			# if $ret == 1, line must be redone with new config
-			my $ret = handle_unknown_word_action($substrings, $i, $action, $config, $args);
+			my $ret = handle_unknown_word_action($action, $config, $args);
 			# keep retrying until the user chooses an action which
 			# didn't throw an error
 			while ($ret == 2) {
 				$action = prompt_unknown_word($contextl, $contextl_orig,
 					$word->[1], $contextr, $contextr_orig,
 					$config, "$cur_lineno", 1);
-				$ret = handle_unknown_word_action($substrings, $i, $action, $config, $args);
+				$ret = handle_unknown_word_action($action, $config, $args);
 			}
 			# re-transliterate the line with the new config
-			if ($ret == 1) {
-				return $i;
-			}
+			return 1 if $ret == 1;
 		}
 		$i++;
 	}
-	return -1;
+	return 0;
 }
 
 # Main replacement function
@@ -1292,18 +1296,8 @@ sub replace {
 
 		if (!$args->{"nounknowns"}) {
 			# re-transliterate the string if the config was reloaded
-			my $start_index = 0;
-			while ($start_index != -1) {
-				$start_index = get_unknown_words($substrings, $config, $args, "$./$total_lines", $start_index);
-				if ($start_index != -1) {
-					my $str = "";
-					foreach ($start_index..$#$substrings) {
-						$str .= $substrings->[$_]->[2];
-					}
-					my $new_substrings = replace_line($config, $str);
-					splice @$substrings, $start_index;
-					push @$substrings, @$new_substrings;
-				}
+			while (get_unknown_words($substrings, $config, $args, "$./$total_lines")) {
+				$substrings = replace_line($config, $nfd_line);
 			}
 		} elsif ($args->{"debug"}) {
 			foreach my $s (@$substrings) {
@@ -1849,6 +1843,8 @@ on-the-fly replacing doesn't work.
 In general, I have tested the GUI code much less than the rest since you can't
 really test it automatically very well.
 
+The diacritic handling code is very rudimentary.
+
 Tell me if you find any bugs.
 
 =head1 SEE ALSO