commit 49357ed325d344e830564abdaacb61976e1fca00
parent d08734f2e255b3f84a1e3f62df38e404800650c9
Author: lumidify <nobody@lumidify.org>
Date: Tue, 31 Mar 2020 16:43:21 +0200
Clean up a bit
Diffstat:
1 file changed, 14 insertions(+), 33 deletions(-)
diff --git a/transliterate.pl b/transliterate.pl
@@ -960,17 +960,17 @@ sub handle_unknown_word_action {
return 0;
}
-# FIXME: This only splits off "lone" split characters or those at the border to a
-# transliterated block, in oder to keep compound words together for replace. The
-# cruft needs to be removed at some point.
-# Split $substrings into single words based on the "split" option
-# in $config.
+# Split $substrings based on the "split" regex in $config.
+# This only marks "lone" split characters or split characters at a
+# border between transliterated and untransliterated blocks as
+# transliterated in order to keep compound words together for
+# `prompt_unknown_word`.
# $substrings can already be split at this point; only the
# ones that haven't been transliterated yet are modified
sub split_words {
my ($config, $substrings) = @_;
- # FIXME: is it more efficient to pre-compile with \A and \z individually?
- my $split_re = qr/($config->{"split"})/;
+ my $split_pre = qr/\A($config->{"split"})/;
+ my $split_post = qr/($config->{"split"})\z/;
my @substrings_new;
#FIXME: cleanup
foreach my $cur_substr (@$substrings) {
@@ -979,12 +979,12 @@ sub split_words {
next;
}
my $str = $cur_substr->[1];
- if ($str =~ /\A$split_re/) {
+ if ($str =~ /$split_pre/) {
push @substrings_new, [1, $1, $1];
$str = substr $str, length($1);
}
next if $str eq "";
- if ($str =~ /$split_re\z/) {
+ if ($str =~ /$split_post/) {
$str = substr $str, 0, -length($1);
push @substrings_new, [0, $str, $str];
push @substrings_new, [1, $1, $1];
@@ -993,30 +993,6 @@ sub split_words {
}
}
@$substrings = @substrings_new;
-=pod
- # FIXME: this is *probably* not needed anymore
- my @substrings_new;
- foreach my $cur_substr (@$substrings) {
- if ($cur_substr->[0] == 1) {
- push(@substrings_new, $cur_substr);
- next;
- }
-
- my @words = split(/$split_re/, $cur_substr->[1]);
- for my $i (0..$#words) {
- # Word is not delimiter
- # Split produces an empty field at the beginning if the string
- # starts with the delimiter
- if ($i % 2 == 0) {
- push(@substrings_new, [0, $words[$i], $words[$i]]) if ($words[$i] ne '');
- } else {
- # Delimiters can count as already replaced
- push(@substrings_new, [1, $words[$i], $words[$i]]);
- }
- }
- }
- @$substrings = @substrings_new;
-=cut
}
# small helper function to add an untransliterated string to the last substring
@@ -1239,6 +1215,11 @@ sub replace_line {
# added and just that word is selected to ignore, you never get a chance to add a
# replacement for the other word that it is attached to
+# NOTE: This is very ugly code. The GUI code is the worst, but this whole part
+# of the program is nasty. This is partially due to the fact that features kept
+# being added when their use was discovered. This problem might be fixed in the
+# future when I have time to rewrite all of this.
+
# Handle unknown words
# $substrings - the current substrings with unknown words
# $config - the program config