transliterate

Transliteration engine
git clone git://lumidify.org/transliterate.git
Log | Files | Refs | README | LICENSE

commit f21abfb2d751e7d2c228cae1a6c40e69609a7174
parent 05eced686bac4dfb799896f89493c76a3337c602
Author: lumidify <nobody@lumidify.org>
Date:   Fri,  3 Apr 2020 20:23:18 +0200

Misc. cleanup

Diffstat:
M transliterate.pl | 200 +++++++++++++++++++++++++++++--------------------------------------------------
1 file changed, 73 insertions(+), 127 deletions(-)

diff --git a/transliterate.pl b/transliterate.pl @@ -126,15 +126,10 @@ sub prompt_unknown_word { }, $window); $hbox->pack_start($label, FALSE, FALSE, 0); $hbox->pack_start($text, TRUE, TRUE, 10); - $label->show; - $text->show; $vbox->pack_start($hbox, FALSE, FALSE, 10); - $hbox->show; $hbox = Gtk2::HBox->new(FALSE, 0); $hbox->pack_start($button, FALSE, FALSE, 0); - $button->show; $vbox->pack_start($hbox, FALSE, FALSE, 0); - $hbox->show; }; $make_context_box->($contextl, $contextr, "Context: "); $make_context_box->($contextl_orig, $contextr_orig, "Original: "); @@ -142,7 +137,6 @@ sub prompt_unknown_word { my $hbox = Gtk2::HBox->new(FALSE, 0); my $label = Gtk2::Label->new("Ignore: "); $hbox->pack_start($label, FALSE, FALSE, 0); - $label->show; my $button = Gtk2::Button->new("This run"); $button->signal_connect( clicked => sub { @@ -150,7 +144,6 @@ sub prompt_unknown_word { $window->destroy; }, $window); $hbox->pack_start($button, FALSE, FALSE, 0); - $button->show; $button = Gtk2::Button->new("Permanently"); $button->signal_connect( clicked => sub { @@ -158,9 +151,7 @@ sub prompt_unknown_word { $window->destroy; }, $window); $hbox->pack_start($button, FALSE, FALSE, 5); - $button->show; $vbox->pack_start($hbox, FALSE, FALSE, 10); - $hbox->show; # AHHHH! IT BURNS!!! THE CODE IS SO HORRIBLE! 
# Take note, kids - this is what happens when you keep adding @@ -169,25 +160,19 @@ sub prompt_unknown_word { $hbox = Gtk2::HBox->new(FALSE, 0); $label = Gtk2::Label->new("Add to list: "); $hbox->pack_start($label, FALSE, FALSE, 0); - $label->show; my $path_list = Gtk2::ComboBox->new_text; foreach my $path (sort keys %{$config->{"display_tables"}}) { $path_list->append_text($path); } $hbox->pack_start($path_list, FALSE, FALSE, 0); - $path_list->show; $vbox->pack_start($hbox, FALSE, FALSE, 10); - $hbox->show; $hbox = Gtk2::HBox->new(FALSE, 0); $label = Gtk2::Label->new("Replacement: "); $hbox->pack_start($label, FALSE, FALSE, 0); - $label->show; my $replace_entry = Gtk2::Entry->new; $hbox->pack_start($replace_entry, TRUE, TRUE, 0); - $replace_entry->show; $vbox->pack_start($hbox, FALSE, FALSE, 0); - $hbox->show; $hbox = Gtk2::HBox->new(FALSE, 0); if (exists $config->{"diacritics"}) { @@ -202,7 +187,6 @@ sub prompt_unknown_word { $replace_entry->set_text($repl_text); }, $window); $hbox->pack_start($button, FALSE, FALSE, 0); - $button->show; } $button = Gtk2::Button->new("Add replacement"); @@ -214,9 +198,7 @@ sub prompt_unknown_word { } }, $window); $hbox->pack_start($button, FALSE, FALSE, 5); - $button->show; $vbox->pack_start($hbox, FALSE, FALSE, 5); - $hbox->show; $hbox = Gtk2::HBox->new(FALSE, 0); $button = Gtk2::Button->new("Stop processing"); @@ -226,7 +208,6 @@ sub prompt_unknown_word { $window->destroy; }, $window); $hbox->pack_start($button, FALSE, FALSE, 0); - $button->show; $button = Gtk2::Button->new("Reload config"); $button->signal_connect( @@ -235,19 +216,15 @@ sub prompt_unknown_word { $window->destroy; }, $window); $hbox->pack_start($button, FALSE, FALSE, 5); - $button->show; if ($config_error) { $label = Gtk2::Label->new("Error loading config; see terminal output for details"); $hbox->pack_start($label, FALSE, FALSE, 0); - $label->show; } $vbox->pack_start($hbox, FALSE, FALSE, 5); - $hbox->show; $window->add($vbox); - $vbox->show; - $window->show; + 
$window->show_all; Gtk2->main; if ($stop) { @@ -311,21 +288,17 @@ sub prompt_choose_word { my $linelabel = Gtk2::Label->new("Current line: $cur_lineno"); $vbox->pack_start($linelabel, FALSE, FALSE, 0); - $linelabel->show; my $wordlabel = Gtk2::Label->new(""); $wordlabel->set_alignment(0.0, 0.0); $vbox->pack_start($wordlabel, FALSE, FALSE, 0); - $wordlabel->show; my $undo = Gtk2::Button->new("Undo"); $vbox->pack_start($undo, FALSE, FALSE, 0); - $undo->show; $undo->set_sensitive(FALSE); my $button_vbox = Gtk2::VBox->new(FALSE, 0); $vbox->pack_start($button_vbox, FALSE, FALSE, 0); - $button_vbox->show; my $accept = Gtk2::Button->new("Accept changes?"); $vbox->pack_start($accept, FALSE, FALSE, 0); @@ -339,21 +312,12 @@ sub prompt_choose_word { $text->set_editable(FALSE); $hbox->pack_start($label, FALSE, FALSE, 0); $hbox->pack_start($text, TRUE, TRUE, 10); - $label->show; - $text->show; $vbox->pack_start($hbox, FALSE, FALSE, 10); - $hbox->show; $hbox = Gtk2::HBox->new(FALSE, 0); my $stop_button = Gtk2::Button->new("Stop processing"); $hbox->pack_start($stop_button, FALSE, FALSE, 0); - $stop_button->show; $vbox->pack_start($hbox, FALSE, FALSE, 0); - $hbox->show; - - $window->add($vbox); - $vbox->show; - $window->show; # generate the context to the left and to the right of the current word being replaced my $get_context = sub { @@ -468,10 +432,11 @@ sub prompt_choose_word { $fill_button_vbox->(); $fill_text_buffer->(); + $window->add($vbox); + $window->show_all; + $accept->hide; Gtk2->main; - if ($stop) { - die "Processing stopped at line $cur_lineno"; - } + die "Processing stopped at line $cur_lineno" if $stop; } my $ID = 0; @@ -483,15 +448,14 @@ sub parse_config { my $fh; if (!open($fh, "<", $f)) { warn "Can't open config file \"$f\"!\n"; - return undef; + return; } - my $line; my @commands; my $state = 0; my $IN_ID = 1; my $IN_STR = 2; my $cur_val = ""; - while ($line = <$fh>) { + while (my $line = <$fh>) { chomp($line); $state = 0; push(@commands, []); @@ -524,7 
+488,7 @@ sub parse_config { } if ($state & $IN_STR) { warn "ERROR: Unterminated string in config:\n$line"; - return undef; + return; } elsif ($cur_val) { push(@{$commands[-1]}, {type => $ID, value => $cur_val}); $cur_val = ""; @@ -539,31 +503,36 @@ sub parse_config { return \@commands; } -# Load a file of replacement words into a hash table -sub load_table { - my ($filename, $args, $config, $revert) = @_; - my $fh; - # if the paths are relative, find their absolute location based - # on the location of the config file +# if the path is relative, find its absolute location based +# on the location of the config file +sub open_file_rel_abs { + my ($filename, $config_file) = @_; if (!file_name_is_absolute $filename) { - my $config_dir = dirname $args->{"config"}; + my $config_dir = dirname $config_file; $filename = rel2abs($filename, $config_dir); } - if (!open($fh, "<", $filename)) { - warn "Can't open table file \"$filename\"!\n"; - return undef; + my $fh; + if (!open $fh, "<", $filename) { + warn "Can't open file \"$filename\"!\n"; + return; } - my $line; - my @words; + return $fh; +} + +# Load a file of replacement words into a hash table +sub load_table { + my ($filename, $args, $config, $revert) = @_; + my $fh = open_file_rel_abs $filename, $args->{"config"}; + return if !$fh; my %table; - while ($line = <$fh>) { + while (my $line = <$fh>) { chomp $line; next if (!$line); - @words = split(/\Q$config->{tablesep}\E/, $line); + my @words = split(/\Q$config->{tablesep}\E/, $line); if (@words != 2) { warn "ERROR: Malformed line in file \"$filename\":\n$line\n"; close $fh; - return undef; + return; } my $word; my $replacement; @@ -598,24 +567,14 @@ sub load_table { # Load table for words to ignore - only the keys matter, since there is no replacement sub load_ignore_table { my ($filename, $args) = @_; - my $line; + my $fh = open_file_rel_abs $filename, $args->{"config"}; + return if !$fh; my %table; - if (!file_name_is_absolute $filename) { - my $config_dir = dirname 
$args->{"config"}; - $filename = rel2abs($filename, $config_dir); - } - my $fh; - if (!open($fh, "<", $filename)) { - warn "Can't open ignore file \"$filename\"!\n"; - return undef;; - } - while ($line = <$fh>) { + while (my $line = <$fh>) { chomp $line; - if ($line) { - $table{NFD($line)} = ""; - } + $table{NFD($line)} = "" if $line; } - close($fh); + close $fh; return \%table; } @@ -625,7 +584,6 @@ sub load_ignore_table { # 1 - everything's fine sub expand_table { my ($table, $forms, $noroot, $config) = @_; - my %new_table; foreach my $word (keys %$table) { foreach my $ending (keys %$forms) { @@ -641,17 +599,15 @@ sub expand_table { } $new_table{$word . $ending} = join($config->{choicesep}, @word_options); } - if (!$noroot) { - $new_table{$word} = $table->{$word}; - } + $new_table{$word} = $table->{$word} if !$noroot; } return \%new_table; } # Check if the number and types of arguments given to a config command are right # Returns: -# 0 - an error occurred -# 1 - everything's fine +# undef - the arguments don't match +# 1 - the arguments match sub check_args { my ($args, $cmd) = @_; my $cmd_name = $cmd->[0]->{"value"}; @@ -661,7 +617,7 @@ sub check_args { $err .= " " . $arg->{"value"} } warn "$err\n"; - return 0; + return; } my $arg_num = 0; while ($arg_num <= $#$args) { @@ -678,7 +634,7 @@ sub check_args { $err .= " STRING" if ($arg->{"type"} == $STRING); } warn "$err\n"; - return 0; + return; } $arg_num++; } @@ -738,15 +694,14 @@ sub interpret_config { foreach my $cmd (@$config_list) { # All load statements must be before expand statements # All expand, beforeword, and afterword statements must be before replace statements + my $cmd_name = $cmd->[0]->{"value"}; if ($cmd->[0]->{"type"} == $ID) { if (!exists($mandatory_args{$cmd->[0]->{"value"}})) { warn "ERROR: Unknown command \"" . $cmd->[0]->{"value"} . 
"\" in config\n"; - return undef; - } - if (!check_args($mandatory_args{$cmd->[0]->{"value"}}, $cmd)) { - return undef; + return; } - if ($cmd->[0]->{"value"} eq "table") { + return if !check_args($mandatory_args{$cmd_name}, $cmd); + if ($cmd_name eq "table") { my $table_path = $cmd->[2]->{"value"}; my %table_args; foreach (3..$#$cmd) { @@ -768,7 +723,7 @@ sub interpret_config { # this is a hash to avoid duplicates if the same file # is loaded multiple times $config{"display_tables"}->{$table_path} = 1 if !exists $table_args{"nodisplay"}; - } elsif ($cmd->[0]->{"value"} eq "expand") { + } elsif ($cmd_name eq "expand") { my $orig_table_id = $cmd->[1]->{"value"}; my $ending_table_id = $cmd->[2]->{"value"}; my $noroot = 0; @@ -780,7 +735,7 @@ sub interpret_config { return; } elsif (!exists $tables{$ending_table_id}) { warn "expand: table \"$ending_table_id\" doesn't exist\n"; - return + return; } $config{"ending_tables"}->{$ending_table_id} = $tables{$ending_table_id}; @@ -788,28 +743,28 @@ sub interpret_config { push @{$config{"expands"}->{$orig_table_id}}, [$ending_table_id, $noroot]; my $new_table = expand_table($tables{$orig_table_id}, $tables{$ending_table_id}, $noroot, \%config); - return if !defined $new_table; + return if !$new_table; $tables{$orig_table_id} = $new_table; - } elsif ($cmd->[0]->{"value"} eq "group") { + } elsif ($cmd_name eq "group") { if ($in_group) { warn "ERROR: New group started without ending last one in config\n"; - return undef; + return; } push @{$config{"replacements"}}, {"type" => "group", "words" => {}}; for (1..$#$cmd) { $config{"replacements"}->[-1]->{$cmd->[$_]->{"value"}} = 1; } $in_group = 1; - } elsif ($cmd->[0]->{"value"} eq "endgroup") { + } elsif ($cmd_name eq "endgroup") { if (!$in_group) { warn "ERROR: endgroup command called while not in group\n"; - return undef; + return; } $in_group = 0; - } elsif ($cmd->[0]->{"value"} eq "match") { + } elsif ($cmd_name eq "match") { if ($in_group) { warn "ERROR: match command is 
invalid inside group\n"; - return undef; + return; } push @{$config{"replacements"}}, { "type" => "match", @@ -819,24 +774,24 @@ sub interpret_config { # add optional arguments as keys in replacement config $config{"replacements"}->[-1]->{$cmd->[$_]->{"value"}} = 1; } - } elsif ($cmd->[0]->{"value"} eq "matchignore") { + } elsif ($cmd_name eq "matchignore") { if ($in_group) { warn "ERROR: matchignore command is invalid inside group\n"; - return undef; + return; } push @{$config{"replacements"}}, {"type" => "match", "search" => NFD($cmd->[1]->{"value"})}; for (2..$#$cmd) { $config{"replacements"}->[-1]->{$cmd->[$_]->{"value"}} = 1; } - } elsif ($cmd->[0]->{"value"} eq "replace") { + } elsif ($cmd_name eq "replace") { if (!$in_group) { warn "ERROR: replace command called while not in group\n"; - return undef; + return; } my $table = $cmd->[1]->{"value"}; if (!exists($tables{$table})) { warn "ERROR: nonexistent table \"$table\" in replace statement.\n"; - return undef; + return; } # make a list of all replacements that are affected by this @@ -852,40 +807,31 @@ sub interpret_config { # loaded a table anyways my $trie_root = $config{"replacements"}->[-1]->{"words"}; add_to_trie($table, $trie_root, $tables{$table}, $args, \%config); - } elsif ($cmd->[0]->{"value"} eq "diacritics") { - if (!exists $config{"diacritics"}) { - $config{"diacritics"} = []; + } elsif ($cmd_name eq "diacritics" || $cmd_name eq "targetdiacritics") { + if (!exists $config{$cmd_name}) { + $config{$cmd_name} = []; } foreach (1..$#$cmd) { - push @{$config{"diacritics"}}, $cmd->[$_]->{"value"}; - } - } elsif ($cmd->[0]->{"value"} eq "targetdiacritics") { - if (!exists $config{"targetdiacritics"}) { - $config{"targetdiacritics"} = []; + push @{$config{$cmd_name}}, $cmd->[$_]->{"value"}; } - foreach (1..$#$cmd) { - push @{$config{"targetdiacritics"}}, $cmd->[$_]->{"value"}; - } - } elsif ($cmd->[0]->{"value"} eq "split") { + } elsif ($cmd_name eq "split") { $config{"split"} = $cmd->[1]->{"value"}; - } 
elsif ($cmd->[0]->{"value"} eq "beforeword") { + } elsif ($cmd_name eq "beforeword") { $config{"beforeword"} = $cmd->[1]->{"value"}; - } elsif ($cmd->[0]->{"value"} eq "afterword") { + } elsif ($cmd_name eq "afterword") { $config{"afterword"} = $cmd->[1]->{"value"}; - } elsif ($cmd->[0]->{"value"} eq "tablesep") { + } elsif ($cmd_name eq "tablesep") { $config{"tablesep"} = $cmd->[1]->{"value"}; - } elsif ($cmd->[0]->{"value"} eq "choicesep") { + } elsif ($cmd_name eq "choicesep") { $config{"choicesep"} = $cmd->[1]->{"value"}; - } elsif ($cmd->[0]->{"value"} eq "ignore") { + } elsif ($cmd_name eq "ignore") { $config{"ignore"} = $cmd->[1]->{"value"}; my $table = load_ignore_table $cmd->[1]->{"value"}, $args; - if (!$table) { - return undef; - } + return if !$table; $config{"ignore_words"} = $table; } else { - warn "ERROR: unknown command \"" . $cmd->[0]->{"value"} . "\" in config.\n"; - return undef; + warn "ERROR: unknown command \"" . $cmd_name . "\" in config.\n"; + return; } } else { my $err = "ERROR: line does not start with command:\n"; @@ -893,16 +839,16 @@ sub interpret_config { $err .= $cmd_part->{"value"}; } warn "$err\n"; - return undef; + return; } } if ($in_group) { warn "ERROR: unclosed group in config\n"; - return undef; + return; } if (!$config{"ignore"}) { warn "ERROR: no file of words to ignore specified.\n"; - return undef; + return; } return \%config; } @@ -914,7 +860,7 @@ sub load_config { my $args = shift; my $config_list = parse_config($args->{"config"}); if (!$config_list) { - return undef; + return; } return interpret_config $config_list, $args; }