transliterate

Transliteration engine
git clone git://lumidify.org/transliterate.git (fast, but not encrypted)
git clone https://lumidify.org/git/transliterate.git (encrypted, but very slow)
Log | Files | Refs | README | LICENSE

commit 30989566256dc25ac95db7b9ccaefd4a400cad2c
parent a30baf4b4f41cf66cc9c7e9e9507291f698ac83c
Author: lumidify <nobody@lumidify.org>
Date:   Tue,  9 Mar 2021 08:59:56 +0100

Add dumptables option

Diffstat:
A .gitignore       |  2 ++
M transliterate.pl | 23 +++++++++++++++++++++--
2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+ur.aff
+ur.dic
diff --git a/transliterate.pl b/transliterate.pl
@@ -943,6 +943,16 @@ sub interpret_config {
 		warn "ERROR: no file of words to ignore specified.\n";
 		return;
 	}
+	if ($args->{"dumptables"}) {
+		foreach my $table_id (keys %tables) {
+			my $table_path = $table_id_to_path{$table_id};
+			if ($config{"display_tables"}->{$table_path}) {
+				for my $word (keys %{$tables{$table_id}}) {
+					print NFC($word) . "\n";
+				}
+			}
+		}
+	}
 
 	return \%config;
 }
@@ -1452,7 +1462,7 @@ GetOptions(
 	"force", "start=i",
 	"output=s", "config=s",
 	"errors=s", "help",
-	"checkduplicates") or pod2usage(1);
+	"checkduplicates", "dumptables") or pod2usage(1);
 
 pod2usage(-exitval => 0, -verbose => 2) if $args{"help"};
 pod2usage(-exitval => 1, -verbose => 1) if @ARGV > 1;
@@ -1464,7 +1474,7 @@ my $config = load_config \%args;
 if (!$config) {
 	die "ERROR: Invalid config\n";
 }
-exit 0 if ($args{"checkduplicates"});
+exit 0 if ($args{"checkduplicates"} || $args{"dumptables"});
 
 my $inputfh;
 my $total_lines = "UNKNOWN";
@@ -1572,6 +1582,15 @@ On that note, before duplicates are checked between tables in the same
 replacement group, duplicates inside the same file are already replaced,
 so that might be a bit confusing as well.
 
+=item B<--dumptables>
+
+Prints the words of all tables that don't have B<nodisplay> set.
+
+This is mainly meant to be used for generating word lists in order to
+use them in a spell checker. Note that the words printed here are in
+UTF-8 NFC (Unicode Canonical Composition Form), so it may not be ideal
+when the spellchecked text is not in the same form.
+
 =item B<--nochoices>
 
 Disables prompting for the right word when multiple replacement words exist.