commit 9bece82fb8385f9b72a49e7c2b98a9b4f2006182
parent 32b57ea48795b5d6a406fcf15e3543d47618b666
Author: lumidify <nobody@lumidify.org>
Date: Wed, 8 Apr 2020 08:00:04 +0200
Add comment option for text files; default to STDIN for input file
Diffstat:
3 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/tests/test6/config b/tests/test6/config
@@ -1,6 +1,7 @@
split "[ \n]+"
beforeword " "
afterword "[ \n]"
+comment "#"
ignore "../data/ignore.txt"
table words "../data/words.txt"
diff --git a/tests/test6/input.txt b/tests/test6/input.txt
@@ -1,3 +1,4 @@
+word1#sfsafafasfs#sdfdsfsfs
word1 word2
123word1 word9 123 word4
word20 word01231 word0
diff --git a/transliterate.pl b/transliterate.pl
@@ -469,7 +469,7 @@ sub parse_config {
$state = 0;
push(@commands, []);
foreach my $char (split(//, $line)) {
- if ($char eq "#") {
+ if ($char eq "#" && !($state & $IN_STR)) {
last;
} elsif ($char eq '"') {
if ($state & $IN_STR) {
@@ -698,6 +698,7 @@ sub interpret_config {
"afterword" => [$STRING],
"tablesep" => [$STRING],
"choicesep" => [$STRING],
+ "comment" => [$STRING],
"group" => [],
"endgroup" => [],
"retrywithout" => [$STRING, $STRING],
@@ -837,7 +838,7 @@ sub interpret_config {
}
} elsif ($cmd_name eq "split" || $cmd_name eq "beforeword" ||
$cmd_name eq "afterword" || $cmd_name eq "tablesep" ||
- $cmd_name eq "choicesep") {
+ $cmd_name eq "choicesep" || $cmd_name eq "comment") {
$config{$cmd_name} = $cmd->[1]->{"value"};
} elsif ($cmd_name eq "ignore") {
$config{"ignore"} = $cmd->[1]->{"value"};
@@ -1272,15 +1273,12 @@ sub get_unknown_words {
# to the file handle $outputfh, prompting the user for unknown words or
# word choices (if those aren't disabled on the command line)
sub replace {
- my ($config, $args, $outputfh) = @_;
- # Is there *really* no more efficient way to get the total number of lines?
- open my $fh, "<", $args->{"input"} or die "ERROR: Cannot open input file \"$args->{input}\" for reading.\n";
- my $total_lines = 0;
- while (<$fh>) {$total_lines++};
- close $fh;
- open $fh, "<", $args->{"input"} or die "ERROR: Cannot open input file \"$args->{input}\" for reading.\n";
- while (my $line = <$fh>) {
+ my ($config, $args, $total_lines, $inputfh, $outputfh) = @_;
+ while (my $line = <$inputfh>) {
next if $. < $args->{"start"};
+ if (exists $config->{"comment"}) {
+ $line =~ s/\Q$config->{comment}\E.*$//;
+ }
my $nfd_line = NFD($line);
my $substrings = replace_line($config, $nfd_line);
@@ -1311,7 +1309,6 @@ sub replace {
print $outputfh $_->[1];
}
}
- close $fh;
}
my %args = ("config" => "config", "start" => 1, "errors" => "", "output" => "");
@@ -1324,7 +1321,7 @@ GetOptions(
"checkduplicates") or pod2usage(1);
pod2usage(-exitval => 0, -verbose => 2) if $args{"help"};
-pod2usage(1) if $#ARGV != 0 && !$args{"checkduplicates"};
+pod2usage(-exitval => 1, -verbose => 1) if @ARGV > 1;
if (!-f $args{"config"}) {
die "ERROR: config file \"$args{config}\" does not exist or is not a file.\n";
@@ -1335,11 +1332,19 @@ if (!$config) {
}
exit 0 if ($args{"checkduplicates"});
-my $input = $ARGV[0];
-if (!-f $input) {
- die "ERROR: input file \"$input\" does not exist or is not a file.\n";
+my $inputfh;
+my $total_lines = "UNKNOWN";
+if (@ARGV < 1) {
+ warn "WARNING: no input file supplied; taking input from STDIN\n";
+ $inputfh = \*STDIN;
+} else {
+ open $inputfh, "<", $ARGV[0] or die "ERROR: Cannot open input file \"$ARGV[0]\" for reading.\n";
+ # Is there *really* no more efficient way to get the total number of lines?
+ $total_lines = 0;
+ while (<$inputfh>) {$total_lines++};
+ close $inputfh;
+ open $inputfh, "<", $ARGV[0] or die "ERROR: Cannot open input file \"$ARGV[0]\" for reading.\n";
}
-$args{"input"} = $input;
if (-f $args{"errors"} && !$args{"force"}) {
my $choice = "";
@@ -1373,7 +1378,8 @@ if ($args{"output"} eq "") {
open $outputfh, ">", $args{"output"} or die "ERROR: cannot open \"$args{output}\" for writing.\n";
}
-replace($config, \%args, $outputfh);
+replace($config, \%args, $total_lines, $inputfh, $outputfh);
+close $inputfh;
close $outputfh;
__END__
@@ -1387,6 +1393,7 @@ transliterate.pl - Transliterate text files
transliterate.pl [options][input file]
Start the transliteration engine with the given file as input.
+The input file defaults to STDIN if no filename is given.
=head1 OPTIONS
@@ -1762,6 +1769,16 @@ prompting the user.
B<Default:> C<$>
+=item B<comment> <string>
+
+If set, everything from the first occurrence of C<< <string> >> to the end
+of the line (including C<< <string> >> itself) is ignored on every input line.
+
+Note that this is a plain textual removal with no escape mechanism: every
+line containing C<< <string> >> is cut off at its first occurrence. Set this
+to a string that does not occur anywhere in the actual text (or don't use
+the option at all).
+
=item B<ignore> <filename>
Sets the file of words to ignore.