commit bfbb35d091d986579edafe7b01ce0ce12dc1a27d
parent 0cae73baf7706b733eec0fadee2e11c7eb25b998
Author: lumidify <>
Date: Fri, 17 Dec 2021 09:11:55 +0100
Add Makefile; improve documentation
A | .gitignore | | | 1 | + |
A | CHANGELOG | | | 4 | ++++ |
A | Makefile | | | 37 | +++++++++++++++++++++++++++++++++++++ |
M | README | | | 11 | +++++++++-- |
A | lumia | | | 1403 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
D | | | | 1403 | ------------------------------------------------------------------------------- |
M | tests/README | | | 3 | +++ |
7 files changed, 1457 insertions(+), 1405 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
@@ -0,0 +1,4 @@
+1.0 -> 1.1
+* Made it work with cksum implementations that don't have '-q'
+* Added Makefile
+* Improved documentation
diff --git a/Makefile b/Makefile
@@ -0,0 +1,37 @@
+NAME = lumia
+VERSION = 1.1
+PREFIX = /usr/local
+MAN1 = ${NAME:=.1}
+${MAN1}: ${NAME}
+ pod2man ${NAME} ${MAN1}
+install: ${MAN1}
+ mkdir -p "${DESTDIR}${PREFIX}/bin"
+ cp -f ${NAME} "${DESTDIR}${PREFIX}/bin"
+ chmod 755 "${DESTDIR}${PREFIX}/bin/${NAME}"
+ mkdir -p "${DESTDIR}${MANPREFIX}/man1"
+ cp -f ${MAN1} "${DESTDIR}${MANPREFIX}/man1"
+ chmod 644 "${DESTDIR}${MANPREFIX}/man1/${MAN1}"
+ rm -f "${DESTDIR}${PREFIX}/bin/${NAME}"
+ rm -f "${DESTDIR}${MANPREFIX}/man1/${MAN1}"
+ rm -f ${MAN1}
+ rm -rf "${NAME}-${VERSION}"
+ mkdir -p "${NAME}-${VERSION}"
+ cp -rf ${NAME} ${MISCFILES} "${NAME}-${VERSION}"
+ tar cf - "${NAME}-${VERSION}" | gzip -c > "${NAME}-${VERSION}.tar.gz"
+ rm -rf "${NAME}-${VERSION}"
+.PHONY: all clean install uninstall dist
diff --git a/README b/README
@@ -1,3 +1,10 @@
-REQUIREMENTS: String::ShellQuote
+REQUIREMENTS: Perl 5, String::ShellQuote
-See the perldoc in for documentation (run perldoc -F
+lumia is meant for managing checksums of files in order to prevent bitrot.
+It does this by storing several special files in each directory to keep
+track of the checksums.
+See the perldoc in lumia for the documentation (run perldoc -F lumia).
+Alternatively, install it with 'make install' (as root; this just generates
+a man page and copies lumia and the generated man page to the appropriate
+system directories) and then run 'man lumia'.
diff --git a/lumia b/lumia
@@ -0,0 +1,1403 @@
+#!/usr/bin/env perl
+# TODO: some way to avoid writing .lumidify* in dirs but still index them?
+# TODO: store modified date and checksum files with changed date
+# TODO: add option to just check dir structure or maybe check if everything exists
+# TODO: add option to compare cksums of two dirs
+# TODO: exit status!
+use strict;
+use warnings;
+use File::Spec::Functions qw(catfile abs2rel);
+use File::Basename qw(basename dirname);
+use String::ShellQuote;
+use Pod::Usage;
+use Getopt::Long;
+# name of the per-directory file that stores the checksums of the files in that directory
+my $CKSUM_FILE = ".lumidify_archive_cksums";
+# name of the per-directory file that lists the subdirectories known to lumia
+my $DIR_FILE = ".lumidify_archive_dirs";
+# name of the per-directory file that lists files or directories lumia should ignore
+my $IGNORE_FILE = ".lumidify_archive_ignore";
+# name of the per-directory file containing the checksums of $CKSUM_FILE and $DIR_FILE
+my $DOUBLE_CKSUM_FILE = ".lumidify_archive_cksums.cksum";
+# uncomment this instead of the lines below to use
+# sha256 instead of cksum as the hash algorithm
+# Note: this isn't really tested properly
+#my $CKSUM_CMD = 'sha256 -q';
+my $CKSUM_CMD = 'cksum'; # command run by get_cksum; "-- <quoted path>" is appended to it
+ $CKSUM_FILE => 1,
+ $DIR_FILE => 1,
+ $IGNORE_FILE => 1,
+# Escape a filename so that it can be stored between double quotes in the
+# checksum and directory files: every backslash and every double quote
+# gets a backslash put in front of it.
+sub escape_filename {
+    my ($name) = @_;
+    # a single pass over the string gives the same result as escaping the
+    # backslashes first and the quotes afterwards
+    (my $escaped = $name) =~ s/(["\\])/\\$1/g;
+    return $escaped;
+# Build a generic path iterator over a work queue that starts out as @queue.
+# Each call of the returned closure takes the most recently added path off
+# the queue:
+# - for a directory, $dir_func->($path) must return an array ref of paths
+#   that are added to the queue; if it returns undef, the directory is
+#   dropped without being returned
+# - the path is returned if $file_func->($path) is true, otherwise the
+#   next path is tried
+# When the queue is empty, the closure returns nothing.
+sub make_file_iter {
+    my ($file_func, $dir_func, @queue) = @_;
+    my $next_path = sub {
+        while (scalar @queue) {
+            my $candidate = pop @queue;
+            if (-d $candidate) {
+                my $children = $dir_func->($candidate);
+                defined $children or next;
+                push @queue, @{$children};
+            }
+            $file_func->($candidate) and return $candidate;
+        }
+        return;
+    };
+    return $next_path;
+# make a basic filename iterator, which walks @files recursively and
+# returns every path for which $file_func returns a true value
+# directories that cannot be opened are reported (including the system
+# error) and treated as empty
+sub make_file_iter_basic {
+    my ($file_func, @files) = @_;
+    my $list_dir = sub {
+        my ($dir) = @_;
+        my $dh;
+        if (!opendir $dh, $dir) {
+            # the trailing newline keeps perl from appending "at ... line N"
+            warn "WARNING: Unable to open directory \"$dir\": $!\n";
+            return [];
+        }
+        my @entries = map { "$dir/$_" } grep { $_ ne "." && $_ ne ".." } readdir $dh;
+        closedir $dh;
+        return \@entries;
+    };
+    return make_file_iter($file_func, $list_dir, @files);
+# make an iterator that returns the paths originally passed in @dirs plus,
+# recursively, every directory listed in the $DIR_FILE of a directory it
+# visits
+# note: directories that are listed but do not exist are returned as well
+# (rmold depends on that); unless $quiet is set, an error is printed for
+# each of them
+sub make_lumia_iter {
+    my ($quiet, @dirs) = @_;
+    my $accept_all = sub {1};
+    my $listed_subdirs = sub {
+        my ($parent) = @_;
+        my $path = "$parent/$DIR_FILE";
+        -f $path or return [];
+        my $listed = read_file($path, {});
+        # an unreadable list makes the iterator skip this directory
+        defined $listed or return;
+        my @new_dirs;
+        for my $name (keys %{$listed}) {
+            my $dir_path = "$parent/$name";
+            if (!$quiet && !-d $dir_path) {
+                warn "ERROR: Directory \"$dir_path\" mentioned in " .
+                    "\"$path\" does not exist or is not directory.\n";
+            }
+            # queued even when it doesn't exist so rmold can work properly
+            push @new_dirs, $dir_path;
+        }
+        return \@new_dirs;
+    };
+    return make_file_iter($accept_all, $listed_subdirs, @dirs);
+# delete every special lumia file found in $dir or any directory below it
+# each deleted file is printed unless $args->{"q"} is set; files that
+# cannot be deleted produce a warning
+sub clean_files {
+    my ($dir, $args) = @_;
+    my $is_special = sub { exists $SPECIAL_FILES{basename $_[0]} };
+    my $iter = make_file_iter_basic($is_special, $dir);
+    while (my $file = $iter->()) {
+        if (unlink $file) {
+            print "Deleted \"$file\"\n" if !$args->{"q"};
+            next;
+        }
+        warn "WARNING: Unable to remove file \"$file\"!\n";
+    }
+# read the double-quoted names stored in $file into the hash ref $cksums and return that hash ref
+# if $handle_cksum_func is set, it is called with every line on which a new entry starts
+# and must return two values, the checksum and the rest of the line (that is then parsed for
+# the filename); the checksum becomes the value stored for the name (undef when no function is given);
+# returns nothing if the file can't be opened and undef if the function returns undef or a name is unterminated
+sub read_file {
+ my ($file, $cksums, $handle_cksum_func) = @_;
+ my $fh;
+ if (!open $fh, "<", $file) {
+ warn "ERROR: Unable to open file \"$file\": $!\n";
+ return;
+ }
+ my $in_fn = 0; # true while inside a quoted name (names may span several lines)
+ my $cur_cksum;
+ my $cur_str;
+ my $cur_fn = "";
+ foreach (<$fh>) {
+ next if (!$in_fn && /^$/); # skip empty lines between entries
+ if ($handle_cksum_func && !$in_fn) {
+ ($cur_cksum, $cur_str) = $handle_cksum_func->($_);
+ return undef if !defined $cur_cksum;
+ } else {
+ $cur_str = $_; # list file or continuation of a multi-line name: parse the whole line
+ }
+ my $bs = 0; # 1 while the previous character was an unpaired backslash
+ foreach my $ch (split(//, $cur_str)) {
+ if ($ch eq "\\") {
+ $bs++;
+ $cur_fn .= "\\" if !($bs %= 2) && $in_fn; # second backslash of a pair: store one literal backslash
+ } elsif ($bs % 2) {
+ $cur_fn .= $ch if $in_fn; # character after a single backslash is taken literally
+ $bs = 0;
+ } elsif ($ch eq "\"") {
+ if ($in_fn) {
+ $in_fn = 0;
+ $cksums->{$cur_fn} = $cur_cksum;
+ $cur_fn = "";
+ last; # anything after the closing quote is ignored
+ }
+ $in_fn = 1; # unescaped opening quote
+ } elsif ($in_fn) {
+ $cur_fn .= $ch;
+ }
+ }
+ }
+ close $fh;
+ if ($in_fn) {
+ warn "ERROR: Unterminated filename in file \"$file\"\n";
+ return undef;
+ }
+ return $cksums;
+# read the checksum file $file into the hash ref $cksums (name => checksum)
+# and return it
+# each entry has to start with $CKSUM_NUMFIELDS space-separated checksum
+# fields followed by the quoted filename; a line with too few fields is
+# reported and makes the whole read fail
+sub read_cksum_file {
+    my ($file, $cksums) = @_;
+    my $split_line = sub {
+        my ($line) = @_;
+        my $wanted = $CKSUM_NUMFIELDS + 1;
+        my @fields = split(/ /, $line, $wanted);
+        if (@fields != $wanted) {
+            warn "WARNING: Malformed line \"$line\" in file \"$file\"\n";
+            return;
+        }
+        # the last field is the part that still contains the quoted name
+        my $rest = pop @fields;
+        return (join(" ", @fields), $rest);
+    };
+    return read_file($file, $cksums, $split_line);
+# read both $CKSUM_FILE and $DIR_FILE of $dir into one hash ref
+# files map to their checksum, directories to undef
+# returns undef if either of the two files cannot be read
+sub read_cksums {
+    my ($dir) = @_;
+    my $entries = read_cksum_file("$dir/$CKSUM_FILE", {});
+    defined $entries or return undef;
+    my $with_dirs = read_file("$dir/$DIR_FILE", $entries);
+    return $with_dirs;
+# run $CKSUM_CMD on $path and return the first $CKSUM_NUMFIELDS
+# space-separated fields of its output, joined by single spaces
+# if the command fails, its output is printed as an error and undef is
+# returned
+sub get_cksum {
+    my ($path) = @_;
+    my $quoted = shell_quote($path);
+    # stderr is captured as well so it can be shown in the error message
+    my $raw = qx{$CKSUM_CMD -- $quoted 2>&1};
+    if ($? != 0) {
+        warn "ERROR getting cksum for file \"$path\":\n$raw";
+        return undef;
+    }
+    chomp $raw;
+    my @parts = split(/ /, $raw, $CKSUM_NUMFIELDS + 1);
+    return join(" ", @parts[0 .. $CKSUM_NUMFIELDS - 1]);
+# verify every file listed in $dir/$cksum_file against its stored checksum
+# prints "OK <path>" (unless $quiet is set) or "FAILED <path>" per file
+# returns 1 if no mismatch was found and 0 if there was a mismatch or the
+# checksum file could not be read; files whose checksum cannot be computed
+# are skipped and do not influence the result
+sub check_cksums {
+    my ($dir, $cksum_file, $quiet) = @_;
+    my $expected = read_cksum_file("$dir/$cksum_file", {});
+    return 0 unless defined $expected;
+    my $all_ok = 1;
+    for my $name (keys %{$expected}) {
+        my $path = "$dir/$name";
+        my $actual = get_cksum($path);
+        next unless defined $actual;
+        if ($actual ne $expected->{$name}) {
+            print "FAILED $path\n";
+            $all_ok = 0;
+            next;
+        }
+        print "OK $path\n" unless $quiet;
+    }
+    return $all_ok;
+# check the checksums of all files and directories given after $args
+# - a single file is compared against its entry in the $CKSUM_FILE of its
+#   directory; special lumia files and files without an entry are rejected
+#   with an error
+# - a directory is walked via the $DIR_FILE lists, checking both
+#   $DOUBLE_CKSUM_FILE and $CKSUM_FILE in every directory visited
+# $args->{"q"} suppresses the "OK" lines
+sub check_files {
+    my $args = shift;
+    my @dirs;
+    foreach my $file (@_) {
+        if (-d $file) {
+            push @dirs, $file;
+            next;
+        }
+        my $dir = dirname $file;
+        my $base = basename $file;
+        if (exists $SPECIAL_FILES{$base}) {
+            warn "ERROR: File is reserved for lumia: $file\n";
+            next;
+        }
+        # always pass a hash to fill: without it an empty checksum file
+        # gives undef and the file would be skipped without any message
+        my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {});
+        next if !defined $cksums;
+        if (!exists $cksums->{$base}) {
+            warn "ERROR: File doesn't exist in checksums: $file\n";
+            next;
+        }
+        my $output = get_cksum($file);
+        next if !defined $output;
+        if ($output eq $cksums->{$base}) {
+            print "OK $file\n" if !$args->{"q"};
+        } else {
+            print "FAILED $file\n";
+        }
+    }
+    my $iter = make_lumia_iter(0, @dirs);
+    while (my $file = $iter->()) {
+        check_cksums($file, $DOUBLE_CKSUM_FILE, $args->{"q"});
+        check_cksums($file, $CKSUM_FILE, $args->{"q"});
+    }
+# recompute the checksums of the special lumia files @files in $dir and
+# store them in $dir/$DOUBLE_CKSUM_FILE
+# entries already present in that file are kept; if the existing file
+# cannot be parsed nothing is written; files whose checksum cannot be
+# computed keep their old entry (if any)
+sub write_special_cksums {
+    my ($dir, @files) = @_;
+    my $cksum_file = "$dir/$DOUBLE_CKSUM_FILE";
+    my $cksums = (-f $cksum_file) ? read_cksum_file($cksum_file, {}) : {};
+    return if !defined $cksums;
+    for my $name (@files) {
+        my $sum = get_cksum("$dir/$name");
+        $cksums->{$name} = $sum if defined $sum;
+    }
+    write_file($cksum_file, $cksums, 1);
+# walk $top_dir recursively, looking for entries that are neither listed in $CKSUM_FILE or $DIR_FILE nor in $IGNORE_FILE nor special lumia files
+# - if $file_func is set, it is called as $file_func->($dir, $file) for each new entry; otherwise the path of the entry is printed
+# - if $before_dir_func is set, it is called with the directory before the first new entry
+# in it is handled (a false return value stops the processing of that directory) OR, if the directory
+# has no new entries, when it contains no $DOUBLE_CKSUM_FILE yet
+# - if $after_dir_func is set, it is called with the directory after its entries
+# have been processed, but only if at least one new entry was found
+sub check_new_files {
+ my ($top_dir, $file_func, $before_dir_func, $after_dir_func) = @_;
+ my $iter = make_file_iter sub {1}, sub {
+ my $dir = shift;
+ my $dh;
+ if (!opendir $dh, $dir) {
+ warn "ERROR: Unable to open directory \"$dir\"!";
+ return undef; # make_file_iter drops directories for which undef is returned
+ }
+ my $read_file_noerror = sub {
+ if (-f $_[0]) {
+ return $_[1]->($_[0], {}) // {}; # a list that cannot be read counts as empty
+ }
+ return {};
+ };
+ my $ignore = $read_file_noerror->("$dir/$IGNORE_FILE", \&read_file);
+ my $lumia_dirs = $read_file_noerror->("$dir/$DIR_FILE", \&read_file);
+ my $lumia_files = $read_file_noerror->("$dir/$CKSUM_FILE", \&read_cksum_file);
+ my @dirs;
+ my $found = 0; # set once the first new entry of this directory has been handled
+ while (my $file = readdir $dh) {
+ next if $file eq "." || $file eq "..";
+ next if exists $ignore->{$file} || exists $SPECIAL_FILES{$file};
+ if (!exists $lumia_dirs->{$file} && !exists $lumia_files->{$file}) {
+ if (!$found && defined $before_dir_func) {
+ last if !$before_dir_func->($dir); # refused: remaining entries and subdirectories not read yet are skipped
+ }
+ if (defined $file_func) {
+ $file_func->($dir, $file);
+ } else {
+ print "$dir/$file\n";
+ }
+ $found = 1;
+ }
+ push @dirs, "$dir/$file" if -d "$dir/$file"; # descend into every subdirectory, new or already known
+ }
+ closedir $dh;
+ # also call $before_dir_func if the directory has not been initialized yet
+ if (!$found && !-f "$dir/$DOUBLE_CKSUM_FILE" && defined $before_dir_func) {
+ $before_dir_func->($dir);
+ }
+ if ($found && defined $after_dir_func) {
+ $after_dir_func->($dir);
+ }
+ return \@dirs;
+ }, $top_dir;
+ # Is this a horrible hack? I dunno, but it sure is sweet...
+ while ($iter->()) {} # drain the iterator; all the work happens in the directory callback above
+# add all new files and directories found below $top_dir (using check_new_files) to the checksum files; $args->{"q"} suppresses the "Added" messages
+sub check_add_new_files {
+ my ($top_dir, $args) = @_;
+ my $changed_dirs = 0; # set when the $DIR_FILE of the current directory was appended to
+ my $changed_files = 0; # set when the $CKSUM_FILE of the current directory was appended to
+ check_new_files $top_dir, sub { # called for every new entry
+ my ($dir, $file) = @_;
+ my $fullpath = "$dir/$file";
+ if (-d $fullpath) {
+ my $dir_file = "$dir/$DIR_FILE";
+ my $fh;
+ if (!open $fh, ">>", $dir_file) {
+ warn "ERROR: Unable to append to file \"$dir_file\"!";
+ return;
+ }
+ print $fh '"' . escape_filename($file) . '"' . "\n";
+ close $fh;
+ $changed_dirs = 1;
+ } else {
+ my $cksum_output = get_cksum $fullpath;
+ return if !defined $cksum_output;
+ my $cksum_file = "$dir/$CKSUM_FILE";
+ my $fh;
+ if (!open $fh, ">>", $cksum_file) {
+ warn "ERROR: Unable to append to file \"$cksum_file\"!";
+ return;
+ }
+ print $fh $cksum_output . ' "' . escape_filename($file) . '"' . "\n";
+ close $fh;
+ $changed_files = 1;
+ }
+ print "Added \"$fullpath\"\n" if !$args->{"q"};
+ }, sub { # called before entries are added to a directory; returning 0 refuses the directory
+ if (-f "$_[0]/$DOUBLE_CKSUM_FILE") {
+ if (!check_cksums $_[0], $DOUBLE_CKSUM_FILE, 1) {
+ warn "Checksum files corrupt in \"$_[0]\", not adding new checksums!\n";
+ return 0;
+ }
+ } else {
+ write_cksums($_[0], {}, 1, 1); # directory not initialized yet: create empty special files
+ }
+ return 1;
+ }, sub { # called after a directory with new entries: refresh the checksums of the lists that were appended to
+ if ($changed_dirs) {
+ write_special_cksums $_[0], $DIR_FILE;
+ $changed_dirs = 0;
+ }
+ if ($changed_files) {
+ write_special_cksums $_[0], $CKSUM_FILE;
+ $changed_files = 0;
+ }
+ };
+# write the "checksums" in $contents to $path, replacing its old contents
+# $contents: hash ref whose keys are the names to write
+# if $is_cksum_file is set, the value each of the keys in $contents points
+# to is written before the key
+# entries are written in sorted order so that the same contents always
+# produce the same file
+# returns 1 on success and nothing if the file could not be opened or not
+# be written completely
+sub write_file {
+    my ($path, $contents, $is_cksum_file) = @_;
+    my $fh;
+    if (!open $fh, ">", $path) {
+        warn "ERROR: Unable to open \"$path\" for writing: $!\n";
+        return;
+    }
+    foreach my $filename (sort keys %$contents) {
+        my $line = '"' . escape_filename($filename) . '"' . "\n";
+        $line = "$contents->{$filename} $line" if $is_cksum_file;
+        print {$fh} $line;
+    }
+    # buffered write errors (e.g. a full disk) only show up at close
+    if (!close $fh) {
+        warn "ERROR: Unable to finish writing \"$path\": $!\n";
+        return;
+    }
+    return 1;
+# write the name => checksum pairs in $contents to the checksum file $path
+# (write_file with the checksum written in front of each name)
+sub write_cksum_file {
+    my ($path, $contents) = @_;
+    return write_file($path, $contents, 1);
+# write the entries in $contents to the special lumia files in $dir
+# keys with a defined value are files (written to $CKSUM_FILE together with
+# the value), keys pointing to undef are directories (written to $DIR_FILE)
+# $files_modified and $dirs_modified select which of the two files are
+# rewritten; the checksums of the rewritten files are then updated in
+# $DOUBLE_CKSUM_FILE
+# note: the files are written directly instead of through write_file
+sub write_cksums {
+    my ($dir, $contents, $files_modified, $dirs_modified) = @_;
+    my (@special_files, $files_fh, $dirs_fh);
+    if ($files_modified) {
+        my $path = "$dir/$CKSUM_FILE";
+        if (!open $files_fh, ">", $path) {
+            warn "ERROR: Unable to open \"$path\" for writing!";
+            return;
+        }
+        push @special_files, $CKSUM_FILE;
+    }
+    if ($dirs_modified) {
+        my $path = "$dir/$DIR_FILE";
+        if (!open $dirs_fh, ">", $path) {
+            warn "ERROR: Unable to open \"$path\" for writing!";
+            return;
+        }
+        push @special_files, $DIR_FILE;
+    }
+    for my $name (keys %{$contents}) {
+        my $cksum = $contents->{$name};
+        my $quoted = '"' . escape_filename($name) . '"' . "\n";
+        if (defined $cksum) {
+            print {$files_fh} "$cksum $quoted" if $files_modified;
+        } else {
+            print {$dirs_fh} $quoted if $dirs_modified;
+        }
+    }
+    close $files_fh if defined $files_fh;
+    close $dirs_fh if defined $dirs_fh;
+    write_special_cksums($dir, @special_files) if @special_files;
+# report (on stderr) every file and directory below $top_dir that is still
+# listed in the checksum files but doesn't exist on the filesystem anymore
+sub check_old_files {
+    my ($top_dir) = @_;
+    my $iter = make_lumia_iter(1, $top_dir);
+    while (my $dir = $iter->()) {
+        if (!-e $dir) {
+            warn "Nonexistent directory: \"$dir\"!\n";
+            next;
+        }
+        # an unreadable checksum file is treated like an empty one
+        my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {};
+        for my $file (grep { !-e "$dir/$_" } keys %{$cksums}) {
+            warn "Nonexistent file: \"$dir/$file\"!\n";
+        }
+    }
+# clean up the lumia checksum files below $top_dir, dropping every entry
+# whose file or directory doesn't exist on the filesystem anymore
+# each removal is printed unless $args->{"q"} is set
+sub remove_old_files {
+    my ($top_dir, $args) = @_;
+    my $iter = make_lumia_iter(1, $top_dir);
+    while (my $dir = $iter->()) {
+        if (-e $dir) {
+            # existing directory: drop the entries of vanished files
+            my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {};
+            my @gone = grep { !-e "$dir/$_" } keys %{$cksums};
+            next if !@gone;
+            for my $file (@gone) {
+                delete $cksums->{$file};
+                print "Removed \"$dir/$file\" from \"$dir/$CKSUM_FILE\"\n" if !$args->{"q"};
+            }
+            write_cksum_file("$dir/$CKSUM_FILE", $cksums);
+            write_special_cksums($dir, $CKSUM_FILE);
+            next;
+        }
+        # vanished directory: drop it from the directory list of its parent
+        my $parent = dirname($dir);
+        my $child = basename($dir);
+        my $lumia_dirs = read_file("$parent/$DIR_FILE", {}) // {};
+        next if !exists $lumia_dirs->{$child};
+        delete $lumia_dirs->{$child};
+        write_file("$parent/$DIR_FILE", $lumia_dirs);
+        print "Removed \"$dir\" from \"$parent/$DIR_FILE\"\n" if !$args->{"q"};
+        write_special_cksums($parent, $DIR_FILE);
+    }
+# group the given paths by their dirname
+# returns: a hash ref whose keys are the dirnames of the given paths, each
+# pointing to an array ref with the basenames of all paths in that directory
+# paths that don't exist are reported and left out
+sub sort_by_dir {
+    my %by_dir;
+    for my $path (@_) {
+        if (-e $path) {
+            # the array is created on first use
+            push @{ $by_dir{dirname($path)} }, basename($path);
+        } else {
+            warn "ERROR: Source file \"$path\" doesn't exist.\n";
+        }
+    }
+    return \%by_dir;
+# check if $dst exists and prompt the user whether it should be overwritten
+# the question is repeated until the answer is "y" or "n"; if standard
+# input ends before that, the answer is taken to be "n"
+# returns 0 if it can be overwritten or doesn't exist, 1 if it shouldn't be overwritten
+sub prompt_overwrite {
+    my $dst = shift;
+    return 0 if !-e $dst;
+    print STDERR "WARNING: \"$dst\" exists already. Do you want to replace it? (y/n) ";
+    my $choice = "";
+    while ($choice ne "y" && $choice ne "n") {
+        $choice = <STDIN>;
+        if (!defined $choice) {
+            # end of input: nobody can answer anymore, so don't loop
+            # forever and pick the non-destructive answer
+            $choice = "n";
+            last;
+        }
+        chomp $choice;
+    }
+    if ($choice eq "n") {
+        warn "Not overwriting \"$dst\"\n";
+        return 1;
+    }
+    return 0;
+# copies the $src files to $dst and updates the checksums in $dst
+# $src: array ref of source paths
+# $dst: destination directory or file (in latter case only one src is
+#       allowed; this is enforced with die, like move_files does)
+# $args: option hash ref; "f" skips the overwrite prompt, "v" makes cp verbose
+# the checksums are taken over from the checksum files of the sources, they
+# are not recomputed
+sub copy_files {
+    my ($src, $dst, $args) = @_;
+    # without this check every source would be copied over the same target
+    if (!-d $dst && @$src != 1) {
+        die "cp: only one source argument allowed when destination is a file\n";
+    }
+    my $dst_dir = $dst;
+    if (!-d $dst) {
+        $dst_dir = dirname $dst;
+    }
+    my $diff_name = 0;
+    # check if the file/dir is getting a different name or
+    # just being copied into a different directory
+    if (!-d $dst && !-d $src->[0]) {
+        $diff_name = 1;
+    }
+    if (!-e $dst && -d $src->[0]) {
+        $diff_name = 1;
+    }
+    my $dst_cksums = read_cksums($dst_dir);
+    return if !defined $dst_cksums;
+    my $src_sorted = sort_by_dir(@$src);
+    my $files_touched = 0;
+    my $dirs_touched = 0;
+    foreach my $src_dir (keys %$src_sorted) {
+        my $src_cksums = read_cksums($src_dir);
+        next if !defined $src_cksums;
+        foreach my $src_file (@{$src_sorted->{$src_dir}}) {
+            my $src_path = "$src_dir/$src_file";
+            my $dst_path = $diff_name ? $dst : "$dst_dir/$src_file";
+            if (-d $dst_path && -d $src_path) {
+                warn "ERROR: Cannot copy directory to already existing directory\n";
+                next;
+            }
+            if (exists $SPECIAL_FILES{$src_file} || exists $SPECIAL_FILES{basename $dst_path}) {
+                warn "ERROR: Not copying special file\n";
+                next;
+            }
+            next if !$args->{"f"} && prompt_overwrite($dst_path);
+            my $options = $args->{"v"} ? "-av" : "-a";
+            next if system("cp", $options, "--", $src_path, $dst);
+            if (-d $src_path) {
+                $dirs_touched = 1;
+            } else {
+                $files_touched = 1;
+            }
+            if (exists $src_cksums->{$src_file}) {
+                if ($diff_name) {
+                    $dst_cksums->{basename $dst} = $src_cksums->{$src_file};
+                } else {
+                    $dst_cksums->{$src_file} = $src_cksums->{$src_file};
+                }
+            } else {
+                warn "WARNING: \"$src_path\" not in cksum or directory list\n";
+            }
+        }
+    }
+    write_cksums($dst_dir, $dst_cksums, $files_touched, $dirs_touched);
+# move a file (or directory) from $src to $dst with the system's mv
+# - special lumia files are never moved
+# - if $dst is a directory, the basename of $src is appended to it
+# - if the target exists, the user is asked for confirmation unless
+#   $args->{"f"} is set; $args->{"v"} passes -v to mv
+# returns 0 if the checksum database should be updated and 1 otherwise
+sub move_file {
+    my ($src, $dst, $args) = @_;
+    if (grep { exists $SPECIAL_FILES{basename($_)} } $src, $dst) {
+        warn "ERROR: Not moving special file\n";
+        return 1;
+    }
+    $dst .= "/" . basename($src) if -d $dst;
+    return 1 if !$args->{"f"} && prompt_overwrite($dst);
+    my @cmd = ("mv", ($args->{"v"} ? ("-v") : ()), "--", $src, $dst);
+    return 1 if system(@cmd);
+    if (-e $src) {
+        warn "ERROR: file could not be removed from source but will still be " .
+            "removed from checksum database\n";
+    }
+    return 0;
+# move all files/directories in $src_files from $src_dir to $dst_dir
+# ($src_files is an array ref that only contains basenames), removing them
+# from the checksum files in $src_dir and adding them to $dst_cksums
+# returns ($files_touched, $dirs_touched), telling the caller which kinds of
+# entries were moved, or nothing if the checksums of $src_dir can't be read
+sub move_from_same_dir {
+    my ($src_dir, $src_files, $dst_cksums, $dst_dir, $args) = @_;
+    my $src_cksums = read_cksums($src_dir);
+    return if !defined $src_cksums;
+    my ($files_touched, $dirs_touched) = (0, 0);
+    for my $name (@{$src_files}) {
+        my $fullpath = "$src_dir/$name";
+        # this has to be looked up before the path is moved away
+        my $is_dir = -d $fullpath;
+        next if move_file($fullpath, $dst_dir, $args);
+        if ($is_dir) {
+            $dirs_touched ||= 1;
+        } else {
+            $files_touched ||= 1;
+        }
+        if (exists $src_cksums->{$name}) {
+            $dst_cksums->{$name} = delete $src_cksums->{$name};
+        } else {
+            warn "WARNING: \"$src_dir/$name\" not in cksum or directory list.\n";
+        }
+    }
+    write_cksums($src_dir, $src_cksums, $files_touched, $dirs_touched);
+    return ($files_touched, $dirs_touched);
+# rename a single file or directory from $src to $dst and carry its entry
+# over in the checksum database
+# when source and target are in the same directory, the checksum files of
+# that directory are only read and written once
+sub move_rename {
+    my ($src, $dst, $args) = @_;
+    my ($src_dir, $src_file) = (dirname($src), basename($src));
+    my ($dst_dir, $dst_file) = (dirname($dst), basename($dst));
+    my $same_dir = $src_dir eq $dst_dir;
+    my $src_cksums = read_cksums($src_dir);
+    return if !defined $src_cksums;
+    my $dst_cksums;
+    if ($same_dir) {
+        # work on a copy that already lacks the old name
+        $dst_cksums = { %{$src_cksums} };
+        delete $dst_cksums->{$src_file};
+    } else {
+        $dst_cksums = read_cksums($dst_dir);
+        return if !defined $dst_cksums;
+    }
+    # the type has to be determined while $src still exists
+    my $is_dir = -d $src;
+    my $dirs_touched = $is_dir ? 1 : 0;
+    my $files_touched = $is_dir ? 0 : 1;
+    return if move_file($src, $dst, $args);
+    if (exists $src_cksums->{$src_file}) {
+        $dst_cksums->{$dst_file} = delete $src_cksums->{$src_file};
+    } else {
+        warn "WARNING: \"$src\" not in cksum or directory list.\n";
+    }
+    write_cksums($dst_dir, $dst_cksums, $files_touched, $dirs_touched);
+    write_cksums($src_dir, $src_cksums, $files_touched, $dirs_touched) if !$same_dir;
+# move all files and directories in $src to $dst
+# - if $dst does not exist, $src is only allowed to contain one path, which
+#   is renamed to $dst
+# - if $dst is a file, $src is only allowed to contain a single path (which
+#   must be a file), which is renamed to $dst
+# - otherwise, all files and directories in $src are moved into $dst
+# $src: array ref of source paths
+# $dst: destination directory or file (in latter case only one src is allowed)
+# dies if the arguments don't fit any of these cases
+sub move_files {
+    my ($src, $dst, $args) = @_;
+    my $first = $src->[0];
+    if (!-d $dst) {
+        if (@{$src} != 1) {
+            die "move: only one source argument allowed when destination is a file\n";
+        }
+        # a file can always be renamed, a directory only to an unused name
+        if (!-d $first || !-e $dst) {
+            move_rename($first, $dst, $args);
+            return;
+        }
+        die "move: can't move directory to file\n";
+    }
+    # Separate files by current dir so the cksum and dir files only need to be opened once
+    my $by_dir = sort_by_dir(@{$src});
+    my $dst_cksums = read_cksums($dst);
+    return if !defined $dst_cksums;
+    my ($files_touched, $dirs_touched) = (0, 0);
+    for my $src_dir (keys %{$by_dir}) {
+        my ($moved_files, $moved_dirs) =
+            move_from_same_dir($src_dir, $by_dir->{$src_dir}, $dst_cksums, $dst, $args);
+        $files_touched ||= $moved_files;
+        $dirs_touched ||= $moved_dirs;
+    }
+    write_cksums($dst, $dst_cksums, $files_touched, $dirs_touched);
+# remove the file or directory $path from the filesystem with "rm -r"
+# ("rm -rf" if $args->{"f"} is set)
+# returns 1 if rm reported an error and 0 otherwise; if the path still
+# exists although rm succeeded, an error is printed but 0 is returned
+sub remove_file_dir {
+    my ($path, $args) = @_;
+    my $flags = "-r";
+    $flags .= "f" if $args->{"f"};
+    return 1 if system("rm", $flags, "--", $path) != 0;
+    if (-e $path) {
+        warn "ERROR: Unable to remove \"$path\" from filesystem but " .
+            "will still be removed from checksum database\n";
+    }
+    return 0;
+# remove the given @files, which all have to be located in $dir, updating
+# the checksum files of $dir in the process
+# note: the files are only allowed to be basenames, i.e., they must be the
+# actual filenames present in the checksum files
+sub remove_from_same_dir {
+    my ($args, $dir, @files) = @_;
+    my $cksums = read_cksums($dir);
+    return if !defined $cksums;
+    my %touched = (files => 0, dirs => 0);
+    for my $file (@files) {
+        if (exists $SPECIAL_FILES{$file}) {
+            warn "ERROR: not removing special file $file\n";
+            next;
+        }
+        my $fullpath = "$dir/$file";
+        warn "\"$fullpath\": No such file or directory.\n" if !-e $fullpath;
+        next if remove_file_dir($fullpath, $args);
+        if (!exists $cksums->{$file}) {
+            warn "WARNING: \"$file\" not in cksum or directory list.\n";
+            next;
+        }
+        # entries with a checksum are files, entries without are directories
+        my $kind = defined $cksums->{$file} ? "files" : "dirs";
+        $touched{$kind} = 1;
+        delete $cksums->{$file};
+    }
+    write_cksums($dir, $cksums, $touched{files}, $touched{dirs});
+# remove all files and directories given after $args, updating the
+# checksum files of their directories in the process
+# the paths are grouped by directory first so that each directory is
+# handled in one go
+sub remove_files {
+    my ($args, @paths) = @_;
+    my $by_dir = sort_by_dir(@paths);
+    for my $dir (keys %{$by_dir}) {
+        my @names = @{ $by_dir->{$dir} };
+        remove_from_same_dir($args, $dir, @names);
+    }
+# create the given directories, initialize each of them with empty checksum
+# files and add them to the directory list of their parent directory
+# note: does not work like "mkdir -p", i.e., the new directories have to
+# be located inside already existing directories
+sub make_dirs {
+    my @created_dirs;
+    for my $path (@_) {
+        if (system("mkdir", "--", $path) == 0) {
+            push @created_dirs, $path;
+        } else {
+            warn "ERROR creating directory $path\n";
+        }
+    }
+    # Group the new directories by parent so the dir file of each parent
+    # only needs to be read and written once
+    my %children_of;
+    for my $dir (@created_dirs) {
+        write_cksums($dir, {}, 1, 1);
+        push @{ $children_of{dirname($dir)} }, basename($dir);
+    }
+    for my $parent (keys %children_of) {
+        my $dir_file = "$parent/$DIR_FILE";
+        my $parent_dirs = read_file($dir_file, {});
+        next if !defined $parent_dirs;
+        $parent_dirs->{$_} = "" for @{ $children_of{$parent} };
+        write_file($dir_file, $parent_dirs);
+        write_special_cksums($parent, $DIR_FILE);
+    }
+# copy all special lumia files found in $src_dir and in the directories
+# listed below it to $dst_dir, recreating the directory structure there
+# $args->{"v"} makes cp print every file it copies
+sub extract {
+    my ($src_dir, $dst_dir, $args) = @_;
+    my $iter = make_lumia_iter(0, $src_dir);
+    my $options = "-a";
+    $options .= "v" if $args->{"v"};
+    while (my $dir = $iter->()) {
+        # same relative location, but below $dst_dir
+        my $target_dir = catfile($dst_dir, abs2rel($dir, $src_dir));
+        system("mkdir", "-p", "--", $target_dir);
+        for my $name (keys %SPECIAL_FILES) {
+            my $from = catfile($dir, $name);
+            next if !-e $from;
+            system("cp", $options, "--", $from, catfile($target_dir, $name));
+        }
+    }
+# recompute the checksums of the given files and store them in the
+# $CKSUM_FILE of their directories
+# directories given as arguments are ignored (with a message); a checksum
+# file is only rewritten if at least one checksum could be computed
+sub update {
+    my @files;
+    for my $path (@_) {
+        if (-d $path) {
+            warn "Ignoring directory \"$path\"\n";
+            next;
+        }
+        push @files, $path;
+    }
+    my $by_dir = sort_by_dir(@files);
+    for my $dir (keys %{$by_dir}) {
+        my $cksum_path = "$dir/$CKSUM_FILE";
+        my $cksums = read_cksum_file($cksum_path, {});
+        next if !defined $cksums;
+        my $changed = 0;
+        for my $file (@{ $by_dir->{$dir} }) {
+            my $sum = get_cksum("$dir/$file");
+            next if !defined $sum;
+            $cksums->{$file} = $sum;
+            $changed = 1;
+        }
+        next if !$changed;
+        write_cksum_file($cksum_path, $cksums);
+        write_special_cksums($dir, $CKSUM_FILE);
+    }
+# recompute the checksums of $CKSUM_FILE and $DIR_FILE in $dir and store
+# them in the $DOUBLE_CKSUM_FILE of that directory
+sub update_special {
+    my ($dir) = @_;
+    return write_special_cksums($dir, $CKSUM_FILE, $DIR_FILE);
+# command line handling: parse the options, take the first remaining
+# argument as the command and hand the rest to the function implementing it
+# the parsed options are passed on as a hash ref with the keys "f", "q",
+# "v" and "h"
+my %args;
+GetOptions(\%args, "f|force", "q|quiet", "v|verbose", "h|help");
+pod2usage(-exitval => 0, -verbose => 2) if $args{"h"};
+pod2usage(-exitval => 1, -verbose => 1) if @ARGV < 1;
+my $cmd = shift;
+if ($cmd eq "mv") {
+    die "mv requires at least two arguments\n" if @ARGV < 2;
+    # the last argument is the destination, everything before it a source
+    my @src = @ARGV[0..$#ARGV-1];
+    move_files \@src, $ARGV[-1], \%args;
+} elsif ($cmd eq "rm") {
+    die "rm requires at least one argument\n" if @ARGV < 1;
+    remove_files \%args, @ARGV;
+} elsif ($cmd eq "addnew") {
+    my $dir = @ARGV ? $ARGV[0] : ".";
+    check_add_new_files $dir, \%args;
+} elsif ($cmd eq "checknew") {
+    my $dir = @ARGV ? $ARGV[0] : ".";
+    check_new_files $dir;
+} elsif ($cmd eq "checkold") {
+    my $dir = @ARGV ? $ARGV[0] : ".";
+    check_old_files $dir;
+} elsif ($cmd eq "rmold") {
+    my $dir = @ARGV ? $ARGV[0] : ".";
+    remove_old_files $dir, \%args;
+} elsif ($cmd eq "check") {
+    if (@ARGV < 1) {
+        check_files \%args, ".";
+    } else {
+        check_files \%args, @ARGV;
+    }
+} elsif ($cmd eq "clean") {
+    my $dir = @ARGV ? $ARGV[0] : ".";
+    clean_files $dir, \%args;
+} elsif ($cmd eq "extract") {
+    my $src_dir = ".";
+    my $dst_dir;
+    if (@ARGV == 2) {
+        $src_dir = $ARGV[0];
+        $dst_dir = $ARGV[1];
+    } elsif (@ARGV == 1) {
+        $dst_dir = $ARGV[0];
+    } else {
+        die "Invalid number of arguments\n";
+    }
+    if (!-d $src_dir) {
+        die "ERROR: Directory \"$src_dir\" does not exist.\n";
+    }
+    if (!-d $dst_dir) {
+        die "ERROR: Directory \"$dst_dir\" does not exist.\n";
+    }
+    # extract reads the "v" option, so the options have to be passed on
+    extract $src_dir, $dst_dir, \%args;
+} elsif ($cmd eq "cp") {
+    die "cp requires at least two arguments\n" if @ARGV < 2;
+    my @src = @ARGV[0..$#ARGV-1];
+    copy_files \@src, $ARGV[-1], \%args;
+} elsif ($cmd eq "mkdir") {
+    die "mkdir requires at least one argument\n" if @ARGV < 1;
+    make_dirs @ARGV;
+} elsif ($cmd eq "update") {
+    die "update requires at least one argument\n" if @ARGV < 1;
+    update @ARGV;
+} elsif ($cmd eq "updatespecial") {
+    die "Invalid number of arguments\n" if @ARGV > 1;
+    my $dir = @ARGV ? $ARGV[0] : ".";
+    update_special $dir;
+} else {
+    pod2usage(-exitval => 1, -verbose => 1);
+=head1 NAME
+lumia - Manage checksums on a filesystem
+=head1 SYNOPSIS
+B<lumia> command [-hqfv] arguments
+lumia is meant for managing checksums of files in order to prevent bitrot.
+It does this by storing several special files in each directory to keep track
+of the checksums:
+=over 8
+=item B<.lumidify_archive_cksums>
+Contains the checksums of all files in the directory.
+=item B<.lumidify_archive_dirs>
+Contains a list of all directories in the directory.
+=item B<.lumidify_archive_cksums.cksum>
+Contains the checksums of B<.lumidify_archive_cksums> and B<.lumidify_archive_dirs>
+just because I'm paranoid.
+=item B<.lumidify_archive_ignore>
+Contains a list of files and directories that should be ignored by lumia.
+Note that this is only read and never written to, unless the command B<clean>
+is used. It is, however, still copied over by the B<extract> command.
+When the documentation for the commands talks about the "checksum database",
+it simply means these files.
+All file/directory names are enclosed in quotes, with any backslashes or quotes
+inside the name escaped with another backslash. The names are allowed to have
+newlines in them.
+The list files only contain a list of filenames, with a newline between the
+closing quote of one name and the opening quote of the next one.
+The checksum files additionally contain the output of the checksum program
+used and a space before the starting quote of the filename.
+=head1 OPTIONS
+=over 8
+=item B<-h>, B<--help>
+Show the full documentation.
+=item B<-q>, B<--quiet>
+Only output errors.
+=item B<-f>, B<--force>
+Overwrite files without prompting for confirmation.
+=item B<-v>, B<--verbose>
+Print each file that is processed by the command.
+See the full documentation for details on which commands support which options
+and what they do.
+It does not matter if the options are written before or after the command.
+If C<--> is written anywhere on the command line, option parsing is stopped,
+so that files starting with a hyphen can still be specified.
+Note that C<-q> and C<-v> aren't exactly opposites - C<-q> applies to commands
+like B<check>, where it suppresses printing of the individual files, while
+C<-v> applies to commands like B<cp>, where it is just passed on to the system
+command called in the background.
+Note further that this is very inconsistent, like the rest of the program, but
+the author has made too many bad decisions to rectify that problem at the moment.
+=head1 COMMANDS
+Note that some commands support multiple files/directories as arguments and others,
+for which it would make just as much sense, don't. That's just the way it is.
+=over 8
+=item B<addnew> [-q] [directory]
+Walks through B<directory>, adding all new files to the checksum database.
+B<directory> defaults to the current directory.
+C<-q> suppresses the printing of each file or directory as it is added.
+=item B<checknew> [directory]
+Walks through B<directory>, printing all files that aren't part of the checksum
+database. B<directory> defaults to the current directory.
+=item B<checkold> [directory]
+Prints all files in the checksum database that do not exist on the filesystem anymore.
+B<directory> defaults to the current directory.
+=item B<rmold> [-q] [directory]
+Removes all files found by B<checkold> from the database. B<directory> defaults to
+the current directory.
+C<-q> suppresses the printing of each file as it is removed.
+=item B<check> [-q] file/directory ...
+Verifies the checksums of all files given, recursing through any directories. If no
+files or directories are given, the current directory is used.
+Note that the checksum database in the corresponding directory will be read again for
+every file given on the command line, even if 1000 files in the same directory are given.
+This problem does not occur when recursing through directories, so it is best to only
+give files directly when checking a few. This problem wouldn't be too difficult to
+fix, but, frankly, I'm too lazy, especially since I only added the feature to check
+files individually as a convenience when I want to quickly check a single file in a
+large directory.
+To explain why it is this way: The directory recursion is done using an iterator, which
+has the directories pushed onto its queue in the beginning. The iterator only returns
+directories, which are then checked all in one go, but this means that files given on
+the command line need to be handled specially.
+C<-q> suppresses the printing of all good checksums but still allows a message to
+be printed when a checksum failed.
+=item B<clean> [-q] [directory]
+Removes all lumia special files used to store the checksum database from B<directory>
+recursively. B<directory> defaults to the current directory.
+Note that this recurses through the entire directory tree, not just the part that is
+actually linked together by the checksum database.
+Warning: This just blindly removes all files with one of the special lumia names,
+even if they weren't actually created by lumia.
+C<-q> suppresses the printing of each file as it is deleted.
+=item B<extract> [-v] [source] destination
+Recreates the entire directory structure from B<source> in B<destination>, but only
+copies the special files used to store the checksum database. B<source> defaults to
+the current directory.
+C<-v> prints each file as it is copied.
+Note that this overwrites files in the destination directory without confirmation.
+=item B<mkdir> directory ...
+Creates the given directories, initializing them with empty checksum database files.
+=item B<update> file ...
+Recalculates the checksums for the given files and replaces them in the database.
+Note: Directories given as arguments are ignored.
+This is mainly meant to quickly "touch" a file after it was modified (e.g. a
+notes file that is occasionally updated).
+=item B<updatespecial> [directory]
+Recalculates the checksums for the special files C<.lumidify_archive_dirs> and
+C<.lumidify_archive_cksums> and writes them to C<.lumidify_archive_cksums.cksum>.
+B<directory> defaults to the current directory.
+This is only meant to be used if, for some reason, the checksum files had to
+be edited manually and thus don't match the checksums in C<.lumidify_archive_cksums.cksum>.
+=item B<rm> [-f] file ...
+Removes the given files and directories recursively from the filesystem and
+checksum database. The following caveats apply:
+If any actual errors occur while deleting the file/directory (i.e. the system
+command C<rm> returns a non-zero exit value), the checksum or directory B<is
+left in the database>. If the system C<rm> does not return a non-zero exit value,
+but the file/directory still exists afterwards (e.g. there was a permission
+error and the user answered "n" when prompted), a warning message is printed,
+but the files B<are removed from the database> (if the database can be
+written to).
+It is an error if there are no checksum database files in the directory
+of a file named on the command line.
+C<-f> is passed through to the system C<rm> command.
+=item B<cp> [-vf] source target
+=item B<cp> [-vf] source ... directory
+Copies the given source files, updating the checksum database in the process.
+If the last argument is a file, there must be only one source argument, also a file,
+which is then copied to the target.
+If the last argument is a directory, all source arguments are copied into it.
+It is an error if a source or destination directory does not contain any
+checksum database files.
+B<cp> will issue a warning and skip to the next argument if it is asked to
+merge a directory with an already existing directory. For instance, attempting
+to run C<cp dir1 dir2>, where C<dir2> already contains a directory named
+C<dir1>, will result in an error. This may change in the future, when the
+program is modified to recursively copy the files manually, instead of simply
+calling the system C<cp> on each of the arguments. If this were supported in
+the current version, none of the checksums inside that directory would be
+updated, so it wouldn't be very useful.
+C<-v> is passed through to the system C<cp> command.
+C<-f> silently overwrites files without prompting the user, much like the
+C<-f> option in the system C<cp> command. This is handled manually by the
+program, though, in order to actually determine what the user chose. See
+also the caveat mentioned above.
+=item B<mv> [-f] source target
+=item B<mv> [-f] source ... directory
+Moves the given source files, updating the checksum database in the process.
+If the last argument is a file or does not exist, there must be only one source
+argument, which is renamed to the target name.
+If the last argument is an existing directory, all source arguments are moved
+into it.
+It is an error if a source or destination directory does not contain any
+checksum database files.
+B<mv> behaves the same as B<rm> with regard to checking if the source file
+is still present after the operation and other error handling.
+C<-f> is handled in the same manner as with B<cp>.
+There are already several programs that can be used to check for bitrot,
+as listed in L</"SEE ALSO">. However, all programs I tried either were
+much too complicated for my taste or just did everything behind my back.
+I wanted a simple tool that did exactly what I told it to and also allowed
+me to keep the old checksums when reorganizing files, in order to avoid
+regenerating the checksums from corrupt files. Since I couldn't find those
+features in any program I tried, I wrote my own.
+It may strike some readers as a peculiar idea to save the checksum files in
+I<every single directory>, but this choice was made after much deliberation.
+The other option I could think of was to have one big database, but that
+would have made all commands much more difficult to implement and additionally
+necessitated opening the entire database for every operation. With individual
+files in each directory, operations like B<cp> become quite trivial (ignoring
+all the edge cases) since only the toplevel checksums need to be copied to
+the new destination, and any subdirectories already contain the checksums.
+This method is not without its drawbacks, however. The most glaring problem
+I have found is that there is no way to store the checksums of read-only
+directories or any special directories that cannot be littered with the
+checksum files because that would clash with other software. Despite these
+drawbacks, however, I decided to stick with it because it works for almost
+all cases and doesn't have any of the serious drawbacks that other options
+would have had.
+The names of the special files were chosen to be ".lumidify_archive*" not
+out of vanity, but mainly because I couldn't think of any regular files
+with those names, making them a good choice to avoid clashes.
+The name of the program, C<lumia> (for "lumidify archive"), was similarly
+chosen because it did not clash with any programs installed on my system and
+thus allowed for easy tab-completion.
+By default, the simple cksum algorithm is used to get the checksums. This
+is not very secure, but the main purpose of the program is to prevent
+bitrot, for which cksum should be sufficient, especially since it is much
+faster than other algorithms.
+There is currently no convenient way to change the algorithm other than
+changing the $CKSUM_CMD and $CKSUM_NUMFIELDS variables at the top of
+lumia. $CKSUM_CMD must be the command that returns the checksum
+when it is given a file, and $CKSUM_NUMFIELDS specifies the number of
+space-separated fields the checksum consists of. This has to be specified
+in order to determine where the checksum ends and the filename begins in
+the output. This would be redundant if all implementations of cksum
+supported '-q' for outputting only the checksum, but that only seems to
+be supported by some implementations.
+=over 8
+=item B<Security auditing>
+This program is B<NOT> designed to provide any security auditing, as should
+be clear from the fact that the checksums are stored right in the same
+directory as the files. See mtree(8) for that.
+If you want to, however, you could set $CKSUM_CMD to a secure hash (not cksum)
+and B<extract> the checksums to a separate directory, which you keep in a
+safe place. You could then use the regular C<cp> command to simply replace
+all the checksums with the ones from your backup, in case an attacker modified
+the checksum database in the directory with the actual files you're trying to
+protect. I don't know if there would be any point in doing that, though.
+=item B<Managing archives>
+This is the purpose I wrote the program for.
+You can simply initialize your archive directory with the B<addnew> command.
+Whenever you add new files, just run B<addnew> again. If you want to reorganize
+the archive, you can use the limited commands available.
+I usually just use rsync(1) to copy the entire archive directory over to other
+backup drives and then use the B<check> command again on the new drive.
+I also have checksums for the main data directory on my computer (except for
+things like git repositories, which I don't want littered with the database
+files). Here, I use the B<update> command for files that I edit more often
+and occasionally run B<check> on the entire directory.
+Since the database files are written in each directory, you can run the
+B<addnew> command in any subdirectory when you've added new files there.
+Due to the extensive use of iterators and the author's bad life choices,
+some functions, such as B<addnew> and B<check>, run more slowly than they
+would if they were programmed more efficiently, especially on many small
+files and folders. Too bad.
+This program was written on OpenBSD. It will probably work on most other
+reasonably POSIX-compliant systems, although I cannot guarantee anything.
+$CKSUM_CMD may need to be modified at the top of lumia. The file
+operation commands are called directly with system(), so those need to
+be available.
+It will most certainly not work on Windows, but that shouldn't be a
+problem for anyone important.
+=head1 BUGS
+All system commands (unless I forgot some) are called with "--" before
+listing the actual files, so files beginning with hyphens should be
+supported. I have tested the commands with filenames starting with spaces
+and hyphens and also containing newlines, but there may very well be issues
+still. Please notify me if you find any filenames that do not work. Handling
+filenames properly is difficult.
+There are probably many other edge cases, especially in the B<mv>, B<cp>,
+and B<rm> commands. Please notify me if you find an issue.
+Operations on files containing newlines may cause Perl to print a warning
+"Unsuccessful stat on filename containing newline" even though nothing is
+wrong since (as described in B<mv> and B<rm>) existence of the file is
+checked afterwards. I didn't feel like disabling warnings, and no normal
+person should be working with files containing newlines anyway, so that's
+the way it is.
+Always 0, unless the arguments given were invalid. We don't do errors around here.
+On a more serious note - I should probably change that at some point.
+For the time being, if you want to run B<check> in a script, you can test
+the output printed when the C<-q> option is used, since this won't output
+anything if there are no errors. Do note, though, that actual errors (file not
+found, etc.) are printed to STDERR, while incorrect checksums are printed to STDOUT.
+=head1 SEE ALSO
+par2(1), mtree(8), aide(1), bitrot(no man page)
+=head1 LICENSE
+Copyright (c) 2019, 2020, 2021 lumidify <nobody[at]>
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
diff --git a/ b/
@@ -1,1403 +0,0 @@
-#!/usr/bin/env perl
-# TODO: some way to avoid writing .lumidify* in dirs but still index them?
-# TODO: store modified date and checksum files with changed date
-# TODO: add option to just check dir structure or maybe check if everything exists
-# TODO: add option to compare cksums of two dirs
-# TODO: exit status!
-use strict;
-use warnings;
-use File::Spec::Functions qw(catfile abs2rel);
-use File::Basename qw(basename dirname);
-use String::ShellQuote;
-use Pod::Usage;
-use Getopt::Long;
-# the file used to store checksums for files
-my $CKSUM_FILE = ".lumidify_archive_cksums";
-# the file used to store directory names
-my $DIR_FILE = ".lumidify_archive_dirs";
-# the file read to ignore files or directories
-my $IGNORE_FILE = ".lumidify_archive_ignore";
-# the file containing checksums of $CKSUM_FILE and $DIR_FILE
-my $DOUBLE_CKSUM_FILE = ".lumidify_archive_cksums.cksum";
-# uncomment this instead of the lines below to use
-# sha256 instead of cksum as the hash algorithm
-# Note: this isn't really tested properly
-#my $CKSUM_CMD = 'sha256 -q';
-my $CKSUM_CMD = 'cksum';
- $CKSUM_FILE => 1,
- $DIR_FILE => 1,
- $IGNORE_FILE => 1,
-# escape a filename for writing into the checksum files
-sub escape_filename {
- my $file = shift;
- $file =~ s/\\/\\\\/g;
- $file =~ s/"/\\"/g;
- return $file;
-# make a generic file iterator
-# $file_func determines whether a file should be returned by the iterator
-# $dir_func is called for each directory and returns all files that
-# should be added to the queue
-sub make_file_iter {
- my ($file_func, $dir_func, @queue) = @_;
- return sub {
- while (@queue) {
- my $file = pop @queue;
- if (-d $file) {
- my $new_files = $dir_func->($file);
- next if !defined $new_files;
- push @queue, @$new_files;
- }
- return $file if $file_func->($file);
- }
- return;
- };
-# make a basic filename iterator, which simply returns all files
-# for which $file_func returns a true value
-sub make_file_iter_basic {
- my ($file_func, @files) = @_;
- make_file_iter $file_func, sub {
- my $dh;
- if (!opendir $dh, $_[0]) {
- warn "WARNING: Unable to open directory \"$_[0]\"!";
- return [];
- }
- my @new_files = map "$_[0]/$_", grep {$_ ne "." && $_ ne ".."} readdir $dh;
- closedir $dh;
- return \@new_files;
- }, @files;
-# make an interator that only returns the directories which are present
-# in the $DIR_FILE files, in addition to the files and directories that
-# were originally passed as arguments
-# note: this returns nonexistent directories if those are still
-# specified in the lumia files
-sub make_lumia_iter {
- my ($quiet, @dirs) = @_;
- make_file_iter sub {1}, sub {
- my $path = "$_[0]/$DIR_FILE";
- return [] if !-f $path;
- my $dirs = read_file($path, {});
- return if !defined $dirs;
- my @new_dirs;
- foreach my $dir (keys %$dirs) {
- my $dir_path = "$_[0]/$dir";
- if (!-d $dir_path) {
- warn "ERROR: Directory \"$dir_path\" mentioned in " .
- "\"$path\" does not exist or is not directory.\n" if !$quiet;
- }
- # still push it even when it doesn't exist so rmold can work properly
- push @new_dirs, $dir_path;
- }
- return \@new_dirs;
- }, @dirs;
-# remove all special lumia files from the given directory
-sub clean_files {
- my ($dir, $args) = @_;
- my $iter = make_file_iter_basic sub {exists $SPECIAL_FILES{basename $_[0]};}, $dir;
- while (my $file = $iter->()) {
- if (!unlink $file) {
- warn "WARNING: Unable to remove file \"$file\"!\n";
- } else {
- print "Deleted \"$file\"\n" if !$args->{"q"};
- }
- }
-# read a file, processing each line with $handle_cksum_func if set
-# and writing the results into $cksums
-# $handle_cksum_func must return two values, the checksum of the
-# argument and the rest of the string (that is then parsed for
-# the filename); if it returns undef, this function also returns undef
-sub read_file {
- my ($file, $cksums, $handle_cksum_func) = @_;
- my $fh;
- if (!open $fh, "<", $file) {
- warn "ERROR: Unable to open file \"$file\": $!\n";
- return;
- }
- my $in_fn = 0;
- my $cur_cksum;
- my $cur_str;
- my $cur_fn = "";
- foreach (<$fh>) {
- next if (!$in_fn && /^$/);
- if ($handle_cksum_func && !$in_fn) {
- ($cur_cksum, $cur_str) = $handle_cksum_func->($_);
- return undef if !defined $cur_cksum;
- } else {
- $cur_str = $_;
- }
- my $bs = 0;
- foreach my $ch (split(//, $cur_str)) {
- if ($ch eq "\\") {
- $bs++;
- $cur_fn .= "\\" if !($bs %= 2) && $in_fn;
- } elsif ($bs % 2) {
- $cur_fn .= $ch if $in_fn;
- $bs = 0;
- } elsif ($ch eq "\"") {
- if ($in_fn) {
- $in_fn = 0;
- $cksums->{$cur_fn} = $cur_cksum;
- $cur_fn = "";
- last;
- }
- $in_fn = 1;
- } elsif ($in_fn) {
- $cur_fn .= $ch;
- }
- }
- }
- close $fh;
- if ($in_fn) {
- warn "ERROR: Unterminated filename in file \"$file\"\n";
- return undef;
- }
- return $cksums;
-# read a single checksum file, writing the checksums into the hash $cksums and returning it
-sub read_cksum_file {
- my ($file, $cksums) = @_;
- return read_file $file, $cksums, sub {
- my $line = shift;
- my @fields = split(/ /, $line, $CKSUM_NUMFIELDS+1);
- if (@fields != $CKSUM_NUMFIELDS+1) {
- warn "WARNING: Malformed line \"$line\" in file \"$file\"\n";
- return;
- }
- my $cur_cksum = join(" ", @fields[0..$CKSUM_NUMFIELDS-1]);
- my $cur_str = $fields[$CKSUM_NUMFIELDS];
- return ($cur_cksum, $cur_str);
- };
-# read the checksums and directory names in $dir
-sub read_cksums {
- my $dir = shift;
- my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {});
- return undef if !defined $cksums;
- $cksums = read_file("$dir/$DIR_FILE", $cksums);
- return undef if !defined $cksums;
- return $cksums;
-# get the checksum output for $path
-# returns undef if $CKSUM_CMD returns an error
-sub get_cksum {
- my $path = shift;
- my $path_esc = shell_quote $path;
- my $cksum_output = `$CKSUM_CMD -- $path_esc 2>&1`;
- if ($?) {
- warn "ERROR getting cksum for file \"$path\":\n$cksum_output";
- return undef;
- }
- chomp $cksum_output;
- my @fields = split(/ /, $cksum_output, $CKSUM_NUMFIELDS+1);
- return join(" ", @fields[0..$CKSUM_NUMFIELDS-1]);
-# check the checksums in $dir/$cksum_file
-# if $quiet is set, only print failed files
-sub check_cksums {
- my ($dir, $cksum_file, $quiet) = @_;
- my $cksums = read_cksum_file("$dir/$cksum_file", {});
- return 0 if !defined $cksums;
- my $failed = 1;
- foreach my $file (keys %$cksums) {
- my $path = "$dir/$file";
- my $output = get_cksum $path;
- next if !defined $output;
- if ($output eq $cksums->{$file}) {
- print "OK $path\n" if !$quiet;
- } else {
- print "FAILED $path\n";
- $failed = 0;
- }
- }
- return $failed;
-# check the checksums of all files and directories in @dirs
-sub check_files {
- my $args = shift;
- my @dirs;
- foreach my $file (@_) {
- if (-d $file) {
- push @dirs, $file;
- next;
- }
- my $dir = dirname $file;
- my $base = basename $file;
- if (exists $SPECIAL_FILES{$base}) {
- warn "ERROR: File is reserved for $file\n";
- next;
- }
- my $cksums = read_cksum_file("$dir/$CKSUM_FILE");
- next if !defined $cksums;
- if (!exists $cksums->{$base}) {
- warn "ERROR: File doesn't exist in checksums: $file\n";
- next;
- }
- my $output = get_cksum "$file";
- next if !defined $output;
- if ($output eq $cksums->{$base}) {
- print "OK $file\n" if !$args->{"q"};
- } else {
- print "FAILED $file\n";
- }
- }
- my $iter = make_lumia_iter 0, @dirs;
- while (my $file = $iter->()) {
- check_cksums $file, $DOUBLE_CKSUM_FILE, $args->{"q"};
- check_cksums $file, $CKSUM_FILE, $args->{"q"};
- }
-# write the checksums of the special lumia files given as arguments
-# to $DOUBLE_CKSUM_FILE in $dir
-sub write_special_cksums {
- my ($dir, @files) = @_;
- my $cksum_file = "$dir/$DOUBLE_CKSUM_FILE";
- my $cksums = {};
- if (-f $cksum_file) {
- $cksums = read_cksum_file $cksum_file, {};
- }
- return if !defined $cksums;
- foreach my $file (@files) {
- my $cksum_output = get_cksum("$dir/$file");
- next if (!defined $cksum_output);
- $cksums->{$file} = $cksum_output;
- }
- write_file($cksum_file, $cksums, 1);
-# search for new files that aren't present in the checksum files
-# - if $file_func is set, it is called for each new file
-# - if $before_dir_func is set, it is called before processing the
-# files in each directory that has new files OR if a directory
-# is entirely new (well, it only checks if $DOUBLE_CKSUM_FILE exists)
-# - if $after_dir_func is set, it is called after processing the
-# files in each directory that has new files
-sub check_new_files {
- my ($top_dir, $file_func, $before_dir_func, $after_dir_func) = @_;
- my $iter = make_file_iter sub {1}, sub {
- my $dir = shift;
- my $dh;
- if (!opendir $dh, $dir) {
- warn "ERROR: Unable to open directory \"$dir\"!";
- return undef;
- }
- my $read_file_noerror = sub {
- if (-f $_[0]) {
- return $_[1]->($_[0], {}) // {};
- }
- return {};
- };
- my $ignore = $read_file_noerror->("$dir/$IGNORE_FILE", \&read_file);
- my $lumia_dirs = $read_file_noerror->("$dir/$DIR_FILE", \&read_file);
- my $lumia_files = $read_file_noerror->("$dir/$CKSUM_FILE", \&read_cksum_file);
- my @dirs;
- my $found = 0;
- while (my $file = readdir $dh) {
- next if $file eq "." || $file eq "..";
- next if exists $ignore->{$file} || exists $SPECIAL_FILES{$file};
- if (!exists $lumia_dirs->{$file} && !exists $lumia_files->{$file}) {
- if (!$found && defined $before_dir_func) {
- last if !$before_dir_func->($dir);
- }
- if (defined $file_func) {
- $file_func->($dir, $file);
- } else {
- print "$dir/$file\n";
- }
- $found = 1;
- }
- push @dirs, "$dir/$file" if -d "$dir/$file";
- }
- closedir $dh;
- # also call $before_dir_func if the directory has not been initialized yet
- if (!$found && !-f "$dir/$DOUBLE_CKSUM_FILE" && defined $before_dir_func) {
- $before_dir_func->($dir);
- }
- if ($found && defined $after_dir_func) {
- $after_dir_func->($dir);
- }
- return \@dirs;
- }, $top_dir;
- # Is this a horrible hack? I dunno, but it sure is sweet...
- while ($iter->()) {}
-# add all new files in $top_dir to the checksum files
-sub check_add_new_files {
- my ($top_dir, $args) = @_;
- my $changed_dirs = 0;
- my $changed_files = 0;
- check_new_files $top_dir, sub {
- my ($dir, $file) = @_;
- my $fullpath = "$dir/$file";
- if (-d $fullpath) {
- my $dir_file = "$dir/$DIR_FILE";
- my $fh;
- if (!open $fh, ">>", $dir_file) {
- warn "ERROR: Unable to append to file \"$dir_file\"!";
- return;
- }
- print $fh '"' . escape_filename($file) . '"' . "\n";
- close $fh;
- $changed_dirs = 1;
- } else {
- my $cksum_output = get_cksum $fullpath;
- return if !defined $cksum_output;
- my $cksum_file = "$dir/$CKSUM_FILE";
- my $fh;
- if (!open $fh, ">>", $cksum_file) {
- warn "ERROR: Unable to append to file \"$cksum_file\"!";
- return;
- }
- print $fh $cksum_output . ' "' . escape_filename($file) . '"' . "\n";
- close $fh;
- $changed_files = 1;
- }
- print "Added \"$fullpath\"\n" if !$args->{"q"};
- }, sub {
- if (-f "$_[0]/$DOUBLE_CKSUM_FILE") {
- if (!check_cksums $_[0], $DOUBLE_CKSUM_FILE, 1) {
- warn "Checksum files corrupt in \"$_[0]\", not adding new checksums!\n";
- return 0;
- }
- } else {
- write_cksums($_[0], {}, 1, 1);
- }
- return 1;
- }, sub {
- if ($changed_dirs) {
- write_special_cksums $_[0], $DIR_FILE;
- $changed_dirs = 0;
- }
- if ($changed_files) {
- write_special_cksums $_[0], $CKSUM_FILE;
- $changed_files = 0;
- }
- };
-# write the "checksums" in $contents to $path
-# if $is_cksum_file is set, the value each of the keys in $contents points
-# to is written before the key
-sub write_file {
- my ($path, $contents, $is_cksum_file) = @_;
- my $fh;
- if (!open $fh, ">", $path) {
- warn "ERROR: Unable to open \"$path\" for writing!";
- return;
- }
- foreach my $filename (keys %$contents) {
- if ($is_cksum_file) {
- print $fh "$contents->{$filename} ";
- }
- print $fh '"' . escape_filename($filename) . '"' . "\n";
- }
- close $fh;
-# write the checksums in $contents to the file at $path
-sub write_cksum_file {
- my ($path, $contents) = @_;
- write_file $path, $contents, 1;
-# write the checksums in $contents to $dir
-# any keys that point to undef are taken to be directories and vice versa
-# $files_modified and $dirs_modified control which of the special lumia
-# files actually get written
-# note: this doesn't use write_file, etc. in order to (possibly) be a bit more efficient
-sub write_cksums {
- my ($dir, $contents, $files_modified, $dirs_modified) = @_;
- # No, this isn't efficient...
- my @special_files;
- my $dirs_fh;
- my $files_fh;
- if ($files_modified) {
- my $path = "$dir/$CKSUM_FILE";
- if (!open $files_fh, ">", $path) {
- warn "ERROR: Unable to open \"$path\" for writing!";
- return;
- }
- push @special_files, $CKSUM_FILE;
- }
- if ($dirs_modified) {
- my $path = "$dir/$DIR_FILE";
- if (!open $dirs_fh, ">", $path) {
- warn "ERROR: Unable to open \"$path\" for writing!";
- return;
- }
- push @special_files, $DIR_FILE;
- }
- foreach my $key (keys %$contents) {
- if ($files_modified && defined $contents->{$key}) {
- print $files_fh $contents->{$key} . ' "' . escape_filename($key) . '"' . "\n";
- } elsif ($dirs_modified && !defined $contents->{$key}) {
- print $dirs_fh '"' . escape_filename($key) . '"' . "\n";
- }
- }
- close $files_fh if defined $files_fh;
- close $dirs_fh if defined $dirs_fh;
- if (@special_files) {
- write_special_cksums $dir, @special_files;
- }
-# show all files that are present in the checksum files but don't exist on the filesystem anymore
-sub check_old_files {
- my $top_dir = shift;
- my $iter = make_lumia_iter 1, $top_dir;
- while (my $dir = $iter->()) {
- if (-e $dir) {
- my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {};
- foreach my $file (keys %$cksums) {
- if (!-e "$dir/$file") {
- warn "Nonexistent file: \"$dir/$file\"!\n";
- }
- }
- } else {
- warn "Nonexistent directory: \"$dir\"!\n";
- }
- }
-# clean up the lumia checksum files, removing any files that aren't present
-# on the filesystem anymore
-sub remove_old_files {
- my ($top_dir, $args) = @_;
- my $iter = make_lumia_iter 1, $top_dir;
- while (my $dir = $iter->()) {
- if (!-e $dir) {
- my $parent = dirname $dir;
- my $child = basename $dir;
- my $lumia_dirs = read_file("$parent/$DIR_FILE", {}) // {};
- if (exists $lumia_dirs->{$child}) {
- delete $lumia_dirs->{$child};
- write_file "$parent/$DIR_FILE", $lumia_dirs;
- print "Removed \"$dir\" from \"$parent/$DIR_FILE\"\n" if !$args->{"q"};
- write_special_cksums $parent, $DIR_FILE;
- }
- } else {
- my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {};
- my $found = 0;
- foreach my $file (keys %$cksums) {
- if (!-e "$dir/$file") {
- delete $cksums->{$file};
- print "Removed \"$dir/$file\" from \"$dir/$CKSUM_FILE\"\n" if !$args->{"q"};
- $found = 1;
- }
- }
- if ($found) {
- write_cksum_file "$dir/$CKSUM_FILE", $cksums;
- write_special_cksums $dir, $CKSUM_FILE;
- }
- }
- }
-# sort the given paths into hash based on the dirname
-# returns: a hash with the keys being the dirnames of the given paths and
-# each one pointing to an array containing the basenames of all paths
-# that had this dirname
-sub sort_by_dir {
- my %sorted_files;
- foreach my $file (@_) {
- if (!-e $file) {
- warn "ERROR: Source file \"$file\" doesn't exist.\n";
- next;
- }
- my $dir = dirname($file);
- if (!exists($sorted_files{$dir})) {
- $sorted_files{$dir} = [];
- }
- push(@{$sorted_files{$dir}}, basename($file));
- }
- return \%sorted_files;
-# check if $dst exists and prompt the user whether it should be overwritten
-# returns 0 if it can be overwritten or doesn't exist, 1 if it shouldn't be overwritten
-sub prompt_overwrite {
- my $dst = shift;
- if (-e $dst) {
- print STDERR "WARNING: \"$dst\" exists already. Do you want to replace it? (y/n) ";
- my $choice = "";
- while ($choice ne "y" && $choice ne "n") {
- $choice = <STDIN>;
- chomp $choice;
- }
- if ($choice eq "n") {
- warn "Not overwriting \"$dst\"\n";
- return 1;
- } else {
- return 0;
- }
- }
- return 0;
-# Copies the paths listed in $src to $dst with the system cp and carries the
-# checksum database entries of the sources over to the destination directory.
-# $src:  array ref of source paths
-# $dst:  destination directory or file (in the latter case only one src is allowed)
-# $args: hash ref of options; "f" skips the overwrite prompt, "v" is passed to cp
-# Dies if several sources are given but $dst is not an existing directory.
-sub copy_files {
-	my ($src, $dst, $args) = @_;
-	# Without an existing destination directory there is exactly one target
-	# path, so several sources would all be copied onto it and their
-	# database entries would overwrite each other under the same name.
-	if (!-d $dst && @$src != 1) {
-		die "cp: only one source argument allowed when destination is not a directory\n";
-	}
-	my $dst_dir = (-d $dst) ? $dst : dirname($dst);
-	# $diff_name is true if the source is stored under the name given by
-	# $dst instead of keeping its own name inside the directory $dst
-	my $diff_name = 0;
-	if (!-d $dst && !-d $src->[0]) {
-		$diff_name = 1;
-	}
-	if (!-e $dst && -d $src->[0]) {
-		$diff_name = 1;
-	}
-	my $dst_cksums = read_cksums($dst_dir);
-	return if !defined $dst_cksums;
-	my $options = $args->{"v"} ? "-av" : "-a";
-	# the sources are grouped by directory so that the checksums of each
-	# source directory are only read once
-	my $src_sorted = sort_by_dir(@$src);
-	my $files_touched = 0;
-	my $dirs_touched = 0;
-	foreach my $src_dir (keys %$src_sorted) {
-		my $src_cksums = read_cksums($src_dir);
-		next if !defined $src_cksums;
-		foreach my $src_file (@{$src_sorted->{$src_dir}}) {
-			my $src_path = "$src_dir/$src_file";
-			my $dst_path = $diff_name ? $dst : "$dst_dir/$src_file";
-			# merging into an existing directory is refused because the
-			# checksums inside that directory would not be updated
-			if (-d $dst_path && -d $src_path) {
-				warn "ERROR: Cannot copy directory to already existing directory\n";
-				next;
-			}
-			if (exists $SPECIAL_FILES{$src_file} || exists $SPECIAL_FILES{basename($dst_path)}) {
-				warn "ERROR: Not copying special file\n";
-				next;
-			}
-			# prompt_overwrite returns true if the path must be skipped
-			next if !$args->{"f"} && prompt_overwrite($dst_path);
-			# a non-zero exit status of cp leaves the database untouched
-			next if system("cp", $options, "--", $src_path, $dst);
-			if (-d $src_path) {
-				$dirs_touched = 1;
-			} else {
-				$files_touched = 1;
-			}
-			if (exists $src_cksums->{$src_file}) {
-				my $new_name = $diff_name ? basename($dst) : $src_file;
-				$dst_cksums->{$new_name} = $src_cksums->{$src_file};
-			} else {
-				warn "WARNING: \"$src_path\" not in cksum or directory list\n";
-			}
-		}
-	}
-	write_cksums($dst_dir, $dst_cksums, $files_touched, $dirs_touched);
-}
-# Moves one file or directory from $src to $dst with the system mv.
-# If $dst is an existing directory, the basename of $src is appended to it.
-# Unless the "f" option is set in $args, prompt_overwrite gets to veto the
-# move; the "v" option is handed on to mv as "-v".
-# Returns 1 if nothing was moved (special file, veto or mv failure) and 0
-# otherwise.
-sub move_file {
-	my ($src, $dst, $args) = @_;
-	my $src_special = exists $SPECIAL_FILES{basename($src)};
-	if ($src_special || exists $SPECIAL_FILES{basename($dst)}) {
-		warn "ERROR: Not moving special file\n";
-		return 1;
-	}
-	$dst = $dst . "/" . basename($src) if -d $dst;
-	if (!$args->{"f"}) {
-		return 1 if prompt_overwrite($dst);
-	}
-	my @mv = $args->{"v"} ? ("mv", "-v", "--") : ("mv", "--");
-	return 1 if system(@mv, $src, $dst);
-	# mv reported success, so a source that is still present is only
-	# reported and does not change the return value
-	if (-e $src) {
-		warn "ERROR: file could not be removed from source but will still be removed from checksum database\n";
-	}
-	return 0;
-}
-# Moves every entry of $src_files (basenames only) from $src_dir to $dst_dir.
-# The entries that were moved are deleted from the checksums of $src_dir,
-# which are written back, and added to $dst_cksums, which is left for the
-# caller to write.
-# Returns the list ($files_touched, $dirs_touched), or nothing if the
-# checksums of $src_dir could not be read.
-sub move_from_same_dir {
-	my ($src_dir, $src_files, $dst_cksums, $dst_dir, $args) = @_;
-	my $src_cksums = read_cksums($src_dir);
-	return if !defined $src_cksums;
-	my ($files_touched, $dirs_touched) = (0, 0);
-	foreach my $name (@$src_files) {
-		my $path = "$src_dir/$name";
-		# whether the path is a directory has to be checked before
-		# it is actually moved
-		my $is_dir = -d $path;
-		next if move_file($path, $dst_dir, $args);
-		if ($is_dir) {
-			$dirs_touched = 1;
-		} else {
-			$files_touched = 1;
-		}
-		if (!exists $src_cksums->{$name}) {
-			warn "WARNING: \"$path\" not in cksum or directory list.\n";
-			next;
-		}
-		$dst_cksums->{$name} = delete $src_cksums->{$name};
-	}
-	write_cksums($src_dir, $src_cksums, $files_touched, $dirs_touched);
-	return ($files_touched, $dirs_touched);
-}
-# Renames a single file or directory from $src to $dst and moves its entry
-# in the checksum database from the old name to the new one.
-# Nothing is written if a checksum database cannot be read or if move_file
-# reports that the path was not moved.
-sub move_rename {
-	my ($src, $dst, $args) = @_;
-	my ($src_dir, $src_file) = (dirname($src), basename($src));
-	my ($dst_dir, $dst_file) = (dirname($dst), basename($dst));
-	my $same_dir = $src_dir eq $dst_dir;
-	my $src_cksums = read_cksums($src_dir);
-	return if !defined $src_cksums;
-	my $dst_cksums;
-	if ($same_dir) {
-		# a rename inside one directory works on a copy of the already
-		# loaded checksums, so they are only read and written once
-		$dst_cksums = {%$src_cksums};
-		delete $dst_cksums->{$src_file};
-	} else {
-		$dst_cksums = read_cksums($dst_dir);
-		return if !defined $dst_cksums;
-	}
-	# has to be determined while $src is still in place
-	my $is_dir = -d $src;
-	my $dirs_touched = $is_dir ? 1 : 0;
-	my $files_touched = $is_dir ? 0 : 1;
-	return if move_file($src, $dst, $args);
-	if (exists $src_cksums->{$src_file}) {
-		$dst_cksums->{$dst_file} = delete $src_cksums->{$src_file};
-	} else {
-		warn "WARNING: \"$src\" not in cksum or directory list.\n";
-	}
-	write_cksums($dst_dir, $dst_cksums, $files_touched, $dirs_touched);
-	if (!$same_dir) {
-		write_cksums($src_dir, $src_cksums, $files_touched, $dirs_touched);
-	}
-}
-# Moves all files and directories in $src to $dst.
-# - $dst is an existing directory: every path in $src is moved into it
-# - $dst does not exist: $src has to hold exactly one path, which is
-#   renamed to $dst
-# - $dst is a file: $src has to hold exactly one path, which must not be a
-#   directory and is renamed to $dst
-# $src:  array ref of source paths
-# $dst:  destination directory or file (in latter case only one src is allowed)
-# $args: hash ref of options, handed on to the functions doing the moving
-sub move_files {
-	my ($src, $dst, $args) = @_;
-	if (!-d $dst) {
-		if (@$src != 1) {
-			die "move: only one source argument allowed when destination is a file\n";
-		}
-		my $only_src = $src->[0];
-		# what is left once this test fails is a directory as source
-		# and an existing non-directory as destination
-		if (!-d $only_src || !-e $dst) {
-			move_rename($only_src, $dst, $args);
-			return;
-		}
-		die "move: can't move directory to file\n";
-	}
-	# The sources are grouped by their directory so the cksum and dir
-	# files of each directory only need to be opened once
-	my $by_dir = sort_by_dir(@$src);
-	my $dst_cksums = read_cksums($dst);
-	return if !defined $dst_cksums;
-	my ($files_touched, $dirs_touched) = (0, 0);
-	foreach my $src_dir (keys %$by_dir) {
-		my ($moved_files, $moved_dirs) =
-		    move_from_same_dir($src_dir, $by_dir->{$src_dir}, $dst_cksums, $dst, $args);
-		$files_touched ||= $moved_files;
-		$dirs_touched ||= $moved_dirs;
-	}
-	write_cksums($dst, $dst_cksums, $files_touched, $dirs_touched);
-}
-# Removes $path from the filesystem with the system rm, recursively.
-# "-rf" is used instead of "-r" if the "f" option is set in $args.
-# Returns 1 if rm exited with a non-zero status and 0 otherwise; a path that
-# still exists after a successful rm only causes a message to be printed.
-sub remove_file_dir {
-	my ($path, $args) = @_;
-	my $flags = "-r";
-	$flags = "-rf" if $args->{"f"};
-	return 1 if system("rm", $flags, "--", $path);
-	warn "ERROR: Unable to remove \"$path\" from filesystem but will still be removed from checksum database\n"
-	    if -e $path;
-	return 0;
-}
-# Removes the given files from the directory $dir and deletes their entries
-# from the checksum files of that directory.
-# note: @files must only hold basenames, i.e., exactly the names that are
-# used as keys in the checksum files
-# $args is handed on to remove_file_dir.
-sub remove_from_same_dir {
-	my ($args, $dir, @files) = @_;
-	my $cksums = read_cksums($dir);
-	return if !defined $cksums;
-	my ($files_touched, $dirs_touched) = (0, 0);
-	foreach my $name (@files) {
-		if (exists $SPECIAL_FILES{$name}) {
-			warn "ERROR: not removing special file $name\n";
-			next;
-		}
-		my $path = "$dir/$name";
-		# a missing path is only reported here; the removal below is
-		# still attempted
-		warn "\"$path\": No such file or directory.\n" if !-e $path;
-		next if remove_file_dir($path, $args);
-		if (!exists $cksums->{$name}) {
-			warn "WARNING: \"$name\" not in cksum or directory list.\n";
-			next;
-		}
-		# an entry with a defined value is counted as a file, one
-		# without a value as a directory
-		my $value = delete $cksums->{$name};
-		if (defined $value) {
-			$files_touched = 1;
-		} else {
-			$dirs_touched = 1;
-		}
-	}
-	write_cksums($dir, $cksums, $files_touched, $dirs_touched);
-}
-# Removes all files and directories given after $args, handling them one
-# directory at a time so that the appropriate checksum files get updated.
-sub remove_files {
-	my ($args, @paths) = @_;
-	my $by_dir = sort_by_dir(@paths);
-	foreach my $dir (keys %$by_dir) {
-		my @names = @{$by_dir->{$dir}};
-		remove_from_same_dir($args, $dir, @names);
-	}
-}
-# Creates the given directories, gives each of them empty checksum files and
-# enters them into the directory list of their parent directory.
-# note: this is not "mkdir -p", i.e., every new directory has to be located
-# inside a directory that exists already
-sub make_dirs {
-	my @created;
-	foreach my $path (@_) {
-		if (system("mkdir", "--", $path) == 0) {
-			push @created, $path;
-		} else {
-			warn "ERROR creating directory $path\n";
-		}
-	}
-	# The new directories are grouped by parent so the dir file of each
-	# parent only needs to be opened once
-	my %children_of;
-	foreach my $path (@created) {
-		write_cksums($path, {}, 1, 1);
-		push @{$children_of{dirname($path)}}, basename($path);
-	}
-	foreach my $parent (keys %children_of) {
-		my $dir_file = "$parent/$DIR_FILE";
-		my $listed = read_file($dir_file, {});
-		next if !defined $listed;
-		$listed->{$_} = "" foreach @{$children_of{$parent}};
-		write_file($dir_file, $listed);
-		write_special_cksums($parent, $DIR_FILE);
-	}
-}
-# Copies all special lumia files found below $src_dir to $dst_dir, creating
-# the same directory structure there in the process.
-# $args: hash ref of options; "v" makes cp print each file that is copied
-sub extract {
-	my ($src_dir, $dst_dir, $args) = @_;
-	my $next_dir = make_lumia_iter(0, $src_dir);
-	my $cp_flags = "-a";
-	$cp_flags = "-av" if $args->{"v"};
-	while (my $dir = $next_dir->()) {
-		# the directory relative to $src_dir, recreated under $dst_dir
-		my $target_dir = catfile($dst_dir, abs2rel($dir, $src_dir));
-		system("mkdir", "-p", "--", $target_dir);
-		foreach my $name (keys %SPECIAL_FILES) {
-			my $from = catfile($dir, $name);
-			next if !-e $from;
-			system("cp", $cp_flags, "--", $from, catfile($target_dir, $name));
-		}
-	}
-}
-# Recalculates the checksums of the given files and stores them in the
-# checksum file of the directory each file is located in.
-# Directories given as arguments are skipped with a message.
-sub update {
-	my @files;
-	foreach my $path (@_) {
-		if (!-d $path) {
-			push @files, $path;
-			next;
-		}
-		warn "Ignoring directory \"$path\"\n";
-	}
-	my $by_dir = sort_by_dir(@files);
-	foreach my $dir (keys %$by_dir) {
-		my $cksum_path = "$dir/$CKSUM_FILE";
-		my $cksums = read_cksum_file($cksum_path, {});
-		next if !defined $cksums;
-		my $updated = 0;
-		foreach my $name (@{$by_dir->{$dir}}) {
-			my $cksum = get_cksum("$dir/$name");
-			# files without a checksum result keep their old entry
-			next if !defined $cksum;
-			$cksums->{$name} = $cksum;
-			$updated = 1;
-		}
-		# nothing is written if no checksum in this directory changed
-		next if !$updated;
-		write_cksum_file($cksum_path, $cksums);
-		write_special_cksums($dir, $CKSUM_FILE);
-	}
-}
-# Backend of the "updatespecial" command: has write_special_cksums process
-# both $CKSUM_FILE and $DIR_FILE in the directory $dir.
-sub update_special {
-	my ($dir) = @_;
-	write_special_cksums($dir, $CKSUM_FILE, $DIR_FILE);
-}
-# Command line handling: the options are parsed first (they may be written
-# before or after the command), then the first remaining argument selects
-# the command and everything after it is handed to that command.
-my %args;
-GetOptions(\%args, "f|force", "q|quiet", "v|verbose", "h|help");
-pod2usage(-exitval => 0, -verbose => 2) if $args{"h"};
-pod2usage(-exitval => 1, -verbose => 1) if @ARGV < 1;
-my $cmd = shift;
-if ($cmd eq "mv") {
-	die "mv requires at least two arguments\n" if @ARGV < 2;
-	# the last argument is the destination, all others are sources
-	my @src = @ARGV[0..$#ARGV-1];
-	move_files \@src, $ARGV[-1], \%args;
-} elsif ($cmd eq "rm") {
-	die "rm requires at least one argument\n" if @ARGV < 1;
-	remove_files \%args, @ARGV;
-} elsif ($cmd eq "addnew") {
-	my $dir = @ARGV ? $ARGV[0] : ".";
-	check_add_new_files $dir, \%args;
-} elsif ($cmd eq "checknew") {
-	my $dir = @ARGV ? $ARGV[0] : ".";
-	check_new_files $dir;
-} elsif ($cmd eq "checkold") {
-	my $dir = @ARGV ? $ARGV[0] : ".";
-	check_old_files $dir;
-} elsif ($cmd eq "rmold") {
-	my $dir = @ARGV ? $ARGV[0] : ".";
-	remove_old_files $dir, \%args;
-} elsif ($cmd eq "check") {
-	if (@ARGV < 1) {
-		check_files \%args, ".";
-	} else {
-		check_files \%args, @ARGV;
-	}
-} elsif ($cmd eq "clean") {
-	my $dir = @ARGV ? $ARGV[0] : ".";
-	clean_files $dir, \%args;
-} elsif ($cmd eq "extract") {
-	# the source is optional and defaults to the current directory
-	my $src_dir = ".";
-	my $dst_dir;
-	if (@ARGV == 2) {
-		$src_dir = $ARGV[0];
-		$dst_dir = $ARGV[1];
-	} elsif (@ARGV == 1) {
-		$dst_dir = $ARGV[0];
-	} else {
-		die "Invalid number of arguments\n";
-	}
-	if (!-d $src_dir) {
-		die "ERROR: Directory \"$src_dir\" does not exist.\n";
-	}
-	if (!-d $dst_dir) {
-		die "ERROR: Directory \"$dst_dir\" does not exist.\n";
-	}
-	# the options have to be handed on, otherwise -v is never seen by extract
-	extract $src_dir, $dst_dir, \%args;
-} elsif ($cmd eq "cp") {
-	die "cp requires at least two arguments\n" if @ARGV < 2;
-	# the last argument is the destination, all others are sources
-	my @src = @ARGV[0..$#ARGV-1];
-	copy_files \@src, $ARGV[-1], \%args;
-} elsif ($cmd eq "mkdir") {
-	die "mkdir requires at least one argument\n" if @ARGV < 1;
-	make_dirs @ARGV;
-} elsif ($cmd eq "update") {
-	die "update requires at least one argument\n" if @ARGV < 1;
-	update @ARGV;
-} elsif ($cmd eq "updatespecial") {
-	die "Invalid number of arguments\n" if @ARGV > 1;
-	my $dir = @ARGV ? $ARGV[0] : ".";
-	update_special $dir;
-} else {
-	# unknown command: print the usage and exit with an error
-	pod2usage(-exitval => 1, -verbose => 1);
-}
-=head1 NAME
-lumia - Manage checksums on a filesystem
-=head1 SYNOPSIS
-B<lumia> command [-hqfv] arguments
-=head1 OPTIONS
-=over 8
-=item B<-h>, B<--help>
-Show the full documentation.
-=item B<-q>, B<--quiet>
-Only output errors.
-=item B<-f>, B<--force>
-Overwrite files without prompting for confirmation.
-=item B<-v>, B<--verbose>
-Print each file that is processed by the command.
-See the full documentation for details on which commands support which options
-and what they do.
-It does not matter if the options are written before or after the command.
-If C<--> is written anywhere on the command line, option parsing is stopped,
-so that files starting with a hyphen can still be specified.
-Note that C<-q> and C<-v> aren't exactly opposites - C<-q> applies to commands
-like B<check>, where it suppresses printing of the individual files, while
-C<-v> applies to commands like B<cp>, where it is just passed on to the system
-command called in the background.
-Note further that this is very inconsistent, like the rest of the program, but
-the author has made too many bad decisions to rectify that problem at the moment.
-=head1 COMMANDS
-Note that some commands support multiple files/directories as arguments and others,
-for which it would make just as much sense, don't. That's just the way it is.
-=over 8
-=item B<addnew> [-q] [directory]
-Walks through B<directory>, adding all new files to the checksum database.
-B<directory> defaults to the current directory.
-C<-q> suppresses the printing of each file or directory as it is added.
-=item B<checknew> [directory]
-Walks through B<directory>, printing all files that aren't part of the checksum
-database. B<directory> defaults to the current directory.
-=item B<checkold> [directory]
-Prints all files in the checksum database that do not exist on the filesystem anymore.
-B<directory> defaults to the current directory.
-=item B<rmold> [-q] [directory]
-Removes all files found by B<checkold> from the database. B<directory> defaults to
-the current directory.
-C<-q> suppresses the printing of each file as it is removed.
-=item B<check> [-q] file/directory ...
-Verifies the checksums of all files given, recursing through any directories. If no
-files or directories are given, the current directory is used.
-Note that the checksum database in the corresponding directory will be read again for
-every file given on the command line, even if 1000 files in the same directory are given.
-This problem does not occur when recursing through directories, so it is best to only
-give files directly when checking a few. This problem wouldn't be too difficult to
-fix, but, frankly, I'm too lazy, especially since I only added the feature to check
-files individually as a convenience when I want to quickly check a single file in a
-large directory.
-To explain why it is this way: The directory recursion is done using an iterator, which
-has the directories pushed onto its queue in the beginning. The iterator only returns
-directories, which are then checked all in one go, but this means that files given on
-the command line need to be handled specially.
-C<-q> suppresses the printing of all good checksums but still allows a message to
-be printed when a checksum failed.
-=item B<clean> [-q] [directory]
-Removes all lumia special files used to store the checksum database from B<directory>
-recursively. B<directory> defaults to the current directory.
-Note that this recurses through the entire directory tree, not just the part that is
-actually linked together by the checksum database.
-Warning: This just blindly removes all files with one of the special lumia names,
-even if they weren't actually created by lumia.
-C<-q> suppresses the printing of each file as it is deleted.
-=item B<extract> [-v] [source] destination
-Recreates the entire directory structure from B<source> in B<destination>, but only
-copies the special files used to store the checksum database. B<source> defaults to
-the current directory.
-C<-v> prints each file as it is copied.
-Note that this overwrites files in the destination directory without confirmation.
-=item B<mkdir> directory ...
-Creates the given directories, initializing them with empty checksum database files.
-=item B<update> file ...
-Recalculates the checksums for the given files and replaces them in the database.
-Note: Directories given as arguments are ignored.
-This is mainly meant to quickly "touch" a file after it was modified (e.g. a
-notes file that is occasionally updated).
-=item B<updatespecial> [directory]
-Recalculates the checksums for the special files C<.lumidify_archive_dirs> and
-C<.lumidify_archive_cksums> and writes them to C<.lumidify_archive_cksums.cksum>.
-B<directory> defaults to the current directory.
-This is only meant to be used if, for some reason, the checksum files had to
-be edited manually and thus don't match the checksums in C<.lumidify_archive_cksums.cksum>.
-=item B<rm> [-f] file ...
-Removes the given files and directories recursively from the filesystem and
-checksum database. The following caveats apply:
-If any actual errors occur while deleting the file/directory (i.e. the system
-command C<rm> returns a non-zero exit value), the checksum or directory B<is
-left in the database>. If the system C<rm> does not return a non-zero exit value,
-but the file/directory still exists afterwards (e.g. there was a permission
-error and the user answered "n" when prompted), a warning message is printed,
-but the files B<are removed from the database> (if the database can be
-written to).
-It is an error if there are no checksum database files in the directory
-of a file named on the command line.
-C<-f> is passed through to the system C<rm> command.
-=item B<cp> [-vf] source target
-=item B<cp> [-vf] source ... directory
-Copies the given source files, updating the checksum database in the process.
-If the last argument is a file, there must be only one source argument, also a file,
-which is then copied to the target.
-If the last argument is a directory, all source arguments are copied into it.
-It is an error if a source or destination directory does not contain any
-checksum database files.
-B<cp> will issue a warning and skip to the next argument if it is asked to
-merge a directory with an already existing directory. For instance, attempting
-to run C<cp dir1 dir2>, where C<dir2> already contains a directory named
-C<dir1>, will result in an error. This may change in the future, when the
-program is modified to recursively copy the files manually, instead of simply
-calling the system C<cp> on each of the arguments. If this was supported in
-the current version, none of the checksums inside that directory would be
-updated, so it wouldn't be very useful.
-C<-v> is passed through to the system C<cp> command.
-C<-f> silently overwrites files without prompting the user, much like the
-C<-f> option in the system C<cp> command. This is handled manually by the
-program, though, in order to actually determine what the user chose. See
-also the caveat mentioned above.
-=item B<mv> [-f] source target
-=item B<mv> [-f] source ... directory
-Moves the given source files, updating the checksum database in the process.
-If the last argument is a file or does not exist, there must be only one source
-argument, which is renamed to the target name.
-If the last argument is an existing directory, all source arguments are moved
-into it.
-It is an error if a source or destination directory does not contain any
-checksum database files.
-B<mv> behaves the same as B<rm> with regard to checking if the source file
-is still present after the operation and other error handling.
-C<-f> is handled in the same manner as with B<cp>.
-lumia is meant for managing checksums of files in order to prevent bitrot.
-It does this by storing several special files in each directory to keep track
-of the checksums:
-=over 8
-=item B<.lumidify_archive_cksums>
-Contains the checksums of all files in the directory.
-=item B<.lumidify_archive_dirs>
-Contains a list of all directories in the directory.
-=item B<.lumidify_archive_cksums.cksum>
-Contains the checksums of B<.lumidify_archive_cksums> and B<.lumidify_archive_dirs>
-in order to provide two-layer protection against bitrot.
-=item B<.lumidify_archive_ignore>
-Contains a list of files and directories that should be ignored by lumia.
-Note that this is only read and never written to, unless the command B<clean>
-is used. It is, however, still copied over by the B<extract> command.
-When the documentation for the commands talks about the "checksum database",
-it simply means these files.
-All file/directory names are enclosed in quotes, with any backslashes or quotes
-inside the name escaped with another backslash. The names are allowed to have
-newlines in them.
-The list files only contain a list of filenames, with a newline between the
-closing quote of one name and the opening quote of the next one.
-The checksum files additionally contain the output of the checksum program
-used and a space before the starting quote of the filename.
-There are already several programs that can be used to check for bitrot,
-as listed in L</"SEE ALSO">. However, all programs I tried either were
-much too complicated for my taste or just did everything behind my back.
-I wanted a simple tool that did exactly what I told it to and also allowed
-me to keep the old checksums when reorganizing files, in order to avoid
-regenerating the checksums from corrupt files. Since I couldn't find those
-features in any program I tried, I wrote my own.
-It may strike some readers as a peculiar idea to save the checksum files in
-I<every single directory>, but this choice was made after much deliberation.
-The other option I could think of was to have one big database, but that
-would have made all commands much more difficult to implement and additionally
-necessitated opening the entire database for every operation. With individual
-files in each directory, operations like B<cp> become quite trivial (ignoring
-all the edge cases) since only the toplevel checksums need to be copied to
-the new destination, and any subdirectories already contain the checksums.
-This method is not without its drawbacks, however. The most glaring problem
-I have found is that there is no way to store the checksums of read-only
-directories or any special directories that cannot be littered with the
-checksum files because that would clash with other software. Despite these
-drawbacks, however, I decided to stick with it because it works for almost
-all cases and doesn't have any of the serious drawbacks that other options
-would have had.
-The names of the special files were chosen to be ".lumidify_archive*" not
-out of vanity, but mainly because I couldn't think of any regular files
-with those names, making them a good choice to avoid clashes.
-The name of the program, C<lumia> (for "lumidify archive"), was similarly
-chosen because it did not clash with any programs installed on my system and
-thus allowed for easy tab-completion.
-By default, the simple cksum algorithm is used to get the checksums. This
-is not very secure, but the main purpose of the program is to prevent
-bitrot, for which cksum should be sufficient, especially since it is much
-faster than other algorithms.
-There is currently no convenient way to change the algorithm other than
-changing the $CKSUM_CMD and $CKSUM_NUMFIELDS variables at the top of lumia.
-$CKSUM_CMD must be the command that returns the checksum
-when it is given a file, and $CKSUM_NUMFIELDS specifies the number of
-space-separated fields the checksum consists of. This has to be specified
-in order to determine where the checksum ends and the filename begins in
-the output. This would be redundant if all implementations of cksum
-supported '-q' for outputting only the checksum, but that only seems to
-be supported by some implementations.
-=over 8
-=item B<Security auditing>
-This program is B<NOT> designed to provide any security auditing, as should
-be clear from the fact that the checksums are stored right in the same
-directory as the files. See mtree(8) for that.
-If you want to, however, you could set $CKSUM_CMD to a secure hash (not cksum)
-and B<extract> the checksums to a separate directory, which you keep in a
-safe place. You could then use the regular C<cp> command to simply replace
-all the checksums with the ones from your backup, in case an attacker modified
-the checksum database in the directory with the actual files you're trying to
-protect. I don't know if there would be any point in doing that, though.
-=item B<Managing archives>
-This is the purpose I wrote the program for.
-You can simply initialize your archive directory with the B<addnew> command.
-Whenever you add new files, just run B<addnew> again. If you want to reorganize
-the archive, you can use the limited commands available.
-I usually just use rsync(1) to copy the entire archive directory over to other
-backup drives and then use the B<check> command again on the new drive.
-I also have checksums for the main data directory on my computer (except for
-things like git repositories, which I don't want littered with the database
-files). Here, I use the B<update> command for files that I edit more often
-and occasionally run B<check> on the entire directory.
-Since the database files are written in each directory, you can run the
-B<addnew> command in any subdirectory when you've added new files there.
-Due to the extensive use of iterators and the author's bad life choices,
-some functions, such as B<addnew> and B<check>, run more slowly than they
-would if they were programmed more efficiently, especially on many small
-files and folders. Too bad.
-This program was written on OpenBSD. It will probably work on most other
-reasonably POSIX-Compliant systems, although I cannot guarantee anything.
-$CKSUM_CMD may need to be modified at the top of lumia. The file
-operation commands are called directly with system(), so those need to
-be available.
-It will most certainly not work on Windows, but that shouldn't be a
-problem for anyone important.
-=head1 BUGS
-All system commands (unless I forgot some) are called with "--" before
-listing the actual files, so files beginning with hyphens should be
-supported. I have tested the commands with filenames starting with spaces
-and hyphens and also containing newlines, but there may very well be issues
-still. Please notify me if you find any filenames that do not work. Handling
-filenames properly is difficult.
-There are probably many other edge cases, especially in the B<mv>, B<cp>,
-and B<rm> commands. Please notify me if you find an issue.
-Operations on files containing newlines may cause Perl to print a warning
-"Unsuccessful stat on filename containing newline" even though nothing is
-wrong since (as described in B<mv> and B<rm>) existence of the file is
-checked afterwards. I didn't feel like disabling warnings, and no normal
-person should be working with files containing newlines anyways, so that's
-the way it is.
-Always 0, unless the arguments given were invalid. We don't do errors around here.
-On a more serious note - I should probably change that at some point.
-For the time being, if you want to run B<check> in a script, you can test
-the output printed when the C<-q> option is used, since this won't output
-anything if there are no errors. Do note, though, that actual errors (file not
-found, etc.) are printed to STDERR, while incorrect checksums are printed
-to STDOUT.
-=head1 SEE ALSO
-par2(1), mtree(8), aide(1), bitrot(no man page)
-=head1 LICENSE
-Copyright (c) 2019, 2020, 2021 lumidify <nobody[at]>
-Permission to use, copy, modify, and/or distribute this software for any
-purpose with or without fee is hereby granted, provided that the above
-copyright notice and this permission notice appear in all copies.
diff --git a/tests/README b/tests/README
@@ -5,3 +5,6 @@ To run all tests, simply run
There aren't that many tests yet. Maybe I'll get around
to adding more tests for all the edge cases someday.
+These only work properly on OpenBSD.
+Maybe I'll fix that someday.