lumia

Archive checksum manager
git clone git://lumidify.org/lumia.git (fast, but not encrypted)
git clone https://lumidify.org/git/lumia.git (encrypted, but very slow)
Log | Files | Refs | README | LICENSE

commit bfbb35d091d986579edafe7b01ce0ce12dc1a27d
parent 0cae73baf7706b733eec0fadee2e11c7eb25b998
Author: lumidify <nobody@lumidify.org>
Date:   Fri, 17 Dec 2021 09:11:55 +0100

Add Makefile; improve documentation

Diffstat:
A .gitignore | 1 +
A CHANGELOG | 4 ++++
A Makefile | 37 +++++++++++++++++++++++++++++++++++++
M README | 11 +++++++++--
A lumia | 1403 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lumia.pl | 1403 -------------------------------------------------------------------------------
M tests/README | 3 +++
7 files changed, 1457 insertions(+), 1405 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +lumia.1 diff --git a/CHANGELOG b/CHANGELOG @@ -0,0 +1,4 @@ +1.0 -> 1.1 +* Made it work with cksum implementations that don't have '-q' +* Added Makefile +* Improved documentation diff --git a/Makefile b/Makefile @@ -0,0 +1,37 @@ +.POSIX: + +NAME = lumia +VERSION = 1.1 + +PREFIX = /usr/local +MANPREFIX = ${PREFIX}/man + +MAN1 = ${NAME:=.1} +MISCFILES = Makefile README LICENSE CHANGELOG tests + +${MAN1}: ${NAME} + pod2man ${NAME} ${MAN1} + +install: ${MAN1} + mkdir -p "${DESTDIR}${PREFIX}/bin" + cp -f ${NAME} "${DESTDIR}${PREFIX}/bin" + chmod 755 "${DESTDIR}${PREFIX}/bin/${NAME}" + mkdir -p "${DESTDIR}${MANPREFIX}/man1" + cp -f ${MAN1} "${DESTDIR}${MANPREFIX}/man1" + chmod 644 "${DESTDIR}${MANPREFIX}/man1/${MAN1}" + +uninstall: + rm -f "${DESTDIR}${PREFIX}/bin/${NAME}" + rm -f "${DESTDIR}${MANPREFIX}/man1/${MAN1}" + +clean: + rm -f ${MAN1} + +dist: + rm -rf "${NAME}-${VERSION}" + mkdir -p "${NAME}-${VERSION}" + cp -rf ${NAME} ${MISCFILES} "${NAME}-${VERSION}" + tar cf - "${NAME}-${VERSION}" | gzip -c > "${NAME}-${VERSION}.tar.gz" + rm -rf "${NAME}-${VERSION}" + +.PHONY: all clean install uninstall dist diff --git a/README b/README @@ -1,3 +1,10 @@ -REQUIREMENTS: String::ShellQuote +REQUIREMENTS: Perl 5, String::ShellQuote -See the perldoc in lumia.pl for documentation (run perldoc -F lumia.pl). +lumia is meant for managing checksums of files in order to prevent bitrot. +It does this by storing several special files in each directory to keep +track of the checksums. + +See the perldoc in lumia for the documentation (run perldoc -F lumia). +Alternatively, install it with 'make install' (as root; this just generates +a man page and copies lumia and the generated man page to the appropriate +system directories) and then run 'man lumia'. diff --git a/lumia b/lumia @@ -0,0 +1,1403 @@ +#!/usr/bin/env perl + +# TODO: some way to avoid writing .lumidify* in dirs but still index them? 
+# TODO: store modified date and checksum files with changed date +# TODO: add option to just check dir structure or maybe check if everything exists +# TODO: add option to compare cksums of two dirs +# TODO: exit status! + +use strict; +use warnings; +use File::Spec::Functions qw(catfile abs2rel); +use File::Basename qw(basename dirname); +use String::ShellQuote; +use Pod::Usage; +use Getopt::Long; + +# the file used to store checksums for files +my $CKSUM_FILE = ".lumidify_archive_cksums"; +# the file used to store directory names +my $DIR_FILE = ".lumidify_archive_dirs"; +# the file read to ignore files or directories +my $IGNORE_FILE = ".lumidify_archive_ignore"; +# the file containing checksums of $CKSUM_FILE and $DIR_FILE +my $DOUBLE_CKSUM_FILE = ".lumidify_archive_cksums.cksum"; + +# uncomment this instead of the lines below to use +# sha256 instead of cksum as the hash algorithm +# Note: this isn't really tested properly +#my $CKSUM_CMD = 'sha256 -q'; +#my $CKSUM_NUMFIELDS = 1; +my $CKSUM_CMD = 'cksum'; +my $CKSUM_NUMFIELDS = 2; + +my %SPECIAL_FILES = ( + $CKSUM_FILE => 1, + $DIR_FILE => 1, + $IGNORE_FILE => 1, + $DOUBLE_CKSUM_FILE => 1 +); + +# escape a filename for writing into the checksum files +sub escape_filename { + my $file = shift; + $file =~ s/\\/\\\\/g; + $file =~ s/"/\\"/g; + return $file; +} + +# make a generic file iterator +# $file_func determines whether a file should be returned by the iterator +# $dir_func is called for each directory and returns all files that +# should be added to the queue +sub make_file_iter { + my ($file_func, $dir_func, @queue) = @_; + return sub { + while (@queue) { + my $file = pop @queue; + if (-d $file) { + my $new_files = $dir_func->($file); + next if !defined $new_files; + push @queue, @$new_files; + } + return $file if $file_func->($file); + } + return; + }; +} + +# make a basic filename iterator, which simply returns all files +# for which $file_func returns a true value +sub make_file_iter_basic { + my 
($file_func, @files) = @_; + make_file_iter $file_func, sub { + my $dh; + if (!opendir $dh, $_[0]) { + warn "WARNING: Unable to open directory \"$_[0]\"!"; + return []; + } + my @new_files = map "$_[0]/$_", grep {$_ ne "." && $_ ne ".."} readdir $dh; + closedir $dh; + return \@new_files; + }, @files; +} + +# make an iterator that only returns the directories which are present +# in the $DIR_FILE files, in addition to the files and directories that +# were originally passed as arguments +# note: this returns nonexistent directories if those are still +# specified in the lumia files +sub make_lumia_iter { + my ($quiet, @dirs) = @_; + make_file_iter sub {1}, sub { + my $path = "$_[0]/$DIR_FILE"; + return [] if !-f $path; + my $dirs = read_file($path, {}); + return if !defined $dirs; + my @new_dirs; + foreach my $dir (keys %$dirs) { + my $dir_path = "$_[0]/$dir"; + if (!-d $dir_path) { + warn "ERROR: Directory \"$dir_path\" mentioned in " . + "\"$path\" does not exist or is not directory.\n" if !$quiet; + } + # still push it even when it doesn't exist so rmold can work properly + push @new_dirs, $dir_path; + } + return \@new_dirs; + }, @dirs; +} + +# remove all special lumia files from the given directory +sub clean_files { + my ($dir, $args) = @_; + my $iter = make_file_iter_basic sub {exists $SPECIAL_FILES{basename $_[0]};}, $dir; + while (my $file = $iter->()) { + if (!unlink $file) { + warn "WARNING: Unable to remove file \"$file\"!\n"; + } else { + print "Deleted \"$file\"\n" if !$args->{"q"}; + } + } +} + +# read a file, processing each line with $handle_cksum_func if set +# and writing the results into $cksums +# $handle_cksum_func must return two values, the checksum of the +# argument and the rest of the string (that is then parsed for +# the filename); if it returns undef, this function also returns undef +sub read_file { + my ($file, $cksums, $handle_cksum_func) = @_; + my $fh; + if (!open $fh, "<", $file) { + warn "ERROR: Unable to open file \"$file\": 
$!\n"; + return; + } + my $in_fn = 0; + my $cur_cksum; + my $cur_str; + my $cur_fn = ""; + foreach (<$fh>) { + next if (!$in_fn && /^$/); + if ($handle_cksum_func && !$in_fn) { + ($cur_cksum, $cur_str) = $handle_cksum_func->($_); + return undef if !defined $cur_cksum; + } else { + $cur_str = $_; + } + my $bs = 0; + foreach my $ch (split(//, $cur_str)) { + if ($ch eq "\\") { + $bs++; + $cur_fn .= "\\" if !($bs %= 2) && $in_fn; + } elsif ($bs % 2) { + $cur_fn .= $ch if $in_fn; + $bs = 0; + } elsif ($ch eq "\"") { + if ($in_fn) { + $in_fn = 0; + $cksums->{$cur_fn} = $cur_cksum; + $cur_fn = ""; + last; + } + $in_fn = 1; + } elsif ($in_fn) { + $cur_fn .= $ch; + } + } + } + close $fh; + if ($in_fn) { + warn "ERROR: Unterminated filename in file \"$file\"\n"; + return undef; + } + return $cksums; +} + +# read a single checksum file, writing the checksums into the hash $cksums and returning it +sub read_cksum_file { + my ($file, $cksums) = @_; + return read_file $file, $cksums, sub { + my $line = shift; + my @fields = split(/ /, $line, $CKSUM_NUMFIELDS+1); + if (@fields != $CKSUM_NUMFIELDS+1) { + warn "WARNING: Malformed line \"$line\" in file \"$file\"\n"; + return; + } + my $cur_cksum = join(" ", @fields[0..$CKSUM_NUMFIELDS-1]); + my $cur_str = $fields[$CKSUM_NUMFIELDS]; + return ($cur_cksum, $cur_str); + }; +} + +# read the checksums and directory names in $dir +sub read_cksums { + my $dir = shift; + my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}); + return undef if !defined $cksums; + $cksums = read_file("$dir/$DIR_FILE", $cksums); + return undef if !defined $cksums; + return $cksums; +} + +# get the checksum output for $path +# returns undef if $CKSUM_CMD returns an error +sub get_cksum { + my $path = shift; + my $path_esc = shell_quote $path; + my $cksum_output = `$CKSUM_CMD -- $path_esc 2>&1`; + if ($?) 
{ + warn "ERROR getting cksum for file \"$path\":\n$cksum_output"; + return undef; + } + chomp $cksum_output; + my @fields = split(/ /, $cksum_output, $CKSUM_NUMFIELDS+1); + return join(" ", @fields[0..$CKSUM_NUMFIELDS-1]); +} + +# check the checksums in $dir/$cksum_file +# if $quiet is set, only print failed files +sub check_cksums { + my ($dir, $cksum_file, $quiet) = @_; + my $cksums = read_cksum_file("$dir/$cksum_file", {}); + return 0 if !defined $cksums; + my $failed = 1; + foreach my $file (keys %$cksums) { + my $path = "$dir/$file"; + my $output = get_cksum $path; + next if !defined $output; + if ($output eq $cksums->{$file}) { + print "OK $path\n" if !$quiet; + } else { + print "FAILED $path\n"; + $failed = 0; + } + } + return $failed; +} + +# check the checksums of all files and directories in @dirs +sub check_files { + my $args = shift; + my @dirs; + foreach my $file (@_) { + if (-d $file) { + push @dirs, $file; + next; + } + my $dir = dirname $file; + my $base = basename $file; + if (exists $SPECIAL_FILES{$base}) { + warn "ERROR: File is reserved for lumia: $file\n"; + next; + } + my $cksums = read_cksum_file("$dir/$CKSUM_FILE"); + next if !defined $cksums; + if (!exists $cksums->{$base}) { + warn "ERROR: File doesn't exist in checksums: $file\n"; + next; + } + my $output = get_cksum "$file"; + next if !defined $output; + if ($output eq $cksums->{$base}) { + print "OK $file\n" if !$args->{"q"}; + } else { + print "FAILED $file\n"; + } + } + my $iter = make_lumia_iter 0, @dirs; + while (my $file = $iter->()) { + check_cksums $file, $DOUBLE_CKSUM_FILE, $args->{"q"}; + check_cksums $file, $CKSUM_FILE, $args->{"q"}; + } +} + +# write the checksums of the special lumia files given as arguments +# to $DOUBLE_CKSUM_FILE in $dir +sub write_special_cksums { + my ($dir, @files) = @_; + my $cksum_file = "$dir/$DOUBLE_CKSUM_FILE"; + my $cksums = {}; + if (-f $cksum_file) { + $cksums = read_cksum_file $cksum_file, {}; + } + return if !defined $cksums; + foreach my 
$file (@files) { + my $cksum_output = get_cksum("$dir/$file"); + next if (!defined $cksum_output); + $cksums->{$file} = $cksum_output; + } + write_file($cksum_file, $cksums, 1); +} + +# search for new files that aren't present in the checksum files +# - if $file_func is set, it is called for each new file +# - if $before_dir_func is set, it is called before processing the +# files in each directory that has new files OR if a directory +# is entirely new (well, it only checks if $DOUBLE_CKSUM_FILE exists) +# - if $after_dir_func is set, it is called after processing the +# files in each directory that has new files +sub check_new_files { + my ($top_dir, $file_func, $before_dir_func, $after_dir_func) = @_; + my $iter = make_file_iter sub {1}, sub { + my $dir = shift; + my $dh; + if (!opendir $dh, $dir) { + warn "ERROR: Unable to open directory \"$dir\"!"; + return undef; + } + my $read_file_noerror = sub { + if (-f $_[0]) { + return $_[1]->($_[0], {}) // {}; + } + return {}; + }; + my $ignore = $read_file_noerror->("$dir/$IGNORE_FILE", \&read_file); + my $lumia_dirs = $read_file_noerror->("$dir/$DIR_FILE", \&read_file); + my $lumia_files = $read_file_noerror->("$dir/$CKSUM_FILE", \&read_cksum_file); + my @dirs; + my $found = 0; + while (my $file = readdir $dh) { + next if $file eq "." 
|| $file eq ".."; + next if exists $ignore->{$file} || exists $SPECIAL_FILES{$file}; + if (!exists $lumia_dirs->{$file} && !exists $lumia_files->{$file}) { + if (!$found && defined $before_dir_func) { + last if !$before_dir_func->($dir); + } + if (defined $file_func) { + $file_func->($dir, $file); + } else { + print "$dir/$file\n"; + } + $found = 1; + } + push @dirs, "$dir/$file" if -d "$dir/$file"; + } + closedir $dh; + # also call $before_dir_func if the directory has not been initialized yet + if (!$found && !-f "$dir/$DOUBLE_CKSUM_FILE" && defined $before_dir_func) { + $before_dir_func->($dir); + } + if ($found && defined $after_dir_func) { + $after_dir_func->($dir); + } + return \@dirs; + }, $top_dir; + # Is this a horrible hack? I dunno, but it sure is sweet... + while ($iter->()) {} +} + +# add all new files in $top_dir to the checksum files +sub check_add_new_files { + my ($top_dir, $args) = @_; + my $changed_dirs = 0; + my $changed_files = 0; + check_new_files $top_dir, sub { + my ($dir, $file) = @_; + my $fullpath = "$dir/$file"; + if (-d $fullpath) { + my $dir_file = "$dir/$DIR_FILE"; + my $fh; + if (!open $fh, ">>", $dir_file) { + warn "ERROR: Unable to append to file \"$dir_file\"!"; + return; + } + print $fh '"' . escape_filename($file) . '"' . "\n"; + close $fh; + $changed_dirs = 1; + } else { + my $cksum_output = get_cksum $fullpath; + return if !defined $cksum_output; + my $cksum_file = "$dir/$CKSUM_FILE"; + my $fh; + if (!open $fh, ">>", $cksum_file) { + warn "ERROR: Unable to append to file \"$cksum_file\"!"; + return; + } + print $fh $cksum_output . ' "' . escape_filename($file) . '"' . 
"\n"; + close $fh; + $changed_files = 1; + } + print "Added \"$fullpath\"\n" if !$args->{"q"}; + }, sub { + if (-f "$_[0]/$DOUBLE_CKSUM_FILE") { + if (!check_cksums $_[0], $DOUBLE_CKSUM_FILE, 1) { + warn "Checksum files corrupt in \"$_[0]\", not adding new checksums!\n"; + return 0; + } + + } else { + write_cksums($_[0], {}, 1, 1); + } + return 1; + }, sub { + if ($changed_dirs) { + write_special_cksums $_[0], $DIR_FILE; + $changed_dirs = 0; + } + if ($changed_files) { + write_special_cksums $_[0], $CKSUM_FILE; + $changed_files = 0; + } + }; +} + +# write the "checksums" in $contents to $path +# if $is_cksum_file is set, the value each of the keys in $contents points +# to is written before the key +sub write_file { + my ($path, $contents, $is_cksum_file) = @_; + my $fh; + if (!open $fh, ">", $path) { + warn "ERROR: Unable to open \"$path\" for writing!"; + return; + } + foreach my $filename (keys %$contents) { + if ($is_cksum_file) { + print $fh "$contents->{$filename} "; + } + print $fh '"' . escape_filename($filename) . '"' . "\n"; + } + close $fh; +} + +# write the checksums in $contents to the file at $path +sub write_cksum_file { + my ($path, $contents) = @_; + write_file $path, $contents, 1; +} + +# write the checksums in $contents to $dir +# any keys that point to undef are taken to be directories and vice versa +# $files_modified and $dirs_modified control which of the special lumia +# files actually get written +# note: this doesn't use write_file, etc. in order to (possibly) be a bit more efficient +sub write_cksums { + my ($dir, $contents, $files_modified, $dirs_modified) = @_; + # No, this isn't efficient... 
+ my @special_files; + my $dirs_fh; + my $files_fh; + if ($files_modified) { + my $path = "$dir/$CKSUM_FILE"; + if (!open $files_fh, ">", $path) { + warn "ERROR: Unable to open \"$path\" for writing!"; + return; + } + push @special_files, $CKSUM_FILE; + } + if ($dirs_modified) { + my $path = "$dir/$DIR_FILE"; + if (!open $dirs_fh, ">", $path) { + warn "ERROR: Unable to open \"$path\" for writing!"; + return; + } + push @special_files, $DIR_FILE; + } + foreach my $key (keys %$contents) { + if ($files_modified && defined $contents->{$key}) { + print $files_fh $contents->{$key} . ' "' . escape_filename($key) . '"' . "\n"; + } elsif ($dirs_modified && !defined $contents->{$key}) { + print $dirs_fh '"' . escape_filename($key) . '"' . "\n"; + } + } + close $files_fh if defined $files_fh; + close $dirs_fh if defined $dirs_fh; + if (@special_files) { + write_special_cksums $dir, @special_files; + } +} + +# show all files that are present in the checksum files but don't exist on the filesystem anymore +sub check_old_files { + my $top_dir = shift; + my $iter = make_lumia_iter 1, $top_dir; + while (my $dir = $iter->()) { + if (-e $dir) { + my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {}; + foreach my $file (keys %$cksums) { + if (!-e "$dir/$file") { + warn "Nonexistent file: \"$dir/$file\"!\n"; + } + } + } else { + warn "Nonexistent directory: \"$dir\"!\n"; + } + } +} + +# clean up the lumia checksum files, removing any files that aren't present +# on the filesystem anymore +sub remove_old_files { + my ($top_dir, $args) = @_; + my $iter = make_lumia_iter 1, $top_dir; + while (my $dir = $iter->()) { + if (!-e $dir) { + my $parent = dirname $dir; + my $child = basename $dir; + my $lumia_dirs = read_file("$parent/$DIR_FILE", {}) // {}; + if (exists $lumia_dirs->{$child}) { + delete $lumia_dirs->{$child}; + write_file "$parent/$DIR_FILE", $lumia_dirs; + print "Removed \"$dir\" from \"$parent/$DIR_FILE\"\n" if !$args->{"q"}; + write_special_cksums $parent, $DIR_FILE; + 
} + } else { + my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {}; + my $found = 0; + foreach my $file (keys %$cksums) { + if (!-e "$dir/$file") { + delete $cksums->{$file}; + print "Removed \"$dir/$file\" from \"$dir/$CKSUM_FILE\"\n" if !$args->{"q"}; + $found = 1; + } + } + if ($found) { + write_cksum_file "$dir/$CKSUM_FILE", $cksums; + write_special_cksums $dir, $CKSUM_FILE; + } + } + } +} + +# sort the given paths into hash based on the dirname +# returns: a hash with the keys being the dirnames of the given paths and +# each one pointing to an array containing the basenames of all paths +# that had this dirname +sub sort_by_dir { + my %sorted_files; + foreach my $file (@_) { + if (!-e $file) { + warn "ERROR: Source file \"$file\" doesn't exist.\n"; + next; + } + my $dir = dirname($file); + if (!exists($sorted_files{$dir})) { + $sorted_files{$dir} = []; + } + push(@{$sorted_files{$dir}}, basename($file)); + } + return \%sorted_files; +} + +# check if $dst exists and prompt the user whether it should be overwritten +# returns 0 if it can be overwritten or doesn't exist, 1 if it shouldn't be overwritten +sub prompt_overwrite { + my $dst = shift; + if (-e $dst) { + print STDERR "WARNING: \"$dst\" exists already. Do you want to replace it? 
(y/n) "; + my $choice = ""; + while ($choice ne "y" && $choice ne "n") { + $choice = <STDIN>; + chomp $choice; + } + if ($choice eq "n") { + warn "Not overwriting \"$dst\"\n"; + return 1; + } else { + return 0; + } + } + return 0; +} + +# copies the $src files to $dst and updates the checksums in $dst +# $src: list of source paths +# $dst: destination directory or file (in latter case only one src is allowed) +sub copy_files { + my ($src, $dst, $args) = @_; + my $dst_dir = $dst; + if (!-d $dst) { + $dst_dir = dirname $dst; + } + my $diff_name = 0; + # check if the file/dir is getting a different name or + # just being copied into a different directory + if (!-d $dst && !-d $src->[0]) { + $diff_name = 1; + } + if (!-e $dst && -d $src->[0]) { + $diff_name = 1; + } + my $dst_cksums = read_cksums $dst_dir; + return if !defined $dst_cksums; + my $src_sorted = sort_by_dir(@$src); + my $files_touched = 0; + my $dirs_touched = 0; + foreach my $src_dir (keys %$src_sorted) { + my $src_cksums = read_cksums $src_dir; + next if !defined $src_cksums; + foreach my $src_file (@{$src_sorted->{$src_dir}}) { + my $src_path = "$src_dir/$src_file"; + + my $dst_path = $diff_name ? $dst : "$dst_dir/$src_file"; + if (-d $dst_path && -d $src_path) { + warn "ERROR: Cannot copy directory to already existing directory\n"; + next; + } + if (exists $SPECIAL_FILES{$src_file} || exists $SPECIAL_FILES{basename $dst_path}) { + warn "ERROR: Not copying special file\n"; + next; + } + next if !$args->{"f"} && prompt_overwrite($dst_path); + my $options = $args->{"v"} ? 
"-av" : "-a"; + next if system("cp", $options, "--", $src_path, $dst); + + if (-d $src_path) { + $dirs_touched = 1; + } else { + $files_touched = 1; + } + + if (exists $src_cksums->{$src_file}) { + if ($diff_name) { + $dst_cksums->{basename $dst} = $src_cksums->{$src_file}; + } else { + $dst_cksums->{$src_file} = $src_cksums->{$src_file}; + } + } else { + warn "WARNING: \"$src_path\" not in cksum or directory list\n"; + } + } + } + write_cksums $dst_dir, $dst_cksums, $files_touched, $dirs_touched; +} + +# move a file (or directory) from $src to $dst, prompting for confirmation if $dst already exists; +# automatically appends the basename of $src to $dst if $dst is a directory +sub move_file { + my ($src, $dst, $args) = @_; + if (exists $SPECIAL_FILES{basename $src} || exists $SPECIAL_FILES{basename $dst}) { + warn "ERROR: Not moving special file\n"; + return 1; + } + if (-d $dst) { + $dst .= "/" . basename($src); + } + return 1 if !$args->{"f"} && prompt_overwrite($dst); + my $ret; + if ($args->{"v"}) { + $ret = system("mv", "-v", "--", $src, $dst); + } else { + $ret = system("mv", "--", $src, $dst); + } + return 1 if $ret; + if (-e $src) { + warn "ERROR: file could not be removed from source but will still be " . 
+ "removed from checksum database\n"; + } + return 0; +} + +# move all files/directories in $src_files from $src_dir to $dst_dir ($src_files +# only contains the basenames of the files), removing them from the checksum files +# in $src_dir and adding them to $dst_cksums +sub move_from_same_dir { + my ($src_dir, $src_files, $dst_cksums, $dst_dir, $args) = @_; + my $src_cksums = read_cksums $src_dir; + return if !defined $src_cksums; + my $files_touched = 0; + my $dirs_touched = 0; + foreach my $src_file (@$src_files) { + my $fullpath = "$src_dir/$src_file"; + my $tmp_dirs_touched = 0; + my $tmp_files_touched = 0; + if (-d $fullpath) { + $tmp_dirs_touched = 1; + } else { + $tmp_files_touched = 1; + } + + next if move_file($fullpath, $dst_dir, $args); + + # need to be able to check if the path is a directory + # before actually moving it + $dirs_touched ||= $tmp_dirs_touched; + $files_touched ||= $tmp_files_touched; + if (exists $src_cksums->{$src_file}) { + $dst_cksums->{$src_file} = $src_cksums->{$src_file}; + delete $src_cksums->{$src_file}; + } else { + warn "WARNING: \"$src_dir/$src_file\" not in cksum or directory list.\n"; + } + } + write_cksums $src_dir, $src_cksums, $files_touched, $dirs_touched; + return ($files_touched, $dirs_touched); +} + +# rename a single file or directory from $src to $dst +sub move_rename { + my ($src, $dst, $args) = @_; + my $src_dir = dirname $src; + my $dst_dir = dirname $dst; + my $src_file = basename $src; + my $dst_file = basename $dst; + + my $src_cksums = read_cksums $src_dir; + return if !defined $src_cksums; + my $dst_cksums = {}; + # if a file is simply being renamed in the same dir, the cksums + # should only be loaded and written once + if ($src_dir eq $dst_dir) { + %$dst_cksums = %$src_cksums; + delete $dst_cksums->{$src_file}; + } else { + $dst_cksums = read_cksums $dst_dir; + return if !defined $dst_cksums; + } + + my $files_touched = 0; + my $dirs_touched = 0; + if (-d $src) { + $dirs_touched = 1; + } else { + 
$files_touched = 1; + } + + return if move_file($src, $dst, $args); + + if (exists($src_cksums->{$src_file})) { + $dst_cksums->{$dst_file} = $src_cksums->{$src_file}; + delete $src_cksums->{$src_file}; + } else { + warn "WARNING: \"$src\" not in cksum or directory list.\n"; + } + write_cksums $dst_dir, $dst_cksums, $files_touched, $dirs_touched; + if ($src_dir ne $dst_dir) { + write_cksums $src_dir, $src_cksums, $files_touched, $dirs_touched; + } +} + +# move all files and directories in $src to $dst +# - if $dst does not exist, $src is only allowed to contain one path, which is +# renamed to $dst +# - if $dst is a file, $src is only allowed to contain a single path (which +# must be a file), which is renamed to $dst +# - otherwise, all files and directories in $src are moved to $dst +# $src: list of source paths +# $dst: destination directory or file (in latter case only one src is allowed) +sub move_files { + my ($src, $dst, $args) = @_; + if (!-d $dst && $#$src != 0) { + die "move: only one source argument allowed when destination is a file\n"; + } + if (!-d $dst && !-d $src->[0]) { + move_rename $src->[0], $dst, $args; + return; + } + if (!-e $dst && -d $src->[0]) { + move_rename $src->[0], $dst, $args; + return; + } + if (-e $dst && !-d $dst && -d $src->[0]) { + die "move: can't move directory to file\n"; + } + # Separate files by current dir so the cksum and dir files only need to be opened once + my $src_files = sort_by_dir(@$src); + my $dst_cksums = read_cksums $dst; + return if !defined $dst_cksums; + my $files_touched = 0; + my $dirs_touched = 0; + foreach my $src_dir (keys %$src_files) { + my ($tmp_files_touched, $tmp_dirs_touched) = move_from_same_dir $src_dir, $src_files->{$src_dir}, $dst_cksums, $dst, $args; + $files_touched ||= $tmp_files_touched; + $dirs_touched ||= $tmp_dirs_touched; + } + write_cksums $dst, $dst_cksums, $files_touched, $dirs_touched; +} + +# remove a file or directory from the filesystem +sub remove_file_dir { + my ($path, $args) 
= @_; + my $options = $args->{"f"} ? "-rf" : "-r"; + if (system("rm", $options, "--", $path)) { + return 1; + } + if (-e $path) { + warn "ERROR: Unable to remove \"$path\" from filesystem but " . + "will still be removed from checksum database\n"; + } + return 0; +} + +# remove all files in one directory, updating the checksum files in the process +# note: the files are only allowed to be basenames, i.e., they must be the +# actual filenames present in the checksum files +sub remove_from_same_dir { + my ($args, $dir, @files) = @_; + my $cksums = read_cksums $dir; + return if !defined $cksums; + my $dirs_touched = 0; + my $files_touched = 0; + foreach my $file (@files) { + if (exists $SPECIAL_FILES{$file}) { + warn "ERROR: not removing special file $file\n"; + next; + } + my $fullpath = "$dir/$file"; + if (!-e $fullpath) { + warn "\"$fullpath\": No such file or directory.\n"; + } + next if remove_file_dir($fullpath, $args); + if (exists $cksums->{$file}) { + if (defined $cksums->{$file}) { + $files_touched = 1; + } else { + $dirs_touched = 1; + } + delete $cksums->{$file}; + } else { + warn "WARNING: \"$file\" not in cksum or directory list.\n"; + } + } + write_cksums $dir, $cksums, $files_touched, $dirs_touched; +} + +# remove all given files and directories, updating the appropriate checksum +# files in the process +sub remove_files { + my $args = shift; + my $sorted_files = sort_by_dir(@_); + foreach my $dir (keys %$sorted_files) { + remove_from_same_dir($args, $dir, @{$sorted_files->{$dir}}); + } +} + +# create the given directories, initializing them with empty checksum files +# note: does not work like "mkdir -p", i.e., the new directories have to +# be located inside already existing directories +sub make_dirs { + my @created_dirs; + foreach (@_) { + if (system("mkdir", "--", $_)) { + warn "ERROR creating directory $_\n"; + next; + } + push(@created_dirs, $_); + } + # Separate files by current dir so the cksum and dir files only need to be opened once + my 
%dirs; + foreach my $dir (@created_dirs) { + write_cksums $dir, {}, 1, 1; + my $parent = dirname($dir); + if (!exists($dirs{$parent})) { + $dirs{$parent} = []; + } + push(@{$dirs{$parent}}, basename($dir)); + } + foreach my $parent (keys %dirs) { + my $parent_dirs = read_file "$parent/$DIR_FILE", {}; + next if !defined $parent_dirs; + foreach my $dir (@{$dirs{$parent}}) { + $parent_dirs->{$dir} = ""; + } + write_file "$parent/$DIR_FILE", $parent_dirs; + write_special_cksums $parent, $DIR_FILE; + } +} + +# extract all special lumia files from $src_dir to $dst_dir, recreating the +# entire directory structure in the process +sub extract { + my ($src_dir, $dst_dir, $args) = @_; + my $iter = make_lumia_iter 0, $src_dir; + my $options = $args->{"v"} ? "-av" : "-a"; + while (my $dir = $iter->()) { + my $final_dir = abs2rel $dir, $src_dir; + my $fulldir = catfile $dst_dir, $final_dir; + system("mkdir", "-p", "--", $fulldir); + foreach my $file (keys %SPECIAL_FILES) { + my $filepath = catfile $dir, $file; + if (-e $filepath) { + system("cp", $options, "--", $filepath, catfile($fulldir, $file)); + } + } + } +} + +# update the checksums of the given files +# ignores any directories given as arguments +sub update { + my @files; + foreach (@_) { + if (-d $_) { + warn "Ignoring directory \"$_\"\n"; + } else { + push @files, $_; + } + } + my $sorted_files = sort_by_dir @files; + foreach my $dir (keys %$sorted_files) { + my $cksums = read_cksum_file "$dir/$CKSUM_FILE", {}; + next if !defined $cksums; + my $changed = 0; + foreach my $file (@{$sorted_files->{$dir}}) { + my $cksum_output = get_cksum "$dir/$file"; + next if !defined $cksum_output; + $cksums->{$file} = $cksum_output; + $changed = 1; + } + if ($changed) { + write_cksum_file "$dir/$CKSUM_FILE", $cksums; + write_special_cksums $dir, $CKSUM_FILE; + } + } +} + +sub update_special { + my $dir = shift; + write_special_cksums $dir, $CKSUM_FILE, $DIR_FILE; +} + +my %args; +Getopt::Long::Configure("bundling"); 
+GetOptions(\%args, "f|force", "q|quiet", "v|verbose", "h|help"); + +pod2usage(-exitval => 0, -verbose => 2) if $args{"h"}; +pod2usage(-exitval => 1, -verbose => 1) if @ARGV < 1; + +my $cmd = shift; + +if ($cmd eq "mv") { + die "mv requires at least two arguments\n" if @ARGV < 2; + my @src = @ARGV[0..$#ARGV-1]; + move_files \@src, $ARGV[-1], \%args; +} elsif ($cmd eq "rm") { + die "rm requires at least one argument\n" if @ARGV < 1; + remove_files \%args, @ARGV; +} elsif ($cmd eq "addnew") { + my $dir = @ARGV ? $ARGV[0] : "."; + check_add_new_files $dir, \%args; +} elsif ($cmd eq "checknew") { + my $dir = @ARGV ? $ARGV[0] : "."; + check_new_files $dir; +} elsif ($cmd eq "checkold") { + my $dir = @ARGV ? $ARGV[0] : "."; + check_old_files $dir; +} elsif ($cmd eq "rmold") { + my $dir = @ARGV ? $ARGV[0] : "."; + remove_old_files $dir, \%args; +} elsif ($cmd eq "check") { + if (@ARGV < 1) { + check_files \%args, "."; + } else { + check_files \%args, @ARGV; + } +} elsif ($cmd eq "clean") { + my $dir = @ARGV ? $ARGV[0] : "."; + clean_files $dir, \%args; +} elsif ($cmd eq "extract") { + my $src_dir = "."; + my $dst_dir; + if (@ARGV == 2) { + $src_dir = $ARGV[0]; + $dst_dir = $ARGV[1]; + } elsif (@ARGV == 1) { + $dst_dir = $ARGV[0]; + } else { + die "Invalid number of arguments\n"; + } + if (!-d $src_dir) { + die "ERROR: Directory \"$src_dir\" does not exist.\n"; + } + if (!-d $dst_dir) { + die "ERROR: Directory \"$dst_dir\" does not exist.\n"; + } + extract $src_dir, $dst_dir; +} elsif ($cmd eq "cp") { + die "cp requires at least two arguments\n" if @ARGV < 2; + my @src = @ARGV[0..$#ARGV-1]; + copy_files \@src, $ARGV[-1], \%args; +} elsif ($cmd eq "mkdir") { + die "mkdir requires at least one argument\n" if @ARGV < 1; + make_dirs @ARGV; +} elsif ($cmd eq "update") { + die "update requires at least one argument\n" if @ARGV < 1; + update @ARGV; +} elsif ($cmd eq "updatespecial") { + die "Invalid number of arguments\n" if @ARGV > 1; + my $dir = @ARGV ? 
$ARGV[0] : "."; + update_special $dir; +} else { + pod2usage(-exitval => 1, -verbose => 1); +} + +__END__ + +=head1 NAME + +lumia - Manage checksums on a filesystem + +=head1 SYNOPSIS + +B<lumia> command [-hqfv] arguments + +=head1 DESCRIPTION + +lumia is meant for managing checksums of files in order to prevent bitrot. +It does this by storing several special files in each directory to keep track +of the checksums: + +=over 8 + +=item B<.lumidify_archive_cksums> + +Contains the checksums of all files in the directory. + +=item B<.lumidify_archive_dirs> + +Contains a list of all directories in the directory. + +=item B<.lumidify_archive_cksums.cksum> + +Contains the checksums of B<.lumidify_archive_cksums> and B<.lumidify_archive_dirs> +just because I'm paranoid. + +=item B<.lumidify_archive_ignore> + +Contains a list of files and directories that should be ignored by lumia. +Note that this is only read and never written to, unless the command B<clean> +is used. It is, however, still copied over by the B<extract> command. + +=back + +When the documentation for the commands talks about the "checksum database", +it simply means these files. + +All file/directory names are enclosed in quotes, with any backslashes or quotes +inside the name escaped with another backslash. The names are allowed to have +newlines in them. + +The list files only contain a list of filenames, with a newline between the +closing quote of one name and the opening quote of the next one. + +The checksum files additionally contain the output of the checksum program +used and a space before the starting quote of the filename. + +=head1 OPTIONS + +=over 8 + +=item B<-h>, B<--help> + +Show the full documentation. + +=item B<-q>, B<--quiet> + +Only output errors. + +=item B<-f>, B<--force> + +Overwrite files without prompting for confirmation. + +=item B<-v>, B<--verbose> + +Print each file that is processed by the command. 
+ +=back + +See the full documentation for details on which commands support which options +and what they do. + +It does not matter if the options are written before or after the command. + +If C<--> is written anywhere on the command line, option parsing is stopped, +so that files starting with a hyphen can still be specified. + +Note that C<-q> and C<-v> aren't exactly opposites - C<-q> applies to commands +like B<check>, where it suppresses printing of the individual files, while +C<-v> applies to commands like B<cp>, where it is just passed on to the system +command called in the background. + +Note further that this is very inconsistent, like the rest of the program, but +the author has made too many bad decisions to rectify that problem at the moment. + +=head1 COMMANDS + +Note that some commands support multiple files/directories as arguments and others, +for which it would make just as much sense, don't. That's just the way it is. + +=over 8 + +=item B<addnew> [-q] [directory] + +Walks through B<directory>, adding all new files to the checksum database. +B<directory> defaults to the current directory. + +C<-q> suppresses the printing of each file or directory as it is added. + +=item B<checknew> [directory] + +Walks through B<directory>, printing all files that aren't part of the checksum +database. B<directory> defaults to the current directory. + +=item B<checkold> [directory] + +Prints all files in the checksum database that do not exist on the filesystem anymore. +B<directory> defaults to the current directory. + +=item B<rmold> [-q] [directory] + +Removes all files found by B<checkold> from the database. B<directory> defaults to +the current directory. + +C<-q> suppresses the printing of each file as it is removed. + +=item B<check> [-q] file/directory ... + +Verifies the checksums of all files given, recursing through any directories. If no +files or directories are given, the current directory is used. 
+ +Note that the checksum database in the corresponding directory will be read again for +every file given on the command line, even if 1000 files in the same directory are given. +This problem does not occur when recursing through directories, so it is best to only +give files directly when checking a few. This problem wouldn't be too difficult to +fix, but, frankly, I'm too lazy, especially since I only added the feature to check +files individually as a convenience when I want to quickly check a single file in a +large directory. + +To explain why it is this way: The directory recursion is done using an iterator, which +has the directories pushed onto its queue in the beginning. The iterator only returns +directories, which are then checked all in one go, but this means that files given on +the command line need to be handled specially. + +C<-q> suppresses the printing of all good checksums but still allows a message to +be printed when a checksum failed. + +=item B<clean> [-q] [directory] + +Removes all lumia special files used to store the checksum database from B<directory> +recursively. B<directory> defaults to the current directory. + +Note that this recurses through the entire directory tree, not just the part that is +actually linked together by the checksum database. + +Warning: This just blindly removes all files with one of the special lumia names, +even if they weren't actually created by lumia. + +C<-q> suppresses the printing of each file as it is deleted. + +=item B<extract> [-v] [source] destination + +Recreates the entire directory structure from B<source> in B<destination>, but only +copies the special files used to store the checksum database. B<source> defaults to +the current directory. + +C<-v> prints each file as it is copied. + +Note that this overwrites files in the destination directory without confirmation. + +=item B<mkdir> directory ... + +Creates the given directories, initializing them with empty checksum database files. 
+ +=item B<update> file ... + +Recalculates the checksums for the given files and replaces them in the database. + +Note: Directories given as arguments are ignored. + +This is mainly meant to quickly "touch" a file after it was modified (e.g. a +notes file that is occasionally updated). + +=item B<updatespecial> [directory] + +Recalculates the checksums for the special files C<.lumidify_archive_dirs> and +C<.lumidify_archive_cksums> and writes them to C<.lumidify_archive_cksums.cksum>. +B<directory> defaults to the current directory. + +This is only meant to be used if, for some reason, the checksum files had to +be edited manually and thus don't match the checksums in C<.lumidify_archive_cksums.cksum> +anymore. + +=item B<rm> [-f] file ... + +Removes the given files and directories recursively from the filesystem and +checksum database. The following caveats apply: + +If any actual errors occur while deleting the file/directory (i.e. the system +command C<rm> returns a non-zero exit value), the checksum or directory B<is +left in the database>. If the system C<rm> does not return a non-zero exit value, +but the file/directory still exists afterwards (e.g. there was a permission +error and the user answered "n" when prompted), a warning message is printed, +but the files B<are removed from the database> (if the database can be +written to). + +It is an error if there are no checksum database files in the directory +of a file named on the command line. + +C<-f> is passed through to the system C<rm> command. + +=item B<cp> [-vf] source target + +=item B<cp> [-vf] source ... directory + +Copies the given source files, updating the checksum database in the process. + +If the last argument is a file, there must be only one source argument, also a file, +which is then copied to the target. + +If the last argument is a directory, all source arguments are copied into it. + +It is an error if a source or destination directory does not contain any +checksum database files. 
+ +B<cp> will issue a warning and skip to the next argument if it is asked to +merge a directory with an already existing directory. For instance, attempting +to run C<cp dir1 dir2>, where C<dir2> already contains a directory named +C<dir1>, will result in an error. This may change in the future, when the +program is modified to recursively copy the files manually, instead of simply +calling the system C<cp> on each of the arguments. If this was supported in +the current version, none of the checksums inside that directory would be +updated, so it wouldn't be very useful. + +C<-v> is passed through to the system C<cp> command. + +C<-f> silently overwrites files without prompting the user, much like the +C<-f> option in the system C<cp> command. This is handled manually by the +program, though, in order to actually determine what the user chose. See +also the caveat mentioned above. + +=item B<mv> [-vf] source target + +=item B<mv> [-vf] source ... directory + +Moves the given source files, updating the checksum database in the process. + +If the last argument is a file or does not exist, there must be only one source +argument, which is renamed to the target name. + +If the last argument is an existing directory, all source arguments are moved +into it. + +It is an error if a source or destination directory does not contain any +checksum database files. + +B<mv> behaves the same as B<rm> with regard to checking if the source file +is still present after the operation and other error handling. + +C<-v> is passed through to the system C<mv> command. + +C<-f> is handled in the same manner as with B<cp>. + +=back + +=head1 MOTIVATION + +There are already several programs that can be used to check for bitrot, +as listed in L</"SEE ALSO">. However, all programs I tried either were +much too complicated for my taste or just did everything behind my back. 
+I wanted a simple tool that did exactly what I told it to and also allowed +me to keep the old checksums when reorganizing files, in order to avoid +regenerating the checksums from corrupt files. Since I couldn't find those +features in any program I tried, I wrote my own. + +=head1 DESIGN DECISIONS + +It may strike some readers as a peculiar idea to save the checksum files in +I<every single directory>, but this choice was made after much deliberation. +The other option I could think of was to have one big database, but that +would have made all commands much more difficult to implement and additionally +necessitated opening the entire database for every operation. With individual +files in each directory, operations like B<cp> become quite trivial (ignoring +all the edge cases) since only the toplevel checksums need to be copied to +the new destination, and any subdirectories already contain the checksums. + +This method is not without its drawbacks, however. The most glaring problem +I have found is that there is no way to store the checksums of read-only +directories or any special directories that cannot be littered with the +checksum files because that would clash with other software. Despite these +drawbacks, however, I decided to stick with it because it works for almost +all cases and doesn't have any of the serious drawbacks that other options +would have had. + +The names of the special files were chosen to be ".lumidify_archive*" not +out of vanity, but mainly because I couldn't think of any regular files +with those names, making them a good choice to avoid clashes. + +The name of the program, C<lumia> (for "lumidify archive"), was similarly +chosen because it did not clash with any programs installed on my system and +thus allowed for easy tab-completion. + +=head1 HASH ALGORITHMS + +By default, the simple cksum algorithm is used to get the checksums. 
This +is not very secure, but the main purpose of the program is to prevent +bitrot, for which cksum should be sufficient, especially since it is much +faster than other algorithms. + +There is currently no convenient way to change the algorithm other than +changing the $CKSUM_CMD and $CKSUM_NUMFIELDS variables at the top of +lumia. $CKSUM_CMD must be the command that returns the checksum +when it is given a file, and $CKSUM_NUMFIELDS specifies the number of +space-separated fields the checksum consists of. This has to be specified +in order to determine where the checksum ends and the filename begins in +the output. This would be redundant if all implementations of cksum +supported '-q' for outputting only the checksum, but that only seems to +be supported by some implementations. + +=head1 USAGE SCENARIOS + +=over 8 + +=item B<Security auditing> + +This program is B<NOT> designed to provide any security auditing, as should +be clear from the fact that the checksums are stored right in the same +directory as the files. See mtree(8) for that. + +If you want to, however, you could set $CKSUM_CMD to a secure hash (not cksum) +and B<extract> the checksums to a separate directory, which you keep in a +safe place. You could then use the regular C<cp> command to simply replace +all the checksums with the ones from your backup, in case an attacker modified +the checksum database in the directory with the actual files you're trying to +protect. I don't know if there would be any point in doing that, though. + +=item B<Managing archives> + +This is the purpose I wrote the program for. + +You can simply initialize your archive directory with the B<addnew> command. +Whenever you add new files, just run B<addnew> again. If you want to reorganize +the archive, you can use the limited commands available. + +I usually just use rsync(1) to copy the entire archive directory over to other +backup drives and then use the B<check> command again on the new drive. 
+ +I also have checksums for the main data directory on my computer (except for +things like git repositories, which I don't want littered with the database +files). Here, I use the B<update> command for files that I edit more often +and occasionally run B<check> on the entire directory. + +Since the database files are written in each directory, you can run the +B<addnew> command in any subdirectory when you've added new files there. + +=back + +=head1 PERFORMANCE + +Due to the extensive use of iterators and the author's bad life choices, +some functions, such as B<addnew> and B<check>, run more slowly than they +would if they were programmed more efficiently, especially on many small +files and folders. Too bad. + +=head1 PORTABILITY + +This program was written on OpenBSD. It will probably work on most other +reasonably POSIX-Compliant systems, although I cannot guarantee anything. +$CKSUM_CMD may need to be modified at the top of lumia. The file +operation commands are called directly with system(), so those need to +be available. + +It will most certainly not work on Windows, but that shouldn't be a +problem for anyone important. + +=head1 BUGS + +All system commands (unless I forgot some) are called with "--" before +listing the actual files, so files beginning with hyphens should be +supported. I have tested the commands with filenames starting with spaces +and hyphens and also containing newlines, but there may very well be issues +still. Please notify me if you find any filenames that do not work. Handling +filenames properly is difficult. + +There are probably many other edge cases, especially in the B<mv>, B<cp>, +and B<rm> commands. Please notify me if you find an issue. + +Operations on files containing newlines may cause Perl to print a warning +"Unsuccessful stat on filename containing newline" even though nothing is +wrong since (as described in B<mv> and B<rm>) existence of the file is +checked afterwards. 
I didn't feel like disabling warnings, and no normal +person should be working with files containing newlines anyways, so that's +the way it is. + +=head1 EXIT STATUS + +Always 0, unless the arguments given were invalid. We don't do errors around here. + +On a more serious note - I should probably change that at some point. +For the time being, if you want to run B<check> in a script, you can test +the output printed when the C<-q> option is used, since this won't output +anything if there are no errors. Do note, though, that actual errors (file not +found, etc.) are printed to STDERR, while incorrect checksums are printed +to STDOUT. + +=head1 SEE ALSO + +par2(1), mtree(8), aide(1), bitrot(no man page) + +=head1 LICENSE + +Copyright (c) 2019, 2020, 2021 lumidify <nobody[at]lumidify.org> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +=cut diff --git a/lumia.pl b/lumia.pl @@ -1,1403 +0,0 @@ -#!/usr/bin/env perl - -# TODO: some way to avoid writing .lumidify* in dirs but still index them? -# TODO: store modified date and checksum files with changed date -# TODO: add option to just check dir structure or maybe check if everything exists -# TODO: add option to compare cksums of two dirs -# TODO: exit status! 
- -use strict; -use warnings; -use File::Spec::Functions qw(catfile abs2rel); -use File::Basename qw(basename dirname); -use String::ShellQuote; -use Pod::Usage; -use Getopt::Long; - -# the file used to store checksums for files -my $CKSUM_FILE = ".lumidify_archive_cksums"; -# the file used to store directory names -my $DIR_FILE = ".lumidify_archive_dirs"; -# the file read to ignore files or directories -my $IGNORE_FILE = ".lumidify_archive_ignore"; -# the file containing checksums of $CKSUM_FILE and $DIR_FILE -my $DOUBLE_CKSUM_FILE = ".lumidify_archive_cksums.cksum"; - -# uncomment this instead of the lines below to use -# sha256 instead of cksum as the hash algorithm -# Note: this isn't really tested properly -#my $CKSUM_CMD = 'sha256 -q'; -#my $CKSUM_NUMFIELDS = 1; -my $CKSUM_CMD = 'cksum'; -my $CKSUM_NUMFIELDS = 2; - -my %SPECIAL_FILES = ( - $CKSUM_FILE => 1, - $DIR_FILE => 1, - $IGNORE_FILE => 1, - $DOUBLE_CKSUM_FILE => 1 -); - -# escape a filename for writing into the checksum files -sub escape_filename { - my $file = shift; - $file =~ s/\\/\\\\/g; - $file =~ s/"/\\"/g; - return $file; -} - -# make a generic file iterator -# $file_func determines whether a file should be returned by the iterator -# $dir_func is called for each directory and returns all files that -# should be added to the queue -sub make_file_iter { - my ($file_func, $dir_func, @queue) = @_; - return sub { - while (@queue) { - my $file = pop @queue; - if (-d $file) { - my $new_files = $dir_func->($file); - next if !defined $new_files; - push @queue, @$new_files; - } - return $file if $file_func->($file); - } - return; - }; -} - -# make a basic filename iterator, which simply returns all files -# for which $file_func returns a true value -sub make_file_iter_basic { - my ($file_func, @files) = @_; - make_file_iter $file_func, sub { - my $dh; - if (!opendir $dh, $_[0]) { - warn "WARNING: Unable to open directory \"$_[0]\"!"; - return []; - } - my @new_files = map "$_[0]/$_", grep {$_ ne "." 
&& $_ ne ".."} readdir $dh; - closedir $dh; - return \@new_files; - }, @files; -} - -# make an iterator that only returns the directories which are present -# in the $DIR_FILE files, in addition to the files and directories that -# were originally passed as arguments -# note: this returns nonexistent directories if those are still -# specified in the lumia files -sub make_lumia_iter { - my ($quiet, @dirs) = @_; - make_file_iter sub {1}, sub { - my $path = "$_[0]/$DIR_FILE"; - return [] if !-f $path; - my $dirs = read_file($path, {}); - return if !defined $dirs; - my @new_dirs; - foreach my $dir (keys %$dirs) { - my $dir_path = "$_[0]/$dir"; - if (!-d $dir_path) { - warn "ERROR: Directory \"$dir_path\" mentioned in " . - "\"$path\" does not exist or is not directory.\n" if !$quiet; - } - # still push it even when it doesn't exist so rmold can work properly - push @new_dirs, $dir_path; - } - return \@new_dirs; - }, @dirs; -} - -# remove all special lumia files from the given directory -sub clean_files { - my ($dir, $args) = @_; - my $iter = make_file_iter_basic sub {exists $SPECIAL_FILES{basename $_[0]};}, $dir; - while (my $file = $iter->()) { - if (!unlink $file) { - warn "WARNING: Unable to remove file \"$file\"!\n"; - } else { - print "Deleted \"$file\"\n" if !$args->{"q"}; - } - } -} - -# read a file, processing each line with $handle_cksum_func if set -# and writing the results into $cksums -# $handle_cksum_func must return two values, the checksum of the -# argument and the rest of the string (that is then parsed for -# the filename); if it returns undef, this function also returns undef -sub read_file { - my ($file, $cksums, $handle_cksum_func) = @_; - my $fh; - if (!open $fh, "<", $file) { - warn "ERROR: Unable to open file \"$file\": $!\n"; - return; - } - my $in_fn = 0; - my $cur_cksum; - my $cur_str; - my $cur_fn = ""; - foreach (<$fh>) { - next if (!$in_fn && /^$/); - if ($handle_cksum_func && !$in_fn) { - ($cur_cksum, $cur_str) = 
$handle_cksum_func->($_); - return undef if !defined $cur_cksum; - } else { - $cur_str = $_; - } - my $bs = 0; - foreach my $ch (split(//, $cur_str)) { - if ($ch eq "\\") { - $bs++; - $cur_fn .= "\\" if !($bs %= 2) && $in_fn; - } elsif ($bs % 2) { - $cur_fn .= $ch if $in_fn; - $bs = 0; - } elsif ($ch eq "\"") { - if ($in_fn) { - $in_fn = 0; - $cksums->{$cur_fn} = $cur_cksum; - $cur_fn = ""; - last; - } - $in_fn = 1; - } elsif ($in_fn) { - $cur_fn .= $ch; - } - } - } - close $fh; - if ($in_fn) { - warn "ERROR: Unterminated filename in file \"$file\"\n"; - return undef; - } - return $cksums; -} - -# read a single checksum file, writing the checksums into the hash $cksums and returning it -sub read_cksum_file { - my ($file, $cksums) = @_; - return read_file $file, $cksums, sub { - my $line = shift; - my @fields = split(/ /, $line, $CKSUM_NUMFIELDS+1); - if (@fields != $CKSUM_NUMFIELDS+1) { - warn "WARNING: Malformed line \"$line\" in file \"$file\"\n"; - return; - } - my $cur_cksum = join(" ", @fields[0..$CKSUM_NUMFIELDS-1]); - my $cur_str = $fields[$CKSUM_NUMFIELDS]; - return ($cur_cksum, $cur_str); - }; -} - -# read the checksums and directory names in $dir -sub read_cksums { - my $dir = shift; - my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}); - return undef if !defined $cksums; - $cksums = read_file("$dir/$DIR_FILE", $cksums); - return undef if !defined $cksums; - return $cksums; -} - -# get the checksum output for $path -# returns undef if $CKSUM_CMD returns an error -sub get_cksum { - my $path = shift; - my $path_esc = shell_quote $path; - my $cksum_output = `$CKSUM_CMD -- $path_esc 2>&1`; - if ($?) 
{ - warn "ERROR getting cksum for file \"$path\":\n$cksum_output"; - return undef; - } - chomp $cksum_output; - my @fields = split(/ /, $cksum_output, $CKSUM_NUMFIELDS+1); - return join(" ", @fields[0..$CKSUM_NUMFIELDS-1]); -} - -# check the checksums in $dir/$cksum_file -# if $quiet is set, only print failed files -sub check_cksums { - my ($dir, $cksum_file, $quiet) = @_; - my $cksums = read_cksum_file("$dir/$cksum_file", {}); - return 0 if !defined $cksums; - my $failed = 1; - foreach my $file (keys %$cksums) { - my $path = "$dir/$file"; - my $output = get_cksum $path; - next if !defined $output; - if ($output eq $cksums->{$file}) { - print "OK $path\n" if !$quiet; - } else { - print "FAILED $path\n"; - $failed = 0; - } - } - return $failed; -} - -# check the checksums of all files and directories in @dirs -sub check_files { - my $args = shift; - my @dirs; - foreach my $file (@_) { - if (-d $file) { - push @dirs, $file; - next; - } - my $dir = dirname $file; - my $base = basename $file; - if (exists $SPECIAL_FILES{$base}) { - warn "ERROR: File is reserved for lumia.pl: $file\n"; - next; - } - my $cksums = read_cksum_file("$dir/$CKSUM_FILE"); - next if !defined $cksums; - if (!exists $cksums->{$base}) { - warn "ERROR: File doesn't exist in checksums: $file\n"; - next; - } - my $output = get_cksum "$file"; - next if !defined $output; - if ($output eq $cksums->{$base}) { - print "OK $file\n" if !$args->{"q"}; - } else { - print "FAILED $file\n"; - } - } - my $iter = make_lumia_iter 0, @dirs; - while (my $file = $iter->()) { - check_cksums $file, $DOUBLE_CKSUM_FILE, $args->{"q"}; - check_cksums $file, $CKSUM_FILE, $args->{"q"}; - } -} - -# write the checksums of the special lumia files given as arguments -# to $DOUBLE_CKSUM_FILE in $dir -sub write_special_cksums { - my ($dir, @files) = @_; - my $cksum_file = "$dir/$DOUBLE_CKSUM_FILE"; - my $cksums = {}; - if (-f $cksum_file) { - $cksums = read_cksum_file $cksum_file, {}; - } - return if !defined $cksums; - foreach my 
$file (@files) { - my $cksum_output = get_cksum("$dir/$file"); - next if (!defined $cksum_output); - $cksums->{$file} = $cksum_output; - } - write_file($cksum_file, $cksums, 1); -} - -# search for new files that aren't present in the checksum files -# - if $file_func is set, it is called for each new file -# - if $before_dir_func is set, it is called before processing the -# files in each directory that has new files OR if a directory -# is entirely new (well, it only checks if $DOUBLE_CKSUM_FILE exists) -# - if $after_dir_func is set, it is called after processing the -# files in each directory that has new files -sub check_new_files { - my ($top_dir, $file_func, $before_dir_func, $after_dir_func) = @_; - my $iter = make_file_iter sub {1}, sub { - my $dir = shift; - my $dh; - if (!opendir $dh, $dir) { - warn "ERROR: Unable to open directory \"$dir\"!"; - return undef; - } - my $read_file_noerror = sub { - if (-f $_[0]) { - return $_[1]->($_[0], {}) // {}; - } - return {}; - }; - my $ignore = $read_file_noerror->("$dir/$IGNORE_FILE", \&read_file); - my $lumia_dirs = $read_file_noerror->("$dir/$DIR_FILE", \&read_file); - my $lumia_files = $read_file_noerror->("$dir/$CKSUM_FILE", \&read_cksum_file); - my @dirs; - my $found = 0; - while (my $file = readdir $dh) { - next if $file eq "." 
|| $file eq ".."; - next if exists $ignore->{$file} || exists $SPECIAL_FILES{$file}; - if (!exists $lumia_dirs->{$file} && !exists $lumia_files->{$file}) { - if (!$found && defined $before_dir_func) { - last if !$before_dir_func->($dir); - } - if (defined $file_func) { - $file_func->($dir, $file); - } else { - print "$dir/$file\n"; - } - $found = 1; - } - push @dirs, "$dir/$file" if -d "$dir/$file"; - } - closedir $dh; - # also call $before_dir_func if the directory has not been initialized yet - if (!$found && !-f "$dir/$DOUBLE_CKSUM_FILE" && defined $before_dir_func) { - $before_dir_func->($dir); - } - if ($found && defined $after_dir_func) { - $after_dir_func->($dir); - } - return \@dirs; - }, $top_dir; - # Is this a horrible hack? I dunno, but it sure is sweet... - while ($iter->()) {} -} - -# add all new files in $top_dir to the checksum files -sub check_add_new_files { - my ($top_dir, $args) = @_; - my $changed_dirs = 0; - my $changed_files = 0; - check_new_files $top_dir, sub { - my ($dir, $file) = @_; - my $fullpath = "$dir/$file"; - if (-d $fullpath) { - my $dir_file = "$dir/$DIR_FILE"; - my $fh; - if (!open $fh, ">>", $dir_file) { - warn "ERROR: Unable to append to file \"$dir_file\"!"; - return; - } - print $fh '"' . escape_filename($file) . '"' . "\n"; - close $fh; - $changed_dirs = 1; - } else { - my $cksum_output = get_cksum $fullpath; - return if !defined $cksum_output; - my $cksum_file = "$dir/$CKSUM_FILE"; - my $fh; - if (!open $fh, ">>", $cksum_file) { - warn "ERROR: Unable to append to file \"$cksum_file\"!"; - return; - } - print $fh $cksum_output . ' "' . escape_filename($file) . '"' . 
"\n"; - close $fh; - $changed_files = 1; - } - print "Added \"$fullpath\"\n" if !$args->{"q"}; - }, sub { - if (-f "$_[0]/$DOUBLE_CKSUM_FILE") { - if (!check_cksums $_[0], $DOUBLE_CKSUM_FILE, 1) { - warn "Checksum files corrupt in \"$_[0]\", not adding new checksums!\n"; - return 0; - } - - } else { - write_cksums($_[0], {}, 1, 1); - } - return 1; - }, sub { - if ($changed_dirs) { - write_special_cksums $_[0], $DIR_FILE; - $changed_dirs = 0; - } - if ($changed_files) { - write_special_cksums $_[0], $CKSUM_FILE; - $changed_files = 0; - } - }; -} - -# write the "checksums" in $contents to $path -# if $is_cksum_file is set, the value each of the keys in $contents points -# to is written before the key -sub write_file { - my ($path, $contents, $is_cksum_file) = @_; - my $fh; - if (!open $fh, ">", $path) { - warn "ERROR: Unable to open \"$path\" for writing!"; - return; - } - foreach my $filename (keys %$contents) { - if ($is_cksum_file) { - print $fh "$contents->{$filename} "; - } - print $fh '"' . escape_filename($filename) . '"' . "\n"; - } - close $fh; -} - -# write the checksums in $contents to the file at $path -sub write_cksum_file { - my ($path, $contents) = @_; - write_file $path, $contents, 1; -} - -# write the checksums in $contents to $dir -# any keys that point to undef are taken to be directories and vice versa -# $files_modified and $dirs_modified control which of the special lumia -# files actually get written -# note: this doesn't use write_file, etc. in order to (possibly) be a bit more efficient -sub write_cksums { - my ($dir, $contents, $files_modified, $dirs_modified) = @_; - # No, this isn't efficient... 
- my @special_files; - my $dirs_fh; - my $files_fh; - if ($files_modified) { - my $path = "$dir/$CKSUM_FILE"; - if (!open $files_fh, ">", $path) { - warn "ERROR: Unable to open \"$path\" for writing!"; - return; - } - push @special_files, $CKSUM_FILE; - } - if ($dirs_modified) { - my $path = "$dir/$DIR_FILE"; - if (!open $dirs_fh, ">", $path) { - warn "ERROR: Unable to open \"$path\" for writing!"; - return; - } - push @special_files, $DIR_FILE; - } - foreach my $key (keys %$contents) { - if ($files_modified && defined $contents->{$key}) { - print $files_fh $contents->{$key} . ' "' . escape_filename($key) . '"' . "\n"; - } elsif ($dirs_modified && !defined $contents->{$key}) { - print $dirs_fh '"' . escape_filename($key) . '"' . "\n"; - } - } - close $files_fh if defined $files_fh; - close $dirs_fh if defined $dirs_fh; - if (@special_files) { - write_special_cksums $dir, @special_files; - } -} - -# show all files that are present in the checksum files but don't exist on the filesystem anymore -sub check_old_files { - my $top_dir = shift; - my $iter = make_lumia_iter 1, $top_dir; - while (my $dir = $iter->()) { - if (-e $dir) { - my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {}; - foreach my $file (keys %$cksums) { - if (!-e "$dir/$file") { - warn "Nonexistent file: \"$dir/$file\"!\n"; - } - } - } else { - warn "Nonexistent directory: \"$dir\"!\n"; - } - } -} - -# clean up the lumia checksum files, removing any files that aren't present -# on the filesystem anymore -sub remove_old_files { - my ($top_dir, $args) = @_; - my $iter = make_lumia_iter 1, $top_dir; - while (my $dir = $iter->()) { - if (!-e $dir) { - my $parent = dirname $dir; - my $child = basename $dir; - my $lumia_dirs = read_file("$parent/$DIR_FILE", {}) // {}; - if (exists $lumia_dirs->{$child}) { - delete $lumia_dirs->{$child}; - write_file "$parent/$DIR_FILE", $lumia_dirs; - print "Removed \"$dir\" from \"$parent/$DIR_FILE\"\n" if !$args->{"q"}; - write_special_cksums $parent, $DIR_FILE; - 
}
        } else {
            my $cksums = read_cksum_file("$dir/$CKSUM_FILE", {}) // {};
            my $found = 0;
            foreach my $file (keys %$cksums) {
                if (!-e "$dir/$file") {
                    delete $cksums->{$file};
                    print "Removed \"$dir/$file\" from \"$dir/$CKSUM_FILE\"\n" if !$args->{"q"};
                    $found = 1;
                }
            }
            if ($found) {
                write_cksum_file "$dir/$CKSUM_FILE", $cksums;
                write_special_cksums $dir, $CKSUM_FILE;
            }
        }
    }
}

# sort the given paths into a hash based on the dirname
# paths that do not exist are reported with a warning and left out
# returns: a reference to a hash with the keys being the dirnames of the
# given paths and each one pointing to an array containing the basenames
# of all paths that had this dirname
sub sort_by_dir {
    my %sorted_files;
    foreach my $file (@_) {
        if (!-e $file) {
            warn "ERROR: Source file \"$file\" doesn't exist.\n";
            next;
        }
        my $dir = dirname($file);
        if (!exists($sorted_files{$dir})) {
            $sorted_files{$dir} = [];
        }
        push(@{$sorted_files{$dir}}, basename($file));
    }
    return \%sorted_files;
}

# check if $dst exists and prompt the user whether it should be overwritten
# returns 0 if it can be overwritten or doesn't exist, 1 if it shouldn't be overwritten
# if STDIN reaches end-of-file before an answer is given, this counts as "n"
sub prompt_overwrite {
    my $dst = shift;
    if (-e $dst) {
        print STDERR "WARNING: \"$dst\" exists already. Do you want to replace it? (y/n) ";
        my $choice = "";
        while ($choice ne "y" && $choice ne "n") {
            $choice = <STDIN>;
            # readline returns undef at end-of-file; without this check the
            # loop would never terminate when STDIN is closed or exhausted,
            # so fall back to the non-destructive answer
            $choice = "n" if !defined $choice;
            chomp $choice;
        }
        if ($choice eq "n") {
            warn "Not overwriting \"$dst\"\n";
            return 1;
        } else {
            return 0;
        }
    }
    return 0;
}

# copies the $src files to $dst and updates the checksums in $dst
# $src: list of source paths
# $dst: destination directory or file (in latter case only one src is allowed)
# $args: option hash; "f" skips the overwrite prompt, "v" makes cp verbose
sub copy_files {
    my ($src, $dst, $args) = @_;
    my $dst_dir = $dst;
    if (!-d $dst) {
        $dst_dir = dirname $dst;
    }
    my $diff_name = 0;
    # check if the file/dir is getting a different name or
    # just being copied into a different directory
    if (!-d $dst && !-d $src->[0]) {
        $diff_name = 1;
    }
    if (!-e $dst && -d $src->[0]) {
        $diff_name = 1;
    }
    my $dst_cksums = read_cksums $dst_dir;
    return if !defined $dst_cksums;
    my $src_sorted = sort_by_dir(@$src);
    my $files_touched = 0;
    my $dirs_touched = 0;
    foreach my $src_dir (keys %$src_sorted) {
        my $src_cksums = read_cksums $src_dir;
        next if !defined $src_cksums;
        foreach my $src_file (@{$src_sorted->{$src_dir}}) {
            my $src_path = "$src_dir/$src_file";

            my $dst_path = $diff_name ? $dst : "$dst_dir/$src_file";
            if (-d $dst_path && -d $src_path) {
                warn "ERROR: Cannot copy directory to already existing directory\n";
                next;
            }
            if (exists $SPECIAL_FILES{$src_file} || exists $SPECIAL_FILES{basename $dst_path}) {
                warn "ERROR: Not copying special file\n";
                next;
            }
            next if !$args->{"f"} && prompt_overwrite($dst_path);
            my $options = $args->{"v"} ? "-av" : "-a";
            # system() returns non-zero on failure; skip the checksum
            # bookkeeping for anything that wasn't actually copied
            next if system("cp", $options, "--", $src_path, $dst);

            if (-d $src_path) {
                $dirs_touched = 1;
            } else {
                $files_touched = 1;
            }

            if (exists $src_cksums->{$src_file}) {
                if ($diff_name) {
                    $dst_cksums->{basename $dst} = $src_cksums->{$src_file};
                } else {
                    $dst_cksums->{$src_file} = $src_cksums->{$src_file};
                }
            } else {
                warn "WARNING: \"$src_path\" not in cksum or directory list\n";
            }
        }
    }
    write_cksums $dst_dir, $dst_cksums, $files_touched, $dirs_touched;
}

# move a file (or directory) from $src to $dst, prompting for confirmation if $dst already exists;
# automatically appends the basename of $src to $dst if $dst is a directory
# $args: option hash; "f" skips the overwrite prompt, "v" makes mv verbose
# returns 0 if the system mv succeeded, 1 if nothing was moved (special
# file, user declined to overwrite, or mv failed)
sub move_file {
    my ($src, $dst, $args) = @_;
    if (exists $SPECIAL_FILES{basename $src} || exists $SPECIAL_FILES{basename $dst}) {
        warn "ERROR: Not moving special file\n";
        return 1;
    }
    if (-d $dst) {
        $dst .= "/" . basename($src);
    }
    return 1 if !$args->{"f"} && prompt_overwrite($dst);
    my $ret;
    if ($args->{"v"}) {
        $ret = system("mv", "-v", "--", $src, $dst);
    } else {
        $ret = system("mv", "--", $src, $dst);
    }
    return 1 if $ret;
    # mv reported success, so the caller will update the database even
    # if the source is somehow still there
    if (-e $src) {
        warn "ERROR: file could not be removed from source but will still be " .
            "removed from checksum database\n";
    }
    return 0;
}

# move all files/directories in $src_files from $src_dir to $dst_dir ($src_files
# only contains the basenames of the files), removing them from the checksum files
# in $src_dir and adding them to $dst_cksums
# returns: the list ($files_touched, $dirs_touched), or nothing if the
# checksums of $src_dir could not be read
sub move_from_same_dir {
    my ($src_dir, $src_files, $dst_cksums, $dst_dir, $args) = @_;
    my $src_cksums = read_cksums $src_dir;
    return if !defined $src_cksums;
    my $files_touched = 0;
    my $dirs_touched = 0;
    foreach my $src_file (@$src_files) {
        my $fullpath = "$src_dir/$src_file";
        my $tmp_dirs_touched = 0;
        my $tmp_files_touched = 0;
        if (-d $fullpath) {
            $tmp_dirs_touched = 1;
        } else {
            $tmp_files_touched = 1;
        }

        next if move_file($fullpath, $dst_dir, $args);

        # need to be able to check if the path is a directory
        # before actually moving it
        $dirs_touched ||= $tmp_dirs_touched;
        $files_touched ||= $tmp_files_touched;
        if (exists $src_cksums->{$src_file}) {
            $dst_cksums->{$src_file} = $src_cksums->{$src_file};
            delete $src_cksums->{$src_file};
        } else {
            warn "WARNING: \"$src_dir/$src_file\" not in cksum or directory list.\n";
        }
    }
    write_cksums $src_dir, $src_cksums, $files_touched, $dirs_touched;
    return ($files_touched, $dirs_touched);
}

# rename a single file or directory from $src to $dst, carrying its entry
# in the checksum database over to the new name
sub move_rename {
    my ($src, $dst, $args) = @_;
    my $src_dir = dirname $src;
    my $dst_dir = dirname $dst;
    my $src_file = basename $src;
    my $dst_file = basename $dst;

    my $src_cksums = read_cksums $src_dir;
    return if !defined $src_cksums;
    my $dst_cksums = {};
    # if a file is simply being renamed in the same dir, the cksums
    # should only be loaded and written once
    if ($src_dir eq $dst_dir) {
        %$dst_cksums = %$src_cksums;
        delete $dst_cksums->{$src_file};
    } else {
        $dst_cksums = read_cksums $dst_dir;
        return if !defined $dst_cksums;
    }

    # the type of $src has to be determined before it is moved away
    my $files_touched = 0;
    my $dirs_touched = 0;
    if (-d $src) {
        $dirs_touched = 1;
    } else {
        $files_touched = 1;
    }

    return if move_file($src, $dst, $args);

    if (exists($src_cksums->{$src_file})) {
        $dst_cksums->{$dst_file} = $src_cksums->{$src_file};
        delete $src_cksums->{$src_file};
    } else {
        warn "WARNING: \"$src\" not in cksum or directory list.\n";
    }
    write_cksums $dst_dir, $dst_cksums, $files_touched, $dirs_touched;
    if ($src_dir ne $dst_dir) {
        write_cksums $src_dir, $src_cksums, $files_touched, $dirs_touched;
    }
}

# move all files and directories in $src to $dst
# - if $dst does not exist, $src is only allowed to contain one path, which is
# renamed to $dst
# - if $dst is a file, $src is only allowed to contain a single path (which
# must be a file), which is renamed to $dst
# - otherwise, all files and directories in $src are moved to $dst
# $src: list of source paths
# $dst: destination directory or file (in latter case only one src is allowed)
sub move_files {
    my ($src, $dst, $args) = @_;
    if (!-d $dst && $#$src != 0) {
        die "move: only one source argument allowed when destination is a file\n";
    }
    if (!-d $dst && !-d $src->[0]) {
        move_rename $src->[0], $dst, $args;
        return;
    }
    if (!-e $dst && -d $src->[0]) {
        move_rename $src->[0], $dst, $args;
        return;
    }
    if (-e $dst && !-d $dst && -d $src->[0]) {
        die "move: can't move directory to file\n";
    }
    # Separate files by current dir so the cksum and dir files only need to be opened once
    my $src_files = sort_by_dir(@$src);
    my $dst_cksums = read_cksums $dst;
    return if !defined $dst_cksums;
    my $files_touched = 0;
    my $dirs_touched = 0;
    foreach my $src_dir (keys %$src_files) {
        my ($tmp_files_touched, $tmp_dirs_touched) = move_from_same_dir $src_dir, $src_files->{$src_dir}, $dst_cksums, $dst, $args;
        $files_touched ||= $tmp_files_touched;
        $dirs_touched ||= $tmp_dirs_touched;
    }
    write_cksums $dst, $dst_cksums, $files_touched, $dirs_touched;
}

# remove a file or directory from the filesystem
# $args: option hash; "f" is passed on to the system rm as -f
# returns 1 if the system rm failed, 0 otherwise
sub remove_file_dir {
    my ($path, $args) = @_;
    my $options = $args->{"f"} ? "-rf" : "-r";
    if (system("rm", $options, "--", $path)) {
        return 1;
    }
    if (-e $path) {
        warn "ERROR: Unable to remove \"$path\" from filesystem but " .
            "will still be removed from checksum database\n";
    }
    return 0;
}

# remove all files in one directory, updating the checksum files in the process
# note: the files are only allowed to be basenames, i.e., they must be the
# actual filenames present in the checksum files
sub remove_from_same_dir {
    my ($args, $dir, @files) = @_;
    my $cksums = read_cksums $dir;
    return if !defined $cksums;
    my $dirs_touched = 0;
    my $files_touched = 0;
    foreach my $file (@files) {
        if (exists $SPECIAL_FILES{$file}) {
            warn "ERROR: not removing special file $file\n";
            next;
        }
        my $fullpath = "$dir/$file";
        if (!-e $fullpath) {
            warn "\"$fullpath\": No such file or directory.\n";
        }
        next if remove_file_dir($fullpath, $args);
        if (exists $cksums->{$file}) {
            # the path is gone by now, so its type is taken from the
            # database entry: a defined value is counted as a file,
            # an undefined one as a directory
            if (defined $cksums->{$file}) {
                $files_touched = 1;
            } else {
                $dirs_touched = 1;
            }
            delete $cksums->{$file};
        } else {
            warn "WARNING: \"$file\" not in cksum or directory list.\n";
        }
    }
    write_cksums $dir, $cksums, $files_touched, $dirs_touched;
}

# remove all given files and directories, updating the appropriate checksum
# files in the process
sub remove_files {
    my $args = shift;
    my $sorted_files = sort_by_dir(@_);
    foreach my $dir (keys %$sorted_files) {
        remove_from_same_dir($args, $dir, @{$sorted_files->{$dir}});
    }
}

# create the given directories, initializing them with empty checksum files
# note: does not work like "mkdir -p", i.e., the new directories have to
# be located inside already existing directories
sub make_dirs {
    my @created_dirs;
    foreach (@_) {
        if (system("mkdir", "--", $_)) {
            warn "ERROR creating directory $_\n";
            next;
        }
        push(@created_dirs, $_);
    }
    # Separate files by current dir so the cksum and dir files only need to be opened once
    my %dirs;
    foreach my $dir (@created_dirs) {
        write_cksums $dir, {}, 1, 1;
        my $parent = dirname($dir);
        if (!exists($dirs{$parent})) {
            $dirs{$parent} = [];
        }
        push(@{$dirs{$parent}}, basename($dir));
    }
    # register every new directory in the directory list of its parent
    foreach my $parent (keys %dirs) {
        my $parent_dirs = read_file "$parent/$DIR_FILE", {};
        next if !defined $parent_dirs;
        foreach my $dir (@{$dirs{$parent}}) {
            $parent_dirs->{$dir} = "";
        }
        write_file "$parent/$DIR_FILE", $parent_dirs;
        write_special_cksums $parent, $DIR_FILE;
    }
}

# extract all special lumia files from $src_dir to $dst_dir, recreating the
# entire directory structure in the process
# $args: option hash; "v" makes the system cp verbose
sub extract {
    my ($src_dir, $dst_dir, $args) = @_;
    my $iter = make_lumia_iter 0, $src_dir;
    my $options = $args->{"v"} ? "-av" : "-a";
    while (my $dir = $iter->()) {
        my $final_dir = abs2rel $dir, $src_dir;
        my $fulldir = catfile $dst_dir, $final_dir;
        system("mkdir", "-p", "--", $fulldir);
        foreach my $file (keys %SPECIAL_FILES) {
            my $filepath = catfile $dir, $file;
            if (-e $filepath) {
                system("cp", $options, "--", $filepath, catfile($fulldir, $file));
            }
        }
    }
}

# update the checksums of the given files
# ignores any directories given as arguments
sub update {
    my @files;
    foreach (@_) {
        if (-d $_) {
            warn "Ignoring directory \"$_\"\n";
        } else {
            push @files, $_;
        }
    }
    my $sorted_files = sort_by_dir @files;
    foreach my $dir (keys %$sorted_files) {
        my $cksums = read_cksum_file "$dir/$CKSUM_FILE", {};
        next if !defined $cksums;
        my $changed = 0;
        foreach my $file (@{$sorted_files->{$dir}}) {
            my $cksum_output = get_cksum "$dir/$file";
            next if !defined $cksum_output;
            $cksums->{$file} = $cksum_output;
            $changed = 1;
        }
        # only rewrite the database if at least one checksum was obtained
        if ($changed) {
            write_cksum_file "$dir/$CKSUM_FILE", $cksums;
            write_special_cksums $dir, $CKSUM_FILE;
        }
    }
}

# rewrite the checksums of the checksum file and the directory file in $dir
sub update_special {
    my $dir = shift;
    write_special_cksums $dir, $CKSUM_FILE, $DIR_FILE;
}

# parse the options, then dispatch on the command (the first remaining
# argument), validating the number of arguments for each command
my %args;
Getopt::Long::Configure("bundling");
GetOptions(\%args, "f|force", "q|quiet", "v|verbose", "h|help");

pod2usage(-exitval => 0, -verbose => 2) if $args{"h"};
pod2usage(-exitval => 1, -verbose => 1) if @ARGV < 1;

my $cmd = shift;

if ($cmd eq "mv") {
    die "mv requires at least two arguments\n" if @ARGV < 2;
    my @src = @ARGV[0..$#ARGV-1];
    move_files \@src, $ARGV[-1], \%args;
} elsif ($cmd eq "rm") {
    die "rm requires at least one argument\n" if @ARGV < 1;
    remove_files \%args, @ARGV;
} elsif ($cmd eq "addnew") {
    my $dir = @ARGV ? $ARGV[0] : ".";
    check_add_new_files $dir, \%args;
} elsif ($cmd eq "checknew") {
    my $dir = @ARGV ? $ARGV[0] : ".";
    check_new_files $dir;
} elsif ($cmd eq "checkold") {
    my $dir = @ARGV ? $ARGV[0] : ".";
    check_old_files $dir;
} elsif ($cmd eq "rmold") {
    my $dir = @ARGV ? $ARGV[0] : ".";
    remove_old_files $dir, \%args;
} elsif ($cmd eq "check") {
    if (@ARGV < 1) {
        check_files \%args, ".";
    } else {
        check_files \%args, @ARGV;
    }
} elsif ($cmd eq "clean") {
    my $dir = @ARGV ? $ARGV[0] : ".";
    clean_files $dir, \%args;
} elsif ($cmd eq "extract") {
    my $src_dir = ".";
    my $dst_dir;
    if (@ARGV == 2) {
        $src_dir = $ARGV[0];
        $dst_dir = $ARGV[1];
    } elsif (@ARGV == 1) {
        $dst_dir = $ARGV[0];
    } else {
        die "Invalid number of arguments\n";
    }
    if (!-d $src_dir) {
        die "ERROR: Directory \"$src_dir\" does not exist.\n";
    }
    if (!-d $dst_dir) {
        die "ERROR: Directory \"$dst_dir\" does not exist.\n";
    }
    # the options have to be passed along, otherwise -v never reaches extract
    extract $src_dir, $dst_dir, \%args;
} elsif ($cmd eq "cp") {
    die "cp requires at least two arguments\n" if @ARGV < 2;
    my @src = @ARGV[0..$#ARGV-1];
    copy_files \@src, $ARGV[-1], \%args;
} elsif ($cmd eq "mkdir") {
    die "mkdir requires at least one argument\n" if @ARGV < 1;
    make_dirs @ARGV;
} elsif ($cmd eq "update") {
    die "update requires at least one argument\n" if @ARGV < 1;
    update @ARGV;
} elsif ($cmd eq "updatespecial") {
    die "Invalid number of arguments\n" if @ARGV > 1;
    my $dir = @ARGV ? $ARGV[0] : ".";
    update_special $dir;
} else {
    pod2usage(-exitval => 1, -verbose => 1);
}

__END__

=head1 NAME

lumia.pl - Manage checksums on a filesystem

=head1 SYNOPSIS

B<lumia.pl> command [-hqfv] arguments

=head1 OPTIONS

=over 8

=item B<-h>, B<--help>

Show the full documentation.

=item B<-q>, B<--quiet>

Only output errors.

=item B<-f>, B<--force>

Overwrite files without prompting for confirmation.

=item B<-v>, B<--verbose>

Print each file that is processed by the command.

=back

See the full documentation for details on which commands support which options
and what they do.

It does not matter if the options are written before or after the command.

If C<--> is written anywhere on the command line, option parsing is stopped,
so that files starting with a hyphen can still be specified.

Note that C<-q> and C<-v> aren't exactly opposites - C<-q> applies to commands
like B<check>, where it suppresses printing of the individual files, while
C<-v> applies to commands like B<cp>, where it is just passed on to the system
command called in the background.

Note further that this is very inconsistent, like the rest of the program, but
the author has made too many bad decisions to rectify that problem at the moment.

=head1 COMMANDS

Note that some commands support multiple files/directories as arguments and others,
for which it would make just as much sense, don't. That's just the way it is.

=over 8

=item B<addnew> [-q] [directory]

Walks through B<directory>, adding all new files to the checksum database.
B<directory> defaults to the current directory.

C<-q> suppresses the printing of each file or directory as it is added.

=item B<checknew> [directory]

Walks through B<directory>, printing all files that aren't part of the checksum
database. B<directory> defaults to the current directory.
- -=item B<checkold> [directory] - -Prints all files in the checksum database that do not exist on the filesystem anymore. -B<directory> defaults to the current directory. - -=item B<rmold> [-q] [directory] - -Removes all files found by B<checkold> from the database. B<directory> defaults to -the current directory. - -C<-q> suppresses the printing of each file as it is removed. - -=item B<check> [-q] file/directory ... - -Verifies the checksums of all files given, recursing through any directories. If no -files or directories are given, the current directory is used. - -Note that the checksum database in the corresponding directory will be read again for -every file given on the command line, even if 1000 files in the same directory are given. -This problem does not occur when recursing through directories, so it is best to only -give files directly when checking a few. This problem wouldn't be too difficult to -fix, but, frankly, I'm too lazy, especially since I only added the feature to check -files individually as a convenience when I want to quickly check a single file in a -large directory. - -To explain why it is this way: The directory recursion is done using an iterator, which -has the directories pushed onto its queue in the beginning. The iterator only returns -directories, which are then checked all in one go, but this means that files given on -the command line need to be handled specially. - -C<-q> suppresses the printing of all good checksums but still allows a message to -be printed when a checksum failed. - -=item B<clean> [-q] [directory] - -Removes all lumia special files used to store the checksum database from B<directory> -recursively. B<directory> defaults to the current directory. - -Note that this recurses through the entire directory tree, not just the part that is -actually linked together by the checksum database. 
- -Warning: This just blindly removes all files with one of the special lumia names, -even if they weren't actually created by lumia.pl. - -C<-q> suppresses the printing of each file as it is deleted. - -=item B<extract> [-v] [source] destination - -Recreates the entire directory structure from B<source> in B<destination>, but only -copies the special files used to store the checksum database. B<source> defaults to -the current directory. - -C<-v> prints each file as it is copied. - -Note that this overwrites files in the destination directory without confirmation. - -=item B<mkdir> directory ... - -Creates the given directories, initializing them with empty checksum database files. - -=item B<update> file ... - -Recalculates the checksums for the given files and replaces them in the database. - -Note: Directories given as arguments are ignored. - -This is mainly meant to quickly "touch" a file after it was modified (e.g. a -notes file that is occasionally updated). - -=item B<updatespecial> [directory] - -Recalculates the checksums for the special files C<.lumidify_archive_dirs> and -C<.lumidify_archive_cksums> and writes them to C<.lumidify_archive_cksums.cksum>. -B<directory> defaults to the current directory. - -This is only meant to be used if, for some reason, the checksum files had to -be edited manually and thus don't match the checksums in C<.lumidify_archive_cksums.cksum> -anymore. - -=item B<rm> [-f] file ... - -Removes the given files and directories recursively from the filesystem and -checksum database. The following caveats apply: - -If any actual errors occur while deleting the file/directory (i.e. the system -command C<rm> returns a non-zero exit value), the checksum or directory B<is -left in the database>. If the system C<rm> does not return a non-zero exit value, -but the file/directory still exists afterwards (e.g. 
there was a permission -error and the user answered "n" when prompted), a warning message is printed, -but the files B<are removed from the database> (if the database can be -written to). - -It is an error if there are no checksum database files in the directory -of a file named on the command line. - -C<-f> is passed through to the system C<rm> command. - -=item B<cp> [-vf] source target - -=item B<cp> [-vf] source ... directory - -Copies the given source files, updating the checksum database in the process. - -If the last argument is a file, there must be only one source argument, also a file, -which is then copied to the target. - -If the last argument is a directory, all source arguments are copied into it. - -It is an error if a source or destination directory does not contain any -checksum database files. - -B<cp> will issue a warning and skip to the next argument if it is asked to -merge a directory with an already existing directory. For instance, attempting -to run C<cp dir1 dir2>, where C<dir2> already contains a directory named -C<dir1>, will result in an error. This may change in the future, when the -program is modified to recursively copy the files manually, instead of simply -calling the system C<cp> on each of the arguments. If this was supported in -the current version, none of the checksums inside that directory would be -updated, so it wouldn't be very useful. - -C<-v> is passed through to the system C<cp> command. - -C<-f> silently overwrites files without prompting the user, much like the -C<-f> option in the system C<cp> command. This is handled manually by the -program, though, in order to actually determine what the user chose. See -also the caveat mentioned above. - -=item B<mv> [-f] source target - -=item B<mv> [-f] source ... directory - -Moves the given source files, updating the checksum database in the process. 
- -If the last argument is a file or does not exist, there must be only one source -argument, which is renamed to the target name. - -If the last argument is an existing directory, all source arguments are moved -into it. - -It is an error if a source or destination directory does not contain any -checksum database files. - -B<mv> behaves the same as B<rm> with regards to checking if the source file -is still present after the operation and other error handling. - -C<-f> is handled in the same manner as with B<cp>. - -=back - -=head1 DESCRIPTION - -lumia.pl is meant for managing checksums of files in order to prevent bitrot. -It does this by storing several special files in each directory to keep track -of the checksums: - -=over 8 - -=item B<.lumidify_archive_cksums> - -Contains the checksums of all files in the directory. - -=item B<.lumidify_archive_dirs> - -Contains a list of all directories in the directory. - -=item B<.lumidify_archive_cksums.cksum> - -Contains the checksums of B<.lumidify_archive_cksums> and B<.lumidify_archive_dirs> -in order to provide two-layer protection against bitrot. - -=item B<.lumidify_archive_ignore> - -Contains a list of files and directories that should be ignored by lumia.pl. -Note that this is only read and never written to, unless the command B<clean> -is used. It is, however, still copied over by the B<extract> command. - -=back - -When the documentation for the commands talks about the "checksum database", -it simply means these files. - -All file/directory names are enclosed in quotes, with any backslashes or quotes -inside the name escaped with another backslash. The names are allowed to have -newlines in them. - -The list files only contain a list of filenames, with a newline between the -closing quote of one name and the opening quote of the next one. - -The checksum files additionally contain the output of the checksum program -used and a space before the starting quote of the filename. 
- -=head1 MOTIVATION - -There are already several programs that can be used to check for bitrot, -as listed in L</"SEE ALSO">. However, all programs I tried either were -much too complicated for my taste or just did everything behind my back. -I wanted a simple tool that did exactly what I told it to and also allowed -me to keep the old checksums when reorganizing files, in order to avoid -regenerating the checksums from corrupt files. Since I couldn't find those -features in any program I tried, I wrote my own. - -=head1 DESIGN DECISIONS - -It may strike some readers as a peculiar idea to save the checksum files in -I<every single directory>, but this choice was made after much deliberation. -The other option I could think of was to have one big database, but that -would have made all commands much more difficult to implement and additionally -necessitated opening the entire database for every operation. With individual -files in each directory, operations like B<cp> become quite trivial (ignoring -all the edge cases) since only the toplevel checksums need to be copied to -the new destination, and any subdirectories already contain the checksums. - -This method is not without its drawbacks, however. The most glaring problem -I have found is that there is no way to store the checksums of read-only -directories or any special directories that cannot be littered with the -checksum files because that would clash with other software. Despite these -drawbacks, however, I decided to stick with it because it works for almost -all cases and doesn't have any of the serious drawbacks that other options -would have had. - -The names of the special files were chosen to be ".lumidify_archive*" not -out of vanity, but mainly because I couldn't think of any regular files -with those names, making them a good choice to avoid clashes. 
- -The name of the program, C<lumia.pl> (for "lumidify archive"), was similarly -chosen because it did not clash with any programs installed on my system and -thus allowed for easy tab-completion. - -=head1 HASH ALGORITHMS - -By default, the simple cksum algorithm is used to get the checksums. This -is not very secure, but the main purpose of the program is to prevent -bitrot, for which cksum should be sufficient, especially since it is much -faster than other algorithms. - -There is currently no convenient way to change the algorithm other than -changing the $CKSUM_CMD and $CKSUM_NUMFIELDS variables at the top of -lumia.pl. $CKSUM_CMD must be the command that returns the checksum -when it is given a file, and $CKSUM_NUMFIELDS specifies the number of -space-separated fields the checksum consists of. This has to be specified -in order to determine where the checksum ends and the filename begins in -the output. This would be redundant if all implementations of cksum -supported '-q' for outputting only the checksum, but that only seems to -be supported by some implementations. - -=head1 USAGE SCENARIOS - -=over 8 - -=item B<Security auditing> - -This program is B<NOT> designed to provide any security auditing, as should -be clear from the fact that the checksums are stored right in the same -directory as the files. See mtree(8) for that. - -If you want to, however, you could set $CKSUM_CMD to a secure hash (not cksum) -and B<extract> the checksums to a separate directory, which you keep in a -safe place. You could then use the regular C<cp> command to simply replace -all the checksums with the ones from your backup, in case an attacker modified -the checksum database in the directory with the actual files you're trying to -protect. I don't know if there would be any point in doing that, though. - -=item B<Managing archives> - -This is the purpose I wrote the program for. - -You can simply initialize your archive directory with the B<addnew> command. 
-Whenever you add new files, just run B<addnew> again. If you want to reorganize -the archive, you can use the limited commands available. - -I usually just use rsync(1) to copy the entire archive directory over to other -backup drives and then use the B<check> command again on the new drive. - -I also have checksums for the main data directory on my computer (except for -things like git repositories, which I don't want littered with the database -files). Here, I use the B<update> command for files that I edit more often -and occasionally run B<check> on the entire directory. - -Since the database files are written in each directory, you can run the -B<addnew> command in any subdirectory when you've added new files there. - -=back - -=head1 PERFORMANCE - -Due to the extensive use of iterators and the author's bad life choices, -some functions, such as B<addnew> and B<check>, run more slowly than they -would if they were programmed more efficiently, especially on many small -files and folders. Too bad. - -=head1 PORTABILITY - -This program was written on OpenBSD. It will probably work on most other -reasonably POSIX-Compliant systems, although I cannot guarantee anything. -$CKSUM_CMD may need to be modified at the top of lumia.pl. The file -operation commands are called directly with system(), so those need to -be available. - -It will most certainly not work on Windows, but that shouldn't be a -problem for anyone important. - -=head1 BUGS - -All system commands (unless I forgot some) are called with "--" before -listing the actual files, so files beginning with hyphens should be -supported. I have tested the commands with filenames starting with spaces -and hyphens and also containing newlines, but there may very well be issues -still. Please notify me if you find any filenames that do not work. Handling -filenames properly is difficult. - -There are probably many other edge cases, especially in the B<mv>, B<cp>, -and B<rm> commands. 
Please notify me if you find an issue. - -Operations on files containing newlines may cause Perl to print a warning -"Unsuccessful stat on filename containing newline" even though nothing is -wrong since (as described in B<mv> and B<rm>) existence of the file is -checked afterwards. I didn't feel like disabling warnings, and no normal -person should be working with files containing newlines anyways, so that's -the way it is. - -=head1 EXIT STATUS - -Always 0, unless the arguments given were invalid. We don't do errors around here. - -On a more serious note - I should probably change that at some point. -For the time being, if you want to run B<check> in a script, you can test -the output printed when the C<-q> option is used, since this won't output -anything if there are no errors. Do note, though, that actual errors (file not -found, etc.) are printed to STDERR, while incorrect checksums are printed -to STDOUT. - -=head1 SEE ALSO - -par2(1), mtree(8), aide(1), bitrot(no man page) - -=head1 LICENSE - -Copyright (c) 2019, 2020, 2021 lumidify <nobody[at]lumidify.org> - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -=cut diff --git a/tests/README b/tests/README @@ -5,3 +5,6 @@ To run all tests, simply run alltests.sh There aren't that many tests yet. 
Maybe I'll get around to adding more tests for all the edge cases someday. + +These only work properly on OpenBSD. +Maybe I'll fix that someday.