#!/bin/bash

# Stripdups - provides a method of managing image files and eliminating
# duplicates.

# Currently it relies on sed, ImageMagick, and vim.  I'm unsure how
# checksums are created in IM, so while I'm comfortable with accepting it
# at its word when duplicates are found, I'm not offering any guarantees
# that it won't generate false positives.

# If vim starts, remove any lines that contain filenames you wish to
# keep.  After you leave vim you are given a delay before the script
# starts to delete files; pressing Ctrl-C during this pause should abort,
# but the script may not clean up after itself at that point.

# This script probably has bugs, it doesn't check to see if
# $BASENAME.txt or $BASENAME.html exist so it will overwrite them if they
# do.  When vim comes up it shows all duplicates in $pwd, it doesn't
# check to see if you modified the file before starting to delete the
# files listed, it doesn't check for $BASENAME, etc.

# This program may be distributed freely provided the credit/warnings
# remain.  The author may be contacted at mcsuper5@freeshell.org, and
# the latest version can be found at
# http://mcsuper5.freeshell.org/stripdups/index.html
 
# Michael J. Chappell 			26 June 2004

# Release identification.
version_date="23 Jul 2004"
version="0.07.1"

# Constants.
# dfile is the checksum database; tmplt is the mktemp template, which
# embeds the PID of this shell.
dfile=~/var/images.dat
tmplt="$$.XXXXXX"

# Variables with defaults; several of them are overridden by command
# line options.

# Append new checksums to the database.
appdat="yes"
# Errors are logged to stderr by default.
lfile="/dev/stderr"
# Delay before deleting files.
pause="5"
# Options to pass to rm; remove the -i if you like living dangerously.
safe="-i"
# Default thumbnail directory.
tn="tn"
# Thumbnail dimensions.
tnsize="96x96"

# _help
#   Show the usage text through a pager.
#
#   The pager is taken from $PAGER when that is set, otherwise less is
#   used; if the chosen program cannot be found the text is simply
#   written with cat so that help is available on minimal systems.
#   Returns the exit status of the pager.
_help ()
{
local pager=${PAGER:-less}

# ${pager%% *} is the bare command name when PAGER also carries options.
command -v "${pager%% *}" >/dev/null 2>&1 || pager=cat

# $pager is left unquoted on purpose so that a value such as "less -R"
# is split into the command and its options.
$pager <<EOF
Usage:

stripdups [-f][-l][-q][-s][-t][-v]
          [--force][--log][--quiet][--safe][--test][--verbose] BASENAME
stripdups [-h][-V][--help][--version]

-f,--force
    Don't prompt before deleting files.  Should not be used with -s,--safe.
-h,--help
    What you see is what you get.
-l,--log
    Log ImageMagick errors to stripdups.log.  Conflicts with
    -q,-v,--quiet,--verbose.
-q,--quiet
    Send ImageMagick errors to the bit bucket.  Conflicts with
    -l,-v,--log,--verbose.
-s,--safe (default)
    Prompt before deleting files.  Should not be used with -f,--force.
-t,--test
    Do not append to database.
-v,--verbose (default)
    Send ImageMagick errors to stderr.  Conflicts with
    -l,-q,--log,--quiet
-V,--version
    Prints out version number.

If multiple options conflict the last one specified will be used.

Michael J. Chappell						23 Jul 2004
mcsuper5@freeshell.org
EOF

return
}

# Scratch files.  The mktemp template has no directory part, so the
# files are created in the current directory.
#   rdup - checksums that occur more than once in this directory
#   sdup - sorted listing
#   tdup - duplicate report that is handed to vim
#   tmp1 - listing of the images in this directory
#   tmp2 - scratch copy used while rewriting tmp1
#   dtmp - scratch copy used while rebuilding the database

# Remove whatever scratch files exist.  -f keeps this quiet when a file
# was never created or has already been deleted by the script itself.
_remove_scratch ()
{
	rm -f -- "${rdup}" "${sdup}" "${tdup}" "${tmp1}" "${tmp2}" "${dtmp}"
}

# The files are created before the command line is examined, so every
# way out of the script (help, version, bad option, Ctrl-C) has to
# remove them; an EXIT trap covers all of those paths.
trap _remove_scratch EXIT

# Fall back to the usual template if tmplt has not been set.
_tmplt=${tmplt:-$$.XXXXXX}

rdup=$(mktemp "${_tmplt}") &&
sdup=$(mktemp "${_tmplt}") &&
tdup=$(mktemp "${_tmplt}") &&
tmp1=$(mktemp "${_tmplt}") &&
tmp2=$(mktemp "${_tmplt}") &&
dtmp=$(mktemp "${_tmplt}") || {
	echo "stripdups: cannot create temporary files" 1>&2
	exit 1
}

shopt -s nullglob	# if a wildcard pattern matches no files, expand
			# to nothing instead of the literal pattern.
progname=$0		# save the program name.

# Command line handling.
#   Options set safe, lfile and appdat; the last non-option argument
#   becomes BASENAME.  When options conflict the last one wins.
#   Exit status: 0 after --help/--version, 127 for usage errors
#   (no arguments, unknown option, missing BASENAME).
if [ $# -lt 1 ]; then
	_help
	exit 127
fi

while [ $# -ge 1 ]; do
	case "$1" in
	-f | --force )
		safe='' ;;
	-h | --help )
		_help
		exit 0 ;;
	-l | --log )
		lfile='stripdups.log' ;;
	-q | --quiet )
		lfile='/dev/null' ;;
	-s | --safe )
		safe='-i' ;;
	-t | --test )
		appdat='no' ;;
	-v | --verbose )
		lfile='/dev/stderr' ;;
	-V | --version )
		# Report the version number itself, with its date.
		echo "$progname $version ($version_date)" 1>&2
		exit 0 ;;
	-*)
		_help
		exit 127 ;;
	*)
		BASENAME="$1" ;;
	esac
	shift
done

# Only options were given: without a BASENAME the output files would
# be named ".txt" and ".html".
if [ -z "${BASENAME}" ]; then
	_help
	exit 127
fi
				
# Generate listing : checksum dimensions filesize "filename"
#
# _scan_images LISTING ERRLOG
#   For every image matched in the current directory, append one record
#   to LISTING.  Whatever identify writes to stderr is appended to
#   ERRLOG.
_scan_images ()
{
    local listing=$1 errlog=$2
    local img

    for img in snapshot*[0-9] *{jpg,gif,png,JPG,GIF,PNG,JPEG,jpeg,bmp,BMP}; do
        # head keeps only the first record because multi-image GIFs are
        # processed by frame, so you would get a false positive if you
        # split a multi-image GIF and processed it against the original.
        #
        # The explicit \n ends each record: identify is not guaranteed
        # to terminate -format output itself, and without it records
        # would run together.  An extra blank line is dropped by head.
        #
        # The stderr redirection belongs to identify (not to head), as
        # that is the command whose errors are being logged.
        identify -format "%# %wx%h %b \"%f\"\n" "$img" 2>>"${errlog}" |
            head -n 1 >>"${listing}"
    done
}

_scan_images "${tmp1}" "${lfile}"

# Find multiple occurrences of any checksum
#
# _collect_dups LISTING SORTED REPEATED REPORT DATABASE
#   LISTING   records whose first space-separated field is the checksum
#   SORTED    receives LISTING in sorted order
#   REPEATED  receives each checksum that occurs more than once
#   REPORT    receives every record of SORTED that carries a repeated
#             checksum, followed - only when there is at least one
#             match - by a "#" header and the matching records found in
#             DATABASE
#   DATABASE  earlier records; it may be missing
#   REPORT is left empty when nothing matched.
_collect_dups ()
{
    local listing=$1 sorted=$2 repeated=$3 report=$4 database=$5
    local sum hits

    sort -o "${sorted}" "${listing}"

    # uniq -D -t' ' -W 1 ${sdup} >${tdup}
    # Somewhat more portable way of accomplishing the above (hopefully)
    cut -d' ' -f 1 "${sorted}" | uniq -d > "${repeated}"

    # The pattern is anchored so that a checksum is only compared with
    # the checksum field, never with some other part of the record.
    while IFS= read -r sum; do
        [ -n "${sum}" ] || continue
        grep -- "^${sum} " "${sorted}"
    done < "${repeated}" > "${report}"

    # Nothing to compare against when there is no database yet.
    [ -f "${database}" ] || return 0

    # uniq: look every checksum up once, even if it is repeated here.
    hits=$(cut -d' ' -f 1 "${sorted}" | uniq |
        while IFS= read -r sum; do
            [ -n "${sum}" ] || continue
            grep -- "^${sum} " "${database}"
        done)

    # The header is written only together with real matches, so an
    # empty report keeps meaning "no duplicates".
    if [ -n "${hits}" ]; then
        echo "#The following files exist in database already." >> "${report}"
        printf '%s\n' "${hits}" >> "${report}"
    fi
    return 0
}

_collect_dups "${tmp1}" "${sdup}" "${rdup}" "${tdup}" "${dfile}"


# _purge_listed REPORT LISTING SCRATCH
#   Delete every file named in REPORT and drop its record from LISTING.
#   The file name is the text between the first pair of double quotes
#   on a line; lines starting with "#", blank lines and lines without
#   a quoted name are skipped.  Names that do not exist in the current
#   directory are skipped as well.  rm is run with the options held in
#   $safe.  A record is only dropped from LISTING once the file is
#   really gone, so declining the "rm -i" prompt keeps both the file
#   and its record.  SCRATCH is used as a work file.
_purge_listed ()
{
    local report=$1 listing=$2 scratch=$3
    local line name

    # REPORT is read on descriptor 3 so that stdin stays connected to
    # the terminal for the rm -i prompts.  Reading line by line keeps
    # file names containing spaces in one piece.
    while IFS= read -r line <&3 || [ -n "${line}" ]; do
        case "${line}" in
        '#'* | '' ) continue ;;
        esac

        name=${line#*\"}			# drop up to the first quote
        [ "${name}" != "${line}" ] || continue	# no quote on this line
        name=${name%%\"*}			# drop from the next quote on
        [ -n "${name}" ] || continue
        [ -e "${name}" ] || continue

        # remove file; $safe is unquoted so that an empty value
        # contributes no argument at all.
        rm $safe -- "${name}"
        [ -e "${name}" ] && continue

        # remove filename from the text file.  A fixed-string match on
        # the quoted name avoids treating the name as a regular
        # expression or matching it inside a longer name.
        grep -v -F -- " \"${name}\"" "${listing}" > "${scratch}"
        cp "${scratch}" "${listing}"
    done 3< "${report}"
    return 0
}

if [ -s "${tdup}" ]; then	# process only if duplicates exist.
    # give the user a way to preserve filenames.
    vim "${tdup}"

    # Give the user an opportunity to abort.
    echo "All files listed in the file that you just editted will be removed."
    echo
    echo "Press Ctrl-C NOW to abort."
    sleep "${pause}"

    _purge_listed "${tdup}" "${tmp1}" "${tmp2}"
fi	# processed only if duplicates exist in current directory.

# Keep the listing of this directory as BASENAME.txt.
cp "${tmp1}" "${BASENAME}.txt"

# _merge_into_db DATABASE LISTING SCRATCH
#   Merge the records of LISTING into DATABASE and leave DATABASE
#   sorted.  The directory of DATABASE and DATABASE itself are created
#   when missing, so the very first run works.  Identical records are
#   stored once (sort -u); otherwise running the script again over the
#   same directory would duplicate every record.  The result is built
#   in SCRATCH and copied over DATABASE only if sort succeeded.
#   Returns non-zero if any step fails.
_merge_into_db ()
{
    local db=$1 listing=$2 scratch=$3

    mkdir -p -- "$(dirname -- "${db}")" || return
    [ -e "${db}" ] || : > "${db}" || return
    sort -u -- "${db}" "${listing}" > "${scratch}" || return
    cp -- "${scratch}" "${db}"
}

# Append to database
if [ "${appdat}" = 'yes' ]; then
    _merge_into_db "${dfile}" "${BASENAME}.txt" "${dtmp}"
fi

# Preliminary cleanup: delete the scratch files one at a time.
for scratch in "${dtmp}" "${rdup}" "${sdup}" "${tdup}" "${tmp1}" "${tmp2}"
do
    rm "${scratch}"
done

# Make sure the thumbnail directory is there.
# (Open note from the author: remember to modify for case where tn
# exists.)
mkdir -p "${tn}"

# Build one thumbnail per image.  The whole loop is sent to the
# background, so the script carries on without waiting for it.
for picture in snapshot*[0-9] *{jpg,JPG,jpeg,JPEG,bmp,BMP,gif,GIF,png,PNG}
do
	convert -resize "${tnsize}" "${picture}" "${tn}/${picture}"
done &

# _html_escape TEXT
#   Print TEXT with the characters & < > " replaced by their HTML
#   entities, so that it can be placed in element content or inside a
#   double-quoted attribute value.  & is handled first so the entities
#   produced for the other characters are not escaped again.
_html_escape ()
{
	printf '%s\n' "$1" |
		sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' \
		    -e 's/>/\&gt;/g' -e 's/"/\&quot;/g'
}

# Create html page: one table row per image holding the linked
# thumbnail, the dimensions, the file size and the linked file name.
# NOTE(review): names are HTML-escaped but not percent-encoded, so a
# name containing e.g. "#" or "?" will still give a wrong link.
{
	echo "<html><body><table>"
	for i in snapshot*[0-9] *jpg *JPG *jpeg *JPEG *bmp *BMP *gif *GIF *png *PNG
	do
		name=$(_html_escape "$i")
		# The stderr redirection belongs to identify, the command
		# whose errors are being logged.  The \n ends the record of
		# each frame so that head can keep just the first one.
		info=$(identify -format "%wx%h</td><td>%b\n" "$i" 2>>"${lfile}" |
			head -n 1)
		echo "<tr>"
		printf '  <td><a href="%s"><img src="%s/%s"></a></td>\n' \
			"${name}" "${tn}" "${name}"
		printf '  <td>%s</td>\n' "${info}"
		printf '  <td><a href="%s">%s</a></td>\n' "${name}" "${name}"
		echo "</tr>"
	done
	echo "</table></body></html>"
} > "${BASENAME}.html"

