#!/bin/bash
# --------------------------------
# ------        rmdup      -------
# --------------------------------
# --- Find duplicated files    ---
# --- and prompt for removal.  ---
# --------------------------------
# by giosue@vitaglione.it on 1970-01-01
# --------------------------------
# useful developer's links
# http://www.delorie.com/gnu/docs/gawk/gawk.html
#
# Usage: run it from the directory that has to be scanned.
#  - every regular file below "." is hashed with md5sum;
#  - the index and all working files live in ./.rmdup;
#  - "removed" files are not erased: they are moved into ./.rmdup/.trash.
# Requires: dialog, md5sum (and tree for the directory listing).
#
# Index format ($HASHESTXT), one entry per line, sorted by hash:
#     <md5>  <file name>
# An entry whose file has been moved to the trash is rewritten as
#     <md5>  __REMOVED__ <file name>
# Real names always start with "./" (they come from find), so a name column
# that begins with __REMOVED__ can only be a mark.
# File names containing a newline cannot be stored in this line-based index
# and are skipped.
#

# Text shown by the "Help" menu entry.
HELP="rmdup

Find duplicated files and prompt for removals.
"

# --------------------------------
# Working files (all below $BASEDIR)
# --------------------------------
BASEDIR="./.rmdup"
TEMPFILE="$BASEDIR/.tempfile"                 # receives dialog's answers (stderr)
HASHESTXT="$BASEDIR/hashes.txt"               # the sorted index
HASHESTMP="$BASEDIR/hashes.tmp"               # scratch copy while rewriting the index
DUPLHASHES="$BASEDIR/duplicatedHashes.tmp"    # hashes that occur more than once
REMOVEDFILES="$BASEDIR/.removedFiles.txt"     # only truncated by calculateHashes
TRASH="$BASEDIR/.trash"                       # destination of "removed" files

# --------------------------------
# Print an error message on stderr and terminate the script.
# --------------------------------
function die {
  printf 'rmdup: %s\n' "$*" >&2
  exit 1
}

# --------------------------------
# Main loop: show the menu and dispatch the chosen action until the user
# quits (entry "Q") or leaves the menu with Cancel/ESC.
# --------------------------------
function mainMenu {
  local choice

  # $TRASH lives below $BASEDIR, so a single mkdir -p creates both.
  mkdir -p -- "$TRASH" || die "cannot create $TRASH"

  # A loop instead of self-recursion: the call stack does not grow and no
  # stale answer of a previous round can trigger a second action.
  while true; do
    dialog \
      --backtitle "rmdup - ReMove DUPlicated files" \
      --title "Main menu:" \
      --menu "Choose:" 15 55 9 \
      1 "List current directories" \
      2 "Calculate MD5 file hashes" \
      3 "List all file hashes" \
      4 "List duplicated files" \
      g "DELETE/RENAME duplicated files" \
      h "Help" \
      Q "Quit" 2> "$TEMPFILE" || break
    choice=$(cat -- "$TEMPFILE")
    rm -f -- "$TEMPFILE"

    case "$choice" in
      1)
        # stderr is captured too, so a missing "tree" is reported in the box
        tree -d > "$TEMPFILE" 2>&1
        dialog --textbox "$TEMPFILE" 22 77
        rm -f -- "$TEMPFILE"
        ;;
      2)
        calculateHashes
        ;;
      3)
        if checkHashesPresence; then
          dialog --textbox "$HASHESTXT" 22 77
        fi
        ;;
      4)
        # shows the duplicated hashes as stored into $DUPLHASHES
        if checkHashesPresence; then
          cleanHashesFromRemovedMark
          updateDuplHashes
          dialog --textbox "$DUPLHASHES" 22 77
        fi
        ;;
      g)
        processDuplicates
        ;;
      h)
        printf '%s\n' "$HELP" > "$TEMPFILE"
        dialog --textbox "$TEMPFILE" 22 77
        rm -f -- "$TEMPFILE"
        ;;
      Q)
        exit 0
        ;;
    esac
  done

  rm -f -- "$TEMPFILE"
} #end mainMenu()

# --------------------------------
# Hash every regular file below "." (the $BASEDIR tree excluded), write the
# sorted index to $HASHESTXT and refresh $DUPLHASHES.
# --------------------------------
function calculateHashes {
  local -a files=()
  local file sum total count=0

  dialog --infobox "Creating hashes..." 6 40

  # NUL-delimited list: blanks, '+', globs... in file names all survive.
  # -prune keeps find out of our own working directory.
  while IFS= read -r -d '' file; do
    [[ $file == *$'\n'* ]] && continue   # not representable in the index
    files+=("$file")
  done < <(find . -path "$BASEDIR" -prune -o -type f -print0)
  total=${#files[@]}

  : > "$HASHESTMP"

  # A single gauge fed through its stdin protocol:
  #   XXX / <percent> / <new text> / XXX
  # The loop does not run when there are no files, so "total" is never a
  # zero divisor.
  {
    for file in "${files[@]}"; do
      count=$((count + 1))
      # md5sum reads stdin, so it never sees (nor escapes) the file name and
      # names starting with '-' are harmless. Unreadable files are left out
      # of the index.
      if { sum=$(md5sum < "$file"); } 2> /dev/null; then
        printf '%s  %s\n' "${sum%% *}" "$file" >> "$HASHESTMP"
      fi
      printf 'XXX\n%d\n Hashing: [%d/%d]\\n %s\nXXX\n' \
        "$((count * 100 / total))" "$count" "$total" "$file"
    done
  } | dialog --gauge "Creating hashes..." 10 75 0

  dialog --infobox "Sorting..." 6 40
  # Byte-wise sort: equal hashes end up on adjacent lines (needed by uniq -d).
  LC_ALL=C sort -- "$HASHESTMP" > "$HASHESTXT"
  rm -f -- "$HASHESTMP"

  # $HASHESTXT -> $DUPLHASHES
  updateDuplHashes
  : > "$REMOVEDFILES"

  dialog --msgbox "File hashes calculation, finished." 6 40
} #end calculateHashes()

# --------------------------------
# Print, one per line, the file names recorded in $HASHESTXT under hash $1.
# Entries marked as removed are left out.
# --------------------------------
function namesForHash {
  awk -v wanted="$1" '
    $1 == wanted && index($2, "__REMOVED__") != 1 {
      # the name starts after the hash and its two separator characters
      print substr($0, length($1) + 3)
    }
  ' "$HASHESTXT"
}

# --------------------------------
# Rewrite the name column of the $HASHESTXT entry whose name is exactly $1
# with the text $2. The hash column and the line order are preserved.
# Names travel through the environment and are compared as plain strings,
# so they are never interpreted as awk code, regex or escape sequences.
# --------------------------------
function replaceHashEntry {
  RMDUP_OLD="$1" RMDUP_NEW="$2" awk '
    {
      if (substr($0, length($1) + 3) == ENVIRON["RMDUP_OLD"])
        print $1 "  " ENVIRON["RMDUP_NEW"]
      else
        print
    }
  ' "$HASHESTXT" > "$HASHESTMP" && mv -- "$HASHESTMP" "$HASHESTXT"
}

# --------------------------------
# Show the files sharing one hash and let the user skip, remove or rename.
# input:  $1 = the hash
#         $2 = progress label "i/ntot" (used as back title)
#         $3... = names of the files having that hash, one per argument
# return: 0 = ok, go to the next item. 1 = stay on the same item.
# The "Quit" button terminates the whole script.
# --------------------------------
function askAndDelete {
  local hash=$1 number=$2
  shift 2
  local -a names=("$@") menu=()
  local idx retval choice target=""

  # menu tags are 1-based positions into names[]
  for idx in "${!names[@]}"; do
    menu+=("$((idx + 1))" "${names[idx]}")
  done

  dialog --clear \
    --backtitle "$number" \
    --ok-label "Skip" \
    --extra-button \
    --extra-label "REMOVE" \
    --help-button \
    --help-label "Rename" \
    --cancel-label "Quit" \
    --menu "Found following duplicated files: \n\n  Choose a file, hit REMOVE or RENAME.\n Hit on SKIP to go to the next duplicated files." \
    15 77 5 \
    "${menu[@]}" 2> "$TEMPFILE"
  retval=$?
  choice=$(cat -- "$TEMPFILE")
  rm -f -- "$TEMPFILE"

  # The help button answers "HELP <tag>"; the other buttons answer "<tag>".
  choice=${choice#HELP }
  if [[ $choice =~ ^[0-9]+$ ]] && (( choice >= 1 && choice <= ${#names[@]} )); then
    target=${names[choice - 1]}
  fi

  case "$retval" in
    0)   # skip
      return 0
      ;;
    1)   # quit
      echo "QUIT."
      exit 0
      ;;
    2)   # rename (help button)
      [[ -n $target ]] && renameFile "$target"
      return 1
      ;;
    3)   # remove (extra button)
      [[ -n $target ]] && deleteFile "$target"
      return 1
      ;;
    255) # ESC pressed: treated like skip
      return 0
      ;;
    *)
      echo "Unexpected code $retval" >&2
      return 0
      ;;
  esac
} #end askAndDelete()

# --------------------------------
# Ask for confirmation, then move a file into $TRASH and mark its index
# entry with __REMOVED__.
# input:  $1 complete name of the file to be deleted
# return: 0 = file moved to the trash. 1 = nothing done (declined or failed).
# --------------------------------
function deleteFile {
  local name=$1 listing errors

  listing=$(ls -al -- "$name" 2>&1)
  dialog --yesno "About to DELETE: $name \n $listing " 8 80 || return 1

  # The index is touched only when the move really succeeded.
  if errors=$(mv -- "$name" "$TRASH/" 2>&1); then
    replaceHashEntry "$name" "__REMOVED__ $name"
    dialog --msgbox "removed: $name" 9 75
    return 0
  fi

  dialog --msgbox "NOT removed: $name \n\n$errors" 9 75
  return 1
} #end deleteFile()

# --------------------------------
# Ask for a new name, rename the file and update its index entry.
# input:  $1 full name of the file
# return: 0 = file renamed. 1 = nothing done (cancelled, refused or failed).
# --------------------------------
function renameFile {
  local oldname=$1 newname errors

  if ! dialog --inputbox "Rename file: $oldname" 6 77 "$oldname" 2> "$TEMPFILE"; then
    rm -f -- "$TEMPFILE"
    dialog --msgbox "File not renamed: \n $oldname " 6 77
    return 1
  fi
  newname=$(cat -- "$TEMPFILE")
  rm -f -- "$TEMPFILE"

  if [[ -z $newname || $newname == "$oldname" ]]; then
    dialog --msgbox "File not renamed: \n $oldname " 6 77
    return 1
  fi

  # Keep the index uniform: every recorded name is absolute or starts with ./
  if [[ $newname != /* && $newname != ./* ]]; then
    newname="./$newname"
  fi

  # mv would silently overwrite the target, which could be the other copy.
  if [[ -e $newname ]]; then
    dialog --msgbox "File not renamed, target already exists: \n $newname " 7 77
    return 1
  fi

  if errors=$(mv -- "$oldname" "$newname" 2>&1); then
    # change its name also into $HASHESTXT
    replaceHashEntry "$oldname" "$newname"
    dialog --msgbox "renamed: \"$oldname\" ---> \"$newname\"" 14 75
    return 0
  fi

  dialog --msgbox "NOT renamed: \"$oldname\" \n\n$errors" 14 75
  return 1
} #end renameFile()

# --------------------------------
# Create the list of hashes that occur more than once in $HASHESTXT.
# Relies on $HASHESTXT being sorted (uniq only compares adjacent lines).
# --------------------------------
function updateDuplHashes {
  awk '{ print $1 }' "$HASHESTXT" | uniq -d > "$DUPLHASHES"
}

# --------------------------------
# return: 0 = $HASHESTXT exists. 1 = it does not (a message box is shown).
# --------------------------------
function checkHashesPresence {
  if [[ -f $HASHESTXT ]]; then
    return 0
  fi
  dialog --msgbox "No hashes found. \nCalculate MD5 file hashes, first of all." 7 50
  return 1
}

# --------------------------------
# Take away from $HASHESTXT all entries marked with __REMOVED__.
# Only the name column is inspected, so a file that merely has __REMOVED__
# somewhere in its path is kept.
# --------------------------------
function cleanHashesFromRemovedMark {
  awk 'index($2, "__REMOVED__") != 1' "$HASHESTXT" > "$HASHESTMP" \
    && mv -- "$HASHESTMP" "$HASHESTXT"
}

# --------------------------------
# For each entry of $DUPLHASHES find the files having that hash
# and prompt for renaming/removal.
# --------------------------------
function processDuplicates {
  local -a hashes=() names=()
  local hash name total pos=0

  # needs $HASHESTXT
  checkHashesPresence || return 0

  cleanHashesFromRemovedMark
  updateDuplHashes

  # Everything is loaded into arrays first: dialog must not run inside a
  # loop whose stdin is redirected from a file.
  while IFS= read -r hash; do
    [[ -n $hash ]] && hashes+=("$hash")
  done < "$DUPLHASHES"
  total=${#hashes[@]}

  while (( pos < total )); do
    hash=${hashes[pos]}

    # Re-read on every round: a remove or rename has changed the index.
    names=()
    while IFS= read -r name; do
      names+=("$name")
    done < <(namesForHash "$hash")

    # Fewer than two files left: nothing is duplicated any more.
    if (( ${#names[@]} < 2 )); then
      pos=$((pos + 1))
      continue
    fi

    # process the files (delete or rename); status 1 = stay on the same item
    if askAndDelete "$hash" "$((pos + 1))/$total" "${names[@]}"; then
      pos=$((pos + 1))
    fi
  done

  rm -f -- "$DUPLHASHES"
  dialog --msgbox "All duplicated files processed !" 5 40
} #end processDuplicates()

#--------------------------------
# Program start
#--------------------------------
for tool in dialog md5sum; do
  command -v "$tool" > /dev/null || die "required program not found: $tool"
done

mainMenu
exit
#--------------------------------
# program end
#--------------------------------