Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- # Purpose of this script:
- #
- # Accurately eliminate duplicate files within a given folder. Optionally, consolidate all files
- # to a common folder. Duplicate names within the consolidate folder will be made unique.
- #
- # Methodology:
- # Find folders within a specified top folder which are not bundles. Find files
- # within each found folder (but not within its subfolders, because each folder
- # is handled individually).
- # Find potential bundles and checksum their entire content as one item.
- # Get MD5 checksums for each item and save an index of all checksums and
- # filepaths. Sort the index by checksum. Compare each
- # item's checksum and if a duplicate is found, move the duplicate to the trash.
- # Compare each item's name and if a duplicate is found, append incrementing #X
- # to the name prior to the last period (as in "Filename #2.jpg")
- # Ignore iTunes and iPhoto folders
- #
- # Optionally move files and potential bundles to the top-level folder
- # Delete .DS_Store, .FBCIndex*, and .FBCSemaphoreFile files and subsequently
- # delete empty subfolders. Optionally regroup map files into folders based on
- # the name of .frq file if found, or .shp file
- #
- # This script is a partial rewrite of the code from JulieJulieJulie's Consolidate17 script.
- # See:
- # http://discussions.apple.com/message.jspa?messageID=11577992#11577992
- #
- # additional discussion:
- # https://discussions.apple.com/thread/2434165?start=0&tstart=0
- # https://discussions.apple.com/thread/8276669
- #
- # Copyright 2018 rccharles
- #
- # GNU General Public License
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, version 3
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # For a copy of the GNU General Public License see
- # <http://www.gnu.org/licenses/>.
- #
# Copyright line shown in the start-up banner.
copyRight='Copyright (c) 2010 & 2018 rccharles. GNU General Public License.'

# Debug aid: prefix used by `bash -x` tracing, showing source file and line.
# ${FUNCNAME[0]} is intentionally left out of the prefix; per the original
# author's note it is not available in the Tiger version of bash.
PS4='+(${BASH_SOURCE}:${LINENO}):'
export PS4
- ##########
- ### Functions (subroutines which can be called by name)
# helpHelpIsAllINeed: print the quick usage summary on stdout.
# Globals read: savedCommandName (only its basename is shown).
helpHelpIsAllINeed() {
  local scriptName="${savedCommandName##*/}"

  # One printf call, one output line per argument; "" yields a blank line.
  printf '%s\n' \
    "" \
    "${scriptName} [ options ] [ directory-to-compress ]" \
    "" \
    "options:" \
    "-h Display quick help." \
    "" \
    "-m Moving files is disabled." \
    "-M Move files to top level." \
    "" \
    "-r Regrouping map files is disabled." \
    "-R Regroup map files." \
    "" \
    "-V Display very detailed messages." \
    "" \
    "example:" \
    " ./${scriptName} -mrV /Users/mac/cons/see" \
    ""
}
# Show_Time: report elapsed time since processing started and the duration of
# the step that just finished.
# Globals read:    Seconds_since_the_epoch_start, previousRunTime
# Globals written: currentRunTime, stepRunTime, previousRunTime
# Outputs:         one timing line followed by a blank line, on stdout
# Returns:         0 always. (The former `let` form returned 1 whenever the
#                  computed value was 0.)
Show_Time () {
  local now
  now=$( date +%s )
  # If the start time has not been recorded yet, treat "now" as the start
  # instead of failing with an arithmetic syntax error.
  currentRunTime=$(( now - ${Seconds_since_the_epoch_start:-now} ))
  stepRunTime=$(( currentRunTime - previousRunTime ))
  echo " Elapsed time in seconds since start of processing: ${currentRunTime}" \
       " Time of this step: ${stepRunTime}"
  echo
  previousRunTime=${currentRunTime}
  return 0
}
# echoTrashInfo: record that one item has been moved to the trash.
# Globals read:    trashFileName, fileName, fileType, saveFolder, runDateTime
# Globals written: trashFileCount (fileType "f"), trashDirectoryCount ("d")
# Outputs:         the trash path on stdout, and an "original > trash" line
#                  appended to the Consolidate_Trashed_List file in saveFolder.
echoTrashInfo () {
  echo " -${trashFileName}"
  echo "${fileName} > ${trashFileName}" \
    >>"${saveFolder}/Consolidate_Trashed_List_${runDateTime}.txt"
  case "${fileType}" in
    f ) trashFileCount=$(( trashFileCount + 1 ))
        ;;
    d ) trashDirectoryCount=$(( trashDirectoryCount + 1 ))
        ;;
    * ) # Was "Echo", which only resolves on a case-insensitive file system.
        echo
        echo "Error --- Error in echoTrashInfo. fileName=${fileName};"
        echo "Error --- fileType=${fileType};"
        echo
        ;;
  esac
}
# ---- Script-wide state -------------------------------------------------------
# "Question" means: not decided on the command line, ask the user later.
declare folder \
        MainFolder \
        MoveFiles="Question" \
        RegroupMapFiles="Question" \
        runDateTime \
        Seconds_since_the_epoch_start
# Timestamp used in the name of this run's trashed-items list.
runDateTime=$(date '+%Y-%m-%d_%H.%M')

# Exported values.
# NOTE(review): veryVerbose already defaults to "Yes", so the -V switch has no
# visible effect as written - confirm whether that is intended.
declare -x tab=$'\t' \
           trashFolder \
           verbose="Yes" \
           veryVerbose="Yes" \
           whereIsTrash

# Integer counters and timers.
declare -i count=0 \
           offsetMainFolder \
           currentRunTime \
           previousRunTime=0 \
           stepRunTime \
           trashDirectoryCount=0 \
           trashFileCount=0

declare -a filePath \
           fileName \
           fileSum \
           folders

savedCommandName="$0"

# GetFileInfo may not be installed; only call it when it is available, and
# quote $0 so a script path containing spaces is passed as one argument.
scriptRevised=""
if command -v GetFileInfo >/dev/null 2>&1 ; then
  scriptRevised=$( GetFileInfo -m "$0" )
fi

# ---- Start-up banner ---------------------------------------------------------
echo
echo "${savedCommandName} script revised ${scriptRevised}"
echo
bigVersion="Lizard 4"
echo "=============================================================="
echo
echo " ${bigVersion}"
echo " ${bigVersion}"
echo " ${bigVersion}"
echo
echo "=============================================================="
echo
echo "${copyRight}"
echo
# md5 produces every checksum used below. The script invokes it by bare name,
# so verify that it can be found on PATH, and explain why we stop instead of
# exiting silently.
if ! command -v md5 >/dev/null 2>&1 ; then
  echo "The md5 utility was not found; it is needed for the checksums." >&2
  exit 2
fi
# Command-line switches. Upper-case letters enable a feature, lower-case ones
# disable it; anything not listed prints the help text and exits with 6.
if (( $# > 0 )) ; then
  while getopts "hmMrRV" Option
  do
    case "${Option}" in
      M )
        echo "Moving files. -M argument used on command line."
        echo
        MoveFiles="Yes"
        ;;
      m )
        echo "-m argument used on command line, moving files is disabled."
        echo
        MoveFiles="No"
        ;;
      R )
        echo "Regrouping map files. -R argument used on command line."
        echo
        RegroupMapFiles="Yes"
        ;;
      r )
        echo "-r argument used on command line, regrouping map files is disabled."
        echo
        RegroupMapFiles="No"
        ;;
      V )
        echo "Display very detailed messages. -V argument used on command line."
        echo
        veryVerbose="Yes"
        ;;
      h )
        helpHelpIsAllINeed
        echo "Bye for now."
        exit 7
        ;;
      * )
        echo "Unknown argument among arguments $* on command line."
        helpHelpIsAllINeed
        exit 6
        ;;
    esac
  done
fi

# Discard the switches that getopts consumed; what remains is the folder.
shift "$(( OPTIND - 1 ))"
# The top-level folder is the first remaining argument; if that is not an
# existing directory the user gets exactly one chance to type or drag it in.
MainFolder="${1}"

if [[ ! -d "${MainFolder}" ]] ; then
  printf '%s\n' \
    "Folder not specified. Type the path to the top-level folder or drag" \
    "it into this Terminal window, then click back in this window and press return:"
  # Plain `read` (no -r) on purpose: backslash escapes in the typed or dragged
  # path are processed by read, so "My\ Folder" becomes "My Folder".
  read MainFolder
  echo
fi

# Still not a directory: give up.
if [[ ! -d "${MainFolder}" ]] ; then
  echo "Folder not found. Was ${MainFolder}"
  exit 4
fi

# Refuse to run on the root of the file system.
if [[ "${MainFolder}" == "/" ]] ; then
  echo "You must be crazy."
  exit 5
fi

printf '%s\n' \
  "Holding down Control-c on the keyboard will stop this script in an emergency." \
  ""
# Ask interactively about anything the command line left undecided
# ("Question"). A single keypress answers each prompt: n/N means "No",
# every other key means "Yes".
if [ "${MoveFiles}" = "Question" ] ; then
  echo "WARNING: allowing the script to move files could cause problems!"
  echo
  echo "Do you want to move all files and bundles to the top level at the end of the run?"
  read -r -n1 -p "Press 'n' to NOT move files and keep the folder structure intact; otherwise press any key. "
  case "${REPLY}" in
    n | N ) MoveFiles="No" ;;
    * )     MoveFiles="Yes" ;;
  esac
  echo
fi

if [ "${RegroupMapFiles}" = "Question" ] ; then
  # The question used to ask "Do you want to disable regrouping ...", which
  # contradicted the key hint below (pressing 'n' disables regrouping).
  echo "Do you want to regroup map files into folders based on the .shp filename?"
  read -r -n1 -p "Press 'n' to NOT regroup map files; otherwise press any key. "
  case "${REPLY}" in
    n | N ) RegroupMapFiles="No" ;;
    * )     RegroupMapFiles="Yes" ;;
  esac
  echo
fi
# Optional environment dump for debugging.
if [ "${veryVerbose}" = "Yes" ] ; then
  ulimit -a && df && echo
  echo "MainFolder=${MainFolder};"
fi

# Replace MainFolder with its physical path (symbolic links resolved).
# The lookup runs in a command-substitution subshell, so the script's own
# working directory is never changed and no pushd/popd bookkeeping is needed
# (the old `pushd $PWD` broke when the current directory contained spaces).
if [ -d "${MainFolder}" ] ; then
  physicalFolder=$( cd -- "${MainFolder}" >/dev/null && /bin/pwd -P )
  if [ -z "${physicalFolder}" ] ; then
    echo "You need to supply a directory name for ${MainFolder}"
    exit 8
  fi
  MainFolder="${physicalFolder}"
else
  # just to be sure.
  echo "You need to supply a directory name for ${MainFolder}"
  exit 8
fi

# A link that resolves to "/" would get past a check made before resolution;
# test again on the physical path.
if [ "${MainFolder}" = "/" ] ; then
  echo "You must be crazy."
  exit 5
fi

# Keep the path in a consistent format: no trailing directory separator.
MainFolder="${MainFolder%/}"
offsetMainFolder=$(( ${#MainFolder} - 1 ))

# Place all created files in one directory.
saveFolder="${MainFolder}/saved--Consolidated--Data"
# Work out which trash folder to use.
#   * MainFolder under /Users/   -> the current user's ~/.Trash
#   * MainFolder under /Volumes/ -> /Volumes/<disk>/.Trashes/<real uid>
#   * anywhere else              -> falls back to ~/.Trash
# whereIsTrash records the choice ("u" or "v") for the code that later builds
# trash file names. The original author notes that this isn't perfect.
#
# All expansions are quoted so that volume or folder names containing spaces
# no longer break the tests and mkdir calls.
mainFolderLower=$( printf '%s' "${MainFolder}" | tr '[:upper:]' '[:lower:]' )
case "${mainFolderLower}" in
  /users/* )
    whereIsTrash="u"
    trashFolder="${HOME}/.Trash"
    ;;
  /volumes/* )
    whereIsTrash="v"
    # First path component after "/Volumes/" is the disk name.
    diskName="${MainFolder:9}"
    diskName="${diskName%%/*}"
    trashFolder="/Volumes/${diskName}/.Trashes"
    if [ ! -d "${trashFolder}" ] ; then
      # Needs work. Should be ok, since folder is created when formated.
      echo "Creating trash folder. ${trashFolder};"
      mkdir "${trashFolder}"
    fi
    currentId=$( id -ru )
    trashFolder="${trashFolder}/${currentId}"
    if [ ! -d "${trashFolder}" ] ; then
      echo "Creating user's individual trash folder. ${trashFolder};"
      # Subshell keeps the restrictive umask from leaking into the script.
      ( umask 077 && mkdir "${trashFolder}" )
    fi
    ;;
  * )
    echo "Not sure where main folder resides. defulat to ~/.Trash"
    whereIsTrash="u"
    trashFolder="${HOME}/.Trash"
    ;;
esac

if [ "${veryVerbose}" = "Yes" ] ; then
  echo "MainFolder=${MainFolder};"
  echo "saveFolder=${saveFolder};"
  echo "trashFolder=${trashFolder};"
fi
# Make sure the working folder exists.
if [[ ! -d "${saveFolder}" ]] ; then
  mkdir "${saveFolder}"
fi

# Housekeeping for the index files.
# The archive must exist so earlier indexes can be appended to it.
if [[ ! -e "${saveFolder}/Index_Archive.txt" ]] ; then
  touch "${saveFolder}/Index_Archive.txt"
fi

# An index left by an earlier run is appended to the archive (after a
# separator line) and then removed, so this run starts from an empty index.
if [[ -e "${saveFolder}/Index.txt" ]] ; then
  {
    echo "----------------------------"
    cat "${saveFolder}/Index.txt"
  } >> "${saveFolder}/Index_Archive.txt"
  rm "${saveFolder}/Index.txt"
fi
touch "${saveFolder}/Index.txt"

# In case of a crash, discard the names list of the previous run.
if [[ -e "${saveFolder}/names.txt" ]] ; then
  rm "${saveFolder}/names.txt"
fi
touch "${saveFolder}/names.txt"

# The trashed list must exist even if nothing ends up being trashed.
if [[ ! -e "${saveFolder}/Consolidate_Trashed_List_${runDateTime}.txt" ]] ; then
  touch "${saveFolder}/Consolidate_Trashed_List_${runDateTime}.txt"
fi

# Debug listing; each step runs only if the previous one succeeded.
if [[ "${veryVerbose}" == "Yes" ]] ; then
  ls -l "${saveFolder}/Index.txt" \
    && ls -l "${saveFolder}/names.txt" \
    && ls -l "${saveFolder}/Consolidate_Trashed_List_${runDateTime}.txt" \
    && ls -ld "${trashFolder}" \
    && ls -ld "${trashFolder}/.." \
    && echo
fi
echo "--> Deleting .DS_Store and .FBC files from subfolders."
# -depth is written as a primary after the path so that both BSD and GNU find
# accept the command.
find "${MainFolder}" -depth -type f \( -name ".DS_Store" -o -name ".FBCIndex*" \
     -o -name ".FBCSemaphoreFile" \) -delete

if [ "${veryVerbose}" = "Yes" ] ; then
  echo "Gathering debug information. May take a moment. Counting files."
  # Both paths are quoted: IFS is still at its default here, so unquoted paths
  # containing spaces were split into several arguments and the "before"
  # counts came out wrong.
  fileCountBefore=$( find "${MainFolder}" -type f \
      \! \( -path "${saveFolder}" -o -path "${saveFolder}/*" \) \
      | wc -l )
  trashCountBefore=$( find "${trashFolder}" -type f | wc -l )
fi

# Set the internal field separator to newline to preserve spaces in file paths
IFS=$'\n'

# Set the start time now that the user interaction is done
Seconds_since_the_epoch_start=$( date +%s )
##########
### Find items
echo "--> Finding folders and ignoring any folder whose name contains a period and it's content (potential packages)."
echo "--> then finding files & generating list of MD5 checksums for file content."
# Outer find: every ordinary folder, i.e. not a potential bundle (name with a
# period), not an iPhoto/iTunes folder, not inside one of those, and not the
# script's own save folder.
# NOTE(review): the "*/*.*/*" pattern is matched against the whole path, so a
# period in a directory name above or in MainFolder itself also matches -
# confirm whether such top-level folders need to be supported.
find "${MainFolder}" -type d \
  \! \( -name "*.*" -o -name "iPhoto Library" -o -name "iTunes" \) \
  \! \( -path "*/*.*/*" -o -path "*/iPhoto Library/*" -o -path "*/iTunes/*" \
        -o -path "${saveFolder}" -o -path "${saveFolder}/*" \) \
| while IFS= read -r folder   # -r: keep backslashes in folder names intact
  do
    if [ "${veryVerbose}" = "Yes" ] ; then
      echo "folder=${folder};"
    fi
    # Files directly in this folder only (not in subdirectories); hidden files
    # are skipped.
    find "${folder}" -maxdepth 1 -type f \! -name ".*" \
    | while IFS= read -r theFilePath
      do
        # An unreadable file would be checksummed as empty input, making all
        # unreadable files look like duplicates of each other (and of genuinely
        # empty files). Leave such files out of the index instead.
        if [ ! -r "${theFilePath}" ] ; then
          echo "Warning --- skipping unreadable file ${theFilePath}"
          continue
        fi
        # MD5 of the file's data followed by whatever "<file>/rsrc" yields
        # (the resource fork, per the original author).
        # NOTE(review): presumably "<file>/rsrc" is not available on current
        # macOS releases; cat then only reports an error for it - confirm.
        fileCheckSum=$( cat "${theFilePath}" "${theFilePath}/rsrc" | md5 )
        echo " ${fileCheckSum} ${theFilePath}"
        # Index line format: checksum <tab> f <tab> path
        echo "${fileCheckSum}${tab}f${tab}${theFilePath}" \
          >> "${saveFolder}/Index.txt"
      done
  done
Show_Time
echo "--> Finding and processing potential packages/bundles."
echo "--> Note; we look for a period in the base filename which" \
     "may not be acurate in all cases."
# Find folders within $MainFolder which are possible packages and bundles:
# a name containing a period, or an iPhoto Library / iTunes folder, that is
# not hidden and not nested inside another such folder.
# The script's own save folder is excluded here as well, consistent with the
# ordinary-folder scan, so that "*.map" folders created there by the regroup
# step are not treated as bundles.
find "${MainFolder}" -type d \
  \( -iname "*.*" -o -iname "iPhoto Library" -o -iname "iTunes" \) \
  \! \( -name ".*" -o -ipath "*/*.*/*" -o -ipath "*/iPhoto Library/*" \
        -o -ipath "*/iTunes/*" \
        -o -path "${saveFolder}" -o -path "${saveFolder}/*" \) \
| while IFS= read -r folder   # -r: keep backslashes in folder names intact
  do
    # One checksum for the whole bundle: the content of every file in it
    # (except .DS_Store), concatenated in the order find reports them.
    # NOTE(review): two identical bundles only compare equal if find lists
    # their files in the same order - confirm this holds on the target
    # file system.
    fileCheckSum=$( find "${folder}" -type f \! -name ".DS_Store" \
                      -exec cat '{}' '{}/rsrc' \; | md5 )
    echo "${fileCheckSum} ${folder}"
    # Index line format: checksum <tab> d <tab> path
    echo "${fileCheckSum}${tab}d${tab}${folder}" >> "${saveFolder}/Index.txt"
  done
######### ;;;;;;;;
echo
Show_Time
echo "--> Sorting list of files and potential bundles by checksum then"
echo " Compare checksums."
# The sorted index puts items with equal checksums on adjacent lines. The first
# item of each run is kept (and written to names.txt for the name-conflict
# step); every following item with the same checksum is moved to the trash.
# The loop runs in a subshell, so the trash counters are summarised inside it.
sort -t "${tab}" "${saveFolder}/Index.txt" |
(
  declare previousCheckSum=""
  # Process both files and application bundles.
  # -r keeps backslashes in paths intact; without it the path read back from
  # the index no longer matched the item on disk.
  while IFS= read -r theData
  do
    # Fixed layout: 32-character checksum, tab, one-letter type, tab, path.
    checkSum="${theData:0:32}"
    fileType="${theData:33:1}"
    fileName="${theData:35}"
    if [ "${veryVerbose}" = "Yes" ] ; then
      echo "${checkSum} ${fileName};"
    fi
    if [ "${checkSum}" = "${previousCheckSum}" ] ; then
      # Duplicate item. Move current item to trash.
      [ "${verbose}" = "Yes" ] \
        && echo -n " ="
      # Make up the trash name. It is based on the path so we can remember
      # where the item came from.
      trashFileName="${fileName}"
      case "${whereIsTrash}" in
        u ) trashFileName="${trashFileName:7}"
            trashFileName="${trashFileName#*/}" # chop user id
            ;;
        v ) trashFileName="${trashFileName:9}"
            ;;
        * ) echo "<><><><> Serious error. whereIsTrash=${whereIsTrash};"
            ;;
      esac
      # Flatten the remaining path: "/" becomes "~". printf is used instead of
      # echo -n so names that look like echo options are passed through.
      trashFileName=$( printf '%s' "${trashFileName}" | tr "/" "~" )
      trashFileName="${trashFolder}/${trashFileName}"
      # Don't let a directory be copied into another directory (happens with
      # applications); might as well avoid overwriting files too.
      [ ! -e "${trashFileName}" ] \
        && mv -n "${fileName}" "${trashFileName}"
      if [ -e "${fileName}" ] ; then
        # Move didn't work. Retry under a name made unique with the current
        # date/time and a random number.
        myRandom=$RANDOM
        trashFileName="${trashFileName}~$(date '+%Y-%m-%d_%H.%M')~${myRandom}"
        mv -n "${fileName}" "${trashFileName}"
        if [ -e "${fileName}" ] ; then
          # Odd, second move didn't work.
          echo
          echo "big time error "
          echo "<><> fileName=${fileName};" \
               "myRandom=${myRandom}" \
               "trashFileName=${trashFileName};"
          echo
        else
          # Ok, moved item to trash
          echoTrashInfo
        fi
      else
        # Ok, moved item to trash
        echoTrashInfo
      fi
    else
      # Output data for next step.
      # Mac OS X hfs+ defaults to case independent names, so sort on lower
      # case; i.e. a folder may not contain the names abc & Abc.
      lowerBaseName=$( printf '%s' "${fileName##*/}" \
                         | tr '[:upper:]' '[:lower:]' )
      echo "${lowerBaseName}${tab}${fileName}" \
        >>"${saveFolder}/names.txt"
    fi
    previousCheckSum="${checkSum}"
  done
  outLine="Total trashed files: ${trashFileCount}"
  outLine="${outLine} Total trashed applications/bundles: ${trashDirectoryCount}"
  echo
  echo "${outLine}"
  echo
  {
    echo
    echo "${outLine}"
    echo
  }>> "${saveFolder}/Consolidate_Trashed_List_${runDateTime}.txt"
)
Show_Time
##########
### Compare names
echo "--> Sorting list of names."
# names.txt holds "lower-case basename <tab> full path" for every item kept by
# the checksum step. After sorting, items whose names differ only in case are
# adjacent; the second and later items of each run are renamed in place to
# "name #N.ext", using the first N for which no such file exists yet.
sort -t "${tab}" "${saveFolder}/names.txt" |
(
  declare previousBaseName="" \
          baseName
  declare -i alterCount
  alterCount=0
  # -r keeps backslashes in paths intact so the rename targets the real file.
  while IFS= read -r theData
  do
    echo "..${theData}"
    lowerBaseName="${theData%${tab}*}"
    fileName="${theData#*${tab}}"
    baseName="${fileName##*/}"
    if [ "${veryVerbose}" = "Yes" ] ; then
      echo "${lowerBaseName}; ${baseName}; ${fileName};"
    fi
    if [ "${previousBaseName}" = "${lowerBaseName}" ] ; then
      [ "${verbose}" = "Yes" ] \
        && echo -n " ="
      # Rename items with duplicate names by appending #X prior to the
      # filename extension (before the last period).
      directoryName="${fileName%/*}"
      extension="${baseName##*.}"
      if [ "${extension}" != "${baseName}" ] ; then
        # A period was present, so there is an extension.
        extension=".${extension}"
        name="${baseName%.*}"
      else
        extension=""
        name="${baseName}"
      fi
      # Count upwards until an unused name is found. alterCount carries on
      # from the previous duplicate of the same name.
      while true ; do
        alterCount=$(( alterCount + 1 ))
        actualAlteration="${alterCount}"
        if [ ${alterCount} -gt 9999 ] ; then
          # Give up counting; fall back to a random suffix.
          echo
          echo "================== odd: alterCount too big!!! "
          echo "previousBaseName=${previousBaseName}; baseName=${baseName}; fileName=${fileName}; "
          echo "================== odd: alterCount too big!!! "
          echo
          actualAlteration="${alterCount}${RANDOM}"
          break
        fi
        # Unused name found?
        [ ! -e "${directoryName}/${name} #${actualAlteration}${extension}" ] \
          && break
      done
      # rename
      newName="${directoryName}/${name} #${actualAlteration}${extension}"
      mv -n "${fileName}" "${newName}"
      if [ -e "${fileName}" ] ; then
        # Odd, rename didn't work.
        echo
        echo "another big time error "
        echo "<><><> fileName=${fileName};" \
             "newName=${newName}"
        echo
      else
        # all went well with rename
        echo " +${newName}"
      fi
    else
      # First item with this name: restart the counter.
      alterCount=0
    fi
    previousBaseName="${lowerBaseName}"
  done
)
Show_Time
##########
### Moving items to top-level
if [ "${MoveFiles}" = "Yes" ] ; then
  echo "--> Finding possible packages/bundles and moving them to the top-level."
  # Same bundle selection as the checksum step; the script's own save folder
  # is excluded so "*.map" folders created there by the regroup step stay put.
  # mv -n never overwrites: an item whose name is already taken at the top
  # level is left where it is.
  find "${MainFolder}" -mindepth 1 -type d \
    \( -iname "*.*" -o -iname "iPhoto Library" -o -iname "iTunes" \) \
    \! \( -name ".*" -o -ipath "*/*.*/*" -o -ipath "*/iPhoto Library/*" \
          -o -ipath "*/iTunes/*" \
          -o -path "${saveFolder}" -o -path "${saveFolder}/*" \) \
    -exec mv -n '{}' "${MainFolder}" \;

  echo "--> Finding files and moving them to the top-level."
  # Collect the ordinary subfolders first. Reading find's output line by line
  # (instead of an unquoted $(find ...)) keeps folder names containing glob
  # characters such as * ? [ from being expanded by the shell.
  folders=()
  while IFS= read -r folder ; do
    folders+=( "${folder}" )
  done < <( find "${MainFolder}" -mindepth 1 -type d \
      \! \( -name "*.*" -o -name "iPhoto Library" -o -name "iTunes" \) \
      \! \( -path "*/*.*/*" -o -path "*/iPhoto Library/*" -o -path "*/iTunes/*" \
            -o -path "${saveFolder}" -o -path "${saveFolder}/*" \) )

  # Move the visible files of each folder (not of its subdirectories).
  for folder in "${folders[@]}" ; do
    find "${folder}" -maxdepth 1 -type f \! -name ".*" -exec mv -n '{}' "${MainFolder}" \;
  done
fi

echo "--> Deleting empty subfolders. (This only deletes folders which are completely empty.)"
# -depth given as a primary after the path works with both BSD and GNU find.
find "${MainFolder}" -depth -type d -empty -delete
##########
### Regroup map files into their own folders
if [ "${RegroupMapFiles}" = "Yes" ] ; then
  echo "--> Finding map file groups in top-level and regrouping them to their own folders in the top-level."
  # Two passes over the top level of MainFolder:
  #   frq - deal with .frq first; .frq indicates a group whose member names
  #         continue after the base name, so members match "<base>*.*"
  #   shp - members share the exact base name and match "<base>.*"
  # For every group a "<base>.map" folder is created inside saveFolder (unless
  # it already exists) and the matching top-level files are moved into it.
  for mapKind in frq shp ; do
    if [ "${mapKind}" = "frq" ] ; then
      memberSuffix="*.*"
    else
      memberSuffix=".*"
    fi

    # Read find's output line by line so names containing glob characters are
    # not expanded by the shell.
    Maps=()
    while IFS= read -r Map ; do
      Maps+=( "${Map}" )
    done < <( find "${MainFolder}" -maxdepth 1 -iname "*.${mapKind}" )

    for Map in "${Maps[@]}" ; do
      # Skip entries that an earlier group has already moved away.
      [ -e "${Map}" ] || continue
      fullfilename="${Map##*/}"
      filename="${fullfilename%.*}"
      if [ ! -e "${saveFolder}/${filename}.map" ] ; then
        mkdir "${saveFolder}/${filename}.map"
        # The .shp pass used to run "-exec -n mv ...", i.e. it tried to
        # execute a command called "-n", so nothing was ever moved.
        find "${MainFolder}" -maxdepth 1 -iname "${filename}${memberSuffix}" -type f \
          -exec mv -n '{}' "${saveFolder}/${filename}.map" \;
      fi
    done
  done
fi
Show_Time
echo
if [ "${veryVerbose}" = "Yes" ] ; then
  echo "Gathering post run debug information. May take a moment." \
       "Counting files."
  # Paths are quoted so that glob characters in them are not expanded.
  fileCountAfter=$( find "${MainFolder}" -type f \
      \! \( -path "${saveFolder}" -o -path "${saveFolder}/*" \) \
      | wc -l )
  trashCountAfter=$( find "${trashFolder}" -type f | wc -l )
  # Files still in place plus files that arrived in the trash should add up
  # to the number of files counted before the run.
  # NOTE(review): files moved into saveFolder by the map regroup step are
  # excluded from fileCountAfter, so a run that regroups map files will
  # presumably report missing files here - confirm.
  trashDiff=$(( trashCountAfter - trashCountBefore ))
  calculateFileCountAfter=$(( fileCountAfter + trashDiff ))
  echo
  echo "Information on files and directories in folder/directory ${MainFolder}"
  echo "fileCountBefore=${fileCountBefore};" \
       "fileCountAfter=${fileCountAfter};"
  echo
  echo "trashDiff=${trashDiff}; " \
       "trashCountBefore=${trashCountBefore};" \
       "trashCountAfter=${trashCountAfter};"
  echo
  echo "==========="
  echo "calculateFileCountAfter=${calculateFileCountAfter};"
  echo
  if [ "${fileCountBefore}" -ne "${calculateFileCountAfter}" ] ; then
    echo "- - - - - - - -"
    echo "- - - - - - - - Error: Some files have gone missing."
    echo "- - - - - - - -" \
         "fileCountBefore should be equal to calculateFileCountAfter"
    echo "- - - - - - - - Where files deleted from the trash?"
    echo "- - - - - - - -"
  fi
fi
echo "DONE."
echo "NOTE: Invisible files and files with unresolved name conflicts may be left within any remaining subfolders!"
echo
# GetFileInfo may not be installed; only call it when available, and quote $0
# so a script path containing spaces is passed as one argument.
scriptRevised=""
if command -v GetFileInfo >/dev/null 2>&1 ; then
  scriptRevised=$( GetFileInfo -m "$0" )
fi
echo "${savedCommandName} script revised ${scriptRevised}"
echo
echo "Bye from ${bigVersion}"
echo
exit 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement