- 'DeDup v1.0.1
- '
- '
- '
- '
- 'Convert duplicate files into hardlinks or symlinks to save disk space.
- 'Requires Windows Vista or newer version.
- '
- 'Usage: dedup [options] {path} [options]
- '
- 'Options:
- '/c Continue to next file even if errors occur.
- '/m Use symlinks instead of hardlinks.
- '/s Process subdirectories.
- '/t Test/simulation mode. Does not actually change anything.
- '
- 'Notes:
- '- This script cannot detect existing hardlinks. They will be seen as separate
- ' files. So, this script will convert the same files again each time the
- ' same directory is processed.
- '- Hardlink file dates will always be the same as the link target file's.
- ' Symlink file dates will always be set to the current time.
- '- Once hardlinks are moved to a different drive, the links will break and
- ' become separate copies. Once symlinks or their target files are moved to a
- ' different directory, the links will no longer point to an existing file.
- sub help
-   'print the comment header at the top of this script as the help text, then
-   'exit with code 1. each header line is shown without its leading quote.
-   'uses the global filesystemobject "fs".
-   dim f, s
-   set f = fs.opentextfile(wscript.scriptfullname)
-   'stop at the first line that is not a comment (this includes a blank line)
-   'or at end of file, so a header that is not followed by a blank line can
-   'neither run into the code nor raise a read-past-end error
-   do until f.atendofstream
-     s = f.readline
-     if left(s, 1) <> "'" then exit do
-     wscript.stdout.writeline mid(s, 2)
-   loop
-   f.close
-   wscript.quit 1
- end sub
- 'treat number as 64-bit integer and convert it to binary string
- 'returns 8 characters, least significant byte first, each built with chr()
- 'from one byte value (0-255) of n. n must be a non-negative whole number.
- function qwordAsStr(n)
-   dim r, i, v
-   'work on a copy: vbscript passes arguments by reference by default, so
-   'dividing n itself would zero the caller's variable
-   v = n
-   r = ""
-   for i = 0 to 7
-     'v mod 256 written with int() so values above the long range still work
-     r = r & chr(v - int(v / 256) * 256)
-     v = int(v / 256)
-   next
-   qwordasstr = r
- end function
- 'returns binary string of: {8 chars file size},{20 chars sha1}
- 'uses the global objects ds (binary stream the file is loaded into), hs
- '(scratch stream used to turn the hash bytes into a string) and cr (hash
- 'provider exposing computehash_2).
- 'NOTE(review): ado documents stream.read as returning null when no bytes are
- 'left, so a zero-length file may make computehash_2 fail - confirm.
- function calcHash(path)
-   ds.loadfromfile path
-   ds.position = 0
-   'empty the scratch stream; its type can only be changed at position 0
-   hs.position = 0
-   hs.seteos
-   hs.type = 1 'binary
-   'hash the whole file content
-   hs.write cr.computehash_2(ds.read)
-   'read the hash bytes back as text, one character per byte
-   hs.position = 0
-   hs.type = 2 'text
-   hs.charset = "x-user-defined"
-   calchash = qwordasstr(ds.size) & hs.readtext
- end function
- 'calculate hash on specified directory
- 'appends one "{28 chars hash}{full path}" entry to the global array hashfiles
- 'for every file in path and prints one progress dot per file seen. descends
- 'into subdirectories when the global flag recurs is set.
- sub processDir(path)
-   dim folder, item, n
-   set folder = fs.getfolder(path)
-   for each item in folder.files
-     'attribute bit 1024 marks a link (reparse point); such files are skipped
-     if (item.attributes and 1024) = 0 then
-       n = ubound(hashfiles) + 1
-       redim preserve hashfiles(n)
-       hashfiles(n) = calchash(item.path) & item.path
-     end if
-     wscript.stdout.write "."
-   next
-   if not recurs then exit sub
-   for each item in folder.subfolders
-     'skip linked directories the same way as linked files: following them
-     'can loop forever on a cyclic junction or reach the same files through a
-     'second path
-     if (item.attributes and 1024) = 0 then processdir item.path
-   next
- end sub
- 'compare binary string
- 'compares the first 28 characters (file size + sha1) of s1 and s2 by
- 'character code. returns -1 if s1 sorts first, 1 if s2 sorts first, else 0.
- 'both strings must be at least 28 characters long.
- function binComp(s1, s2)
-   dim i, a, b
-   for i = 1 to 28
-     'ascw yields the real 16-bit character code. asc converts through the
-     'ansi code page first, so different characters that have no ansi
-     'equivalent would not be told apart (presumably the case for hash bytes
-     '128-255 read with the "x-user-defined" charset)
-     a = ascw(mid(s1, i, 1))
-     b = ascw(mid(s2, i, 1))
-     'ascw returns a signed 16-bit value; map it to 0..65535
-     if a < 0 then a = a + 65536
-     if b < 0 then b = b + 65536
-     if a < b then
-       bincomp = -1
-       exit function
-     elseif a > b then
-       bincomp = 1
-       exit function
-     end if
-   next
-   bincomp = 0
- end function
- 'array quicksort. modified for binary string array.
- 'original author: Christopher J. Scharer
- 'sorts the elements rlng_low..rlng_high of rarr_arraytosort in place, using
- 'bincomp to order them. the sort is not stable.
- sub array_quicksort(byref rarr_arraytosort(), byval rlng_low, _
-  byval rlng_high)
-   dim var_pivot, var_swap, lng_low, lng_high
-   'nothing to sort for fewer than two elements. this also covers an empty
-   'array (upper bound -1), where reading the pivot would raise an error
-   if rlng_low >= rlng_high then exit sub
-   lng_low = rlng_low
-   lng_high = rlng_high
-   'the middle element is the pivot; vbscript converts the possibly
-   'fractional subscript to an integer
-   var_pivot = rarr_arraytosort((rlng_low + rlng_high) / 2)
-   do while lng_low <= lng_high
-     'find an element on the left that belongs to the right of the pivot
-     do while bincomp(rarr_arraytosort(lng_low), var_pivot) < 0 and _
-      lng_low < rlng_high
-       lng_low = lng_low + 1
-     loop
-     'find an element on the right that belongs to the left of the pivot
-     do while bincomp(var_pivot, rarr_arraytosort(lng_high)) < 0 and _
-      lng_high > rlng_low
-       lng_high = lng_high - 1
-     loop
-     if lng_low <= lng_high then
-       var_swap = rarr_arraytosort(lng_low)
-       rarr_arraytosort(lng_low) = rarr_arraytosort(lng_high)
-       rarr_arraytosort(lng_high) = var_swap
-       lng_low = lng_low + 1
-       lng_high = lng_high - 1
-     end if
-   loop
-   'sort both partitions
-   if rlng_low < lng_high then
-     array_quicksort rarr_arraytosort, rlng_low, lng_high
-   end if
-   if lng_low < rlng_high then
-     array_quicksort rarr_arraytosort, lng_low, rlng_high
-   end if
- end sub
- 'format number with thousand separator
- 'n is reduced to a whole number with int() first. returns a string such as
- '"1,234,567"; a negative number keeps its leading minus sign ("-1,234").
- function comma(byval n)
-   dim digits, sign, r, i
-   digits = cstr(int(n))
-   'set the minus sign aside so it is not grouped as if it were a digit
-   '(otherwise -123456 would come out as "-,123,456")
-   sign = ""
-   if left(digits, 1) = "-" then
-     sign = "-"
-     digits = mid(digits, 2)
-   end if
-   'walk from the right in steps of three digits; i is the start of a group
-   i = len(digits) - 2
-   r = ""
-   do while i > 1
-     r = "," & mid(digits, i, 3) & r
-     i = i - 3
-   loop
-   'the remaining one to three leading digits get no separator in front
-   comma = sign & left(digits, i + 2) & r
- end function
- function strSize(n, byval sign)
-   'format the byte count n as a rounded-down size in the largest fitting unit,
-   'e.g. "(12 MB) " (with a trailing space). returns "" when n is below 1024.
-   'when sign is true and n is positive the number is prefixed with "+".
-   dim prefix, divisor, suffix
-   if sign and (n > 0) then
-     prefix = "+"
-   else
-     prefix = ""
-   end if
-   'pick the unit; values below 1 kb have no short form
-   if n >= 1073741824 then
-     divisor = 1073741824
-     suffix = " GB) "
-   elseif n >= 1048576 then
-     divisor = 1048576
-     suffix = " MB) "
-   elseif n >= 1024 then
-     divisor = 1024
-     suffix = " KB) "
-   else
-     strsize = ""
-     exit function
-   end if
-   strsize = "(" & prefix & comma(n / divisor) & suffix
- end function
- 'process command line parameters
- 'sets the globals: fs (filesystemobject), path (target directory), igerr (/c),
- 'slink (/m), recurs (/s) and test (/t). any unknown switch, a second path or
- 'a missing path shows the help text, which ends the script.
- set fs = createobject("scripting.filesystemobject")
- 'defaults: no directory yet, every switch off
- recurs = false
- slink = false
- test = false
- igerr = false
- path = ""
- for each s in wscript.arguments
-   if left(s, 1) <> "/" then
-     'the first non-switch argument is the directory; another one is an error
-     if path = "" then
-       path = s
-     else
-       help
-     end if
-   elseif ucase(s) = "/C" then
-     igerr = true
-   elseif ucase(s) = "/M" then
-     slink = true
-   elseif ucase(s) = "/S" then
-     recurs = true
-   elseif ucase(s) = "/T" then
-     test = true
-   else
-     help
-   end if
- next
- if path = "" then help
- 'remember the free space of the drive to report the difference at the end
- df = fs.getfolder(path).drive.freespace
- 'ds: binary stream each file is loaded into for hashing
- set ds = createobject("adodb.stream")
- ds.type = 1 'binary
- 'a stream must be open before it can load a file or be written to
- ds.open
- 'hs: scratch stream used to convert the hash bytes into a string
- set hs = createobject("adodb.stream")
- hs.open
- 'cr: sha1 implementation of the .net framework, reached through com
- on error resume next
- set cr = createobject("system.security.cryptography.sha1managed")
- if err.number <> 0 then
-   wscript.stdout.writeline _
-    "This script requires .NET Framework of any version."
-   wscript.quit 2
- end if
- on error goto 0
- wscript.stdout.write "Gathering file information"
- redim hashfiles(-1) 'hash+path
- processdir path
- wscript.stdout.writeline
- wscript.stdout.writeline "Sorting file information..."
- 'sorting puts files with the same size and hash next to each other. fewer
- 'than two entries need no sorting (and an empty array must not be indexed)
- if ubound(hashfiles) > 0 then
-   array_quicksort hashfiles, 0, ubound(hashfiles)
- end if
- 'replace the files group(1..n) with links to the file group(0). every element
- 'of group is array(hash, path). updates the global counters uniqcount,
- 'dupecount, okcount, errcount and freed, and honours the global switches
- 'test, slink and igerr. does nothing for an empty group.
- sub linkgroup(group)
-   dim i, keep, dupe, sf, sn, opt, xc
-   if ubound(group) < 0 then exit sub
-   keep = group(0)(1)
-   uniqcount = uniqcount + 1
-   dupecount = dupecount + ubound(group)
-   freed = freed + ubound(group) * fs.getfile(keep).size
-   wscript.stdout.writeline vbcrlf & "Uniq: " & keep
-   'mklink creates a symlink by default and a hardlink with /h
-   if slink then
-     opt = ""
-   else
-     opt = "/h "
-   end if
-   on error resume next
-   for i = 1 to ubound(group)
-     dupe = group(i)(1)
-     wscript.stdout.writeline "Link: " & dupe
-     if not test then
-       'move the duplicate out of the way first. it is deleted only after the
-       'link exists and is renamed back when linking fails
-       err.clear
-       set sf = nothing
-       set sf = fs.getfile(dupe)
-       'rename only if the file was found, so that a stale object from the
-       'previous round can never be renamed by mistake
-       if err.number = 0 then
-         sn = sf.name
-         sf.name = sn & ".todelete"
-       end if
-       if err.number = 0 then
-         'NOTE(review): cmd.exe expands %name% even inside quotes, so paths
-         'containing percent signs may be altered - confirm
-         set xc = ws.exec("cmd.exe /c mklink " & opt & """" & dupe & _
-          """ """ & keep & """")
-         if err.number = 0 then
-           do while xc.status = 0
-             wscript.sleep 50
-           loop
-           if xc.exitcode = 0 then
-             okcount = okcount + 1
-             'force the deletion so a read-only duplicate is not left behind
-             fs.deletefile dupe & ".todelete", true
-             if err.number <> 0 then
-               wscript.stdout.writeline err.description
-             end if
-           else
-             'show what mklink reported on both output streams
-             do while not xc.stdout.atendofstream
-               wscript.stdout.writeline xc.stdout.readline
-             loop
-             do while not xc.stderr.atendofstream
-               wscript.stdout.writeline xc.stderr.readline
-             loop
-             sf.name = sn
-             errcount = errcount + 1
-             if not igerr then wscript.quit
-           end if
-         else
-           wscript.stdout.writeline err.description
-           sf.name = sn
-           errcount = errcount + 1
-           if not igerr then wscript.quit
-         end if
-       else
-         wscript.stdout.writeline err.description
-         errcount = errcount + 1
-         if not igerr then wscript.quit
-       end if
-     end if
-   next
- end sub
- 'walk the sorted list and link every run of entries with the same hash
- set ws = createobject("wscript.shell")
- uniqcount = 0
- dupecount = 0
- okcount = 0
- errcount = 0
- freed = 0
- redim dups(-1) '[[{8 chars file size + 20 chars sha1}, {path}], ...]
- prevhash = ""
- prevfile = ""
- for each s in hashfiles
-   h = left(s, 28)
-   f = mid(s, 29)
-   if h <> prevhash then
-     'the hash changed: the group collected so far (if any) is complete
-     linkgroup dups
-     redim dups(-1)
-   elseif ubound(dups) >= 0 then
-     'one more member of the current group
-     redim preserve dups(ubound(dups) + 1)
-     dups(ubound(dups)) = array(h, f)
-   else
-     'second file with this hash: start a group with the previous file
-     redim preserve dups(ubound(dups) + 2)
-     dups(ubound(dups) - 1) = array(prevhash, prevfile)
-     dups(ubound(dups)) = array(h, f)
-   end if
-   prevhash = h
-   prevfile = f
- next
- 'the last group is not followed by a different hash, so process it here
- linkgroup dups
- redim dups(-1)
- 'print the summary. df held the free space measured before linking and ends
- 'up as the formatted difference between the free space now and then
- diff = fs.getfolder(path).drive.freespace - df
- if diff > 0 then
-   plus = "+"
- else
-   plus = ""
- end if
- df = plus & comma(diff) & " Bytes " & strsize(diff, diff > 0)
- 'one array element per output line, preceded by an empty line
- wscript.stdout.writeline vbcrlf & join(array( _
-  "Found " & comma(dupecount) & " duplicates of " & comma(uniqcount) & _
-  " unique files.", _
-  comma(okcount) & " duplicates has been successfully linked. " & _
-  comma(errcount) & " have failed.", _
-  comma(freed) & " Bytes " & strsize(freed, false) & _
-  "of disk space is supposedly be freed.", _
-  "Actual disk free space difference: " & df), vbcrlf)
