Skip to main content

C# console program to remove duplicate files from a given directory hierarchy.

namespace DeleteDupeFiles
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Security.Cryptography;

    /// <summary>
    /// Removes duplicate files from a given directory hierarchy.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Arguments: <directory>");
                return;
            }

            var root = args[0];
            var allFiles = Directory.GetFiles(root, "*", SearchOption.AllDirectories).OrderBy(f => f).ToList();
            var filesToDelete = new List<string>();

            //using (var hasher = SHA1.Create())
            //using (var hasher = SHA256.Create())
            using (var hasher = MD5.Create()) // MD5 == ~20x faster than SHA256
            {
                var groups = allFiles.Select(f => new { File = f, Hash = BitConverter.ToString(hasher.ComputeHash(File.ReadAllBytes(f))) })
                    .GroupBy(p => new { RelativeFile = Path.GetFileName(p.File), p.Hash }, p => p.File)
                    .Where(g => g.Count() > 1)
                    .ToList();

                foreach (var group in groups)
                {
                    // Skip the "earliest" file - fortunately versions
                    // always come before "developer" or "unstable".
                    filesToDelete.AddRange(group.Skip(1));
                }
            }

            if (filesToDelete.Count == 0)
            {
                Console.WriteLine("No duplicates found");
                return;
            }

            Console.WriteLine("Files to delete:");
            foreach (var file in filesToDelete)
            {
                Console.WriteLine(file.StartsWith(
                    root, StringComparison.OrdinalIgnoreCase)
                        ? file.Substring(root.Length)
                        : file);
            }

            Console.WriteLine();
            Console.Write("Delete files? ");
            string response = Console.ReadLine();
            if (response == "y")
            {
                filesToDelete.ForEach(action: f =>
                {
                    Console.WriteLine("Deleting: '{0}'", f);
                    File.Delete(f);
                });
            }
        }
    }
}