Click here to Skip to main content
11,417,750 members (46,807 online)
Click here to Skip to main content
Add your own
alternative version

Duplicate songs detector via audio fingerprinting

Ciumac Sergiu, 20 Jun 2013 GPL3
Explains sound fingerprinting algorithm, with a practical example of detecting duplicate files on the user's local drive.
DuplicatesDetector.zip
DuplicatesDetector
Bass.Net.dll
encog-core-cs.dll
Microsoft.DirectX.DirectSound.dll
NAudio.dll
Ninject.dll
Ninject.pdb
perms.csv
SoundFingerprinting.dll
SoundFingerprinting.DuplicatesDetector.exe
SoundFingerprinting.DuplicatesDetector.pdb
SoundFingerprinting.pdb
WPFToolkit.dll
x64
bass.dll
basscd.dll
bassenc.dll
bassflac.dll
bassmidi.dll
bassmix.dll
basswma.dll
bass_fx.dll
libfftw3-3.dll
libfftw3f-3.dll
libfftw3l-3.dll
tags.dll
x86
bass.dll
bassenc.dll
bassflac.dll
bassmix.dll
basswma.dll
bass_fx.dll
libfftw3-3.dll
libfftw3f-3.dll
libfftw3l-3.dll
tags.dll
release.zip
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
DuplicateTracks.exe
perms.csv
tags.dll
WPFToolkit.dll
release2.zip
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
DuplicateTracks.exe
perms.csv
tags.dll
WPFToolkit.dll
release3.zip
Release3
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
DuplicateTracks.exe
perms.csv
tags.dll
WPFToolkit.dll
release4.zip
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
DuplicateTracks.exe
encog-core-cs.dll
Microsoft.DirectX.DirectSound.dll
Ninject.dll
perms.csv
SoundfingerprintingLib.dll
tags.dll
WPFToolkit.dll
Release5.zip
Release5
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
encog-core-cs.dll
Microsoft.DirectX.DirectSound.dll
Ninject.dll
perms.csv
Soundfingerprinting.dll
Soundfingerprinting.DuplicatesDetector.exe
tags.dll
WPFToolkit.dll
sources.zip
DuplicateTracks
.svn
all-wcprops
entries
prop-base
props
text-base
DuplicateTracks.sln.svn-base
tmp
prop-base
props
text-base
DuplicateTracks
.svn
all-wcprops
entries
prop-base
Application.ico.svn-base
props
text-base
App.xaml.cs.svn-base
App.xaml.svn-base
Application.ico.svn-base
DuplicateTracks.csproj.svn-base
MainWindow.xaml.cs.svn-base
MainWindow.xaml.svn-base
MainWindowResourceDictionary.xaml.svn-base
RelayCommand.cs.svn-base
tmp
prop-base
props
text-base
Application.ico
Audio
.svn
all-wcprops
entries
prop-base
props
text-base
BassProxy.cs.svn-base
IAudio.cs.svn-base
tmp
prop-base
props
text-base
DataAccess
.svn
all-wcprops
entries
prop-base
props
text-base
Hashes.cs.svn-base
HashType.cs.svn-base
IPermutations.cs.svn-base
IStorage.cs.svn-base
LocalPermutations.cs.svn-base
RamStorage.cs.svn-base
Repository.cs.svn-base
tmp
prop-base
props
text-base
Fingerprinting
.svn
all-wcprops
entries
prop-base
props
text-base
CSVWriter.cs.svn-base
FingerprintManager.cs.svn-base
IncrementalRandomStride.cs.svn-base
IncrementalStaticStride.cs.svn-base
IStride.cs.svn-base
MinHash.cs.svn-base
RandomStride.cs.svn-base
StaticStride.cs.svn-base
tmp
prop-base
props
text-base
MathUtils
.svn
all-wcprops
entries
prop-base
props
text-base
Complex.cs.svn-base
ComplexF.cs.svn-base
Fourier.cs.svn-base
FourierDirection.cs.svn-base
HaarWavelet.cs.svn-base
HanningWindow.cs.svn-base
IWaveletDecomposition.cs.svn-base
IWindowFunction.cs.svn-base
tmp
prop-base
props
text-base
Images
.svn
all-wcprops
entries
prop-base
icon.png.svn-base
props
text-base
icon.png.svn-base
tmp
prop-base
props
text-base
icon.png
Model
.svn
all-wcprops
entries
prop-base
props
text-base
HashSignature.cs.svn-base
Item.cs.svn-base
ResultItem.cs.svn-base
Track.cs.svn-base
tmp
prop-base
props
text-base
Properties
.svn
all-wcprops
entries
prop-base
props
text-base
AssemblyInfo.cs.svn-base
tmp
prop-base
props
text-base
Services
.svn
all-wcprops
entries
prop-base
props
text-base
FolderBrowserDialogService.cs.svn-base
IFolderBrowserDialogService.cs.svn-base
IGenericViewWindow.cs.svn-base
IMessageBoxService.cs.svn-base
IOpenFileDialogService.cs.svn-base
ISaveFileDialogService.cs.svn-base
IWindowService.cs.svn-base
MessageBoxService.cs.svn-base
OpenFileDialogService.cs.svn-base
SaveFileDialogService.cs.svn-base
ServiceContainer.cs.svn-base
ServiceInjector.cs.svn-base
WindowService.cs.svn-base
tmp
prop-base
props
text-base
Themes
.svn
all-wcprops
entries
prop-base
props
text-base
Brushes.xaml.svn-base
Converters.xaml.svn-base
Datagrid.xaml.svn-base
ProgressBar.xaml.svn-base
RoundedButton.xaml.svn-base
TextBlock.xaml.svn-base
tmp
prop-base
props
text-base
View
ViewModel
.svn
all-wcprops
entries
prop-base
props
text-base
BooleanToVisibilityConverter.cs.svn-base
GenericViewModel.cs.svn-base
Helper.cs.svn-base
MainWindowViewModel.cs.svn-base
PathListViewModel.cs.svn-base
ReportViewModel.cs.svn-base
RepositoryGateway.cs.svn-base
ViewModelBase.cs.svn-base
tmp
prop-base
props
text-base
.svn
all-wcprops
entries
prop-base
props
text-base
GenericView.xaml.cs.svn-base
GenericView.xaml.svn-base
PathListView.xaml.cs.svn-base
PathListView.xaml.svn-base
ReportView.xaml.cs.svn-base
ReportView.xaml.svn-base
tmp
prop-base
props
text-base
NativeLibs
.svn
all-wcprops
entries
prop-base
bass.dll.svn-base
bassflac.dll.svn-base
bassmix.dll.svn-base
bass_fx.dll.svn-base
tags.dll.svn-base
props
text-base
bass.dll.svn-base
bassflac.dll.svn-base
bassmix.dll.svn-base
bass_fx.dll.svn-base
tags.dll.svn-base
tmp
prop-base
props
text-base
bass.dll
bassflac.dll
bassmix.dll
bass_fx.dll
tags.dll
Permutations
.svn
all-wcprops
entries
prop-base
props
text-base
perms.csv.svn-base
tmp
prop-base
props
text-base
perms.csv
sources2.zip
Application.ico
icon.png
bass.dll
bassflac.dll
bassmix.dll
bass_fx.dll
tags.dll
perms.csv
sources3.zip
Application.ico
icon.png
bass.dll
bassflac.dll
bassmix.dll
bass_fx.dll
tags.dll
perms.csv
sources4.zip
Externals
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
encog-core-cs.dll
Microsoft.DirectX.DirectSound.dll
Ninject.dll
perms.csv
tags.dll
Sources
DuplicateTracks
DuplicateTracks
Application.ico
DataAccess
Images
icon.png
Infrastructure
Model
Properties
Services
Themes
View
ViewModel
SoundfingerprintingLib
SoundfingerprintingLib
AudioProxies
Strides
DbStorage
Entities
Utils
Fingerprinting
ConstantQ
FFT
Wavelets
Windows
Hashing
NeuralHashing
ActivationFunctions
Ensemble
Learning
MMI
NeuralTrainer
Utils
Properties
SoundTools
BassResampler
DbFiller
DrawningTool
FFMpegResampler
FilePermutations
Misc
NetworkEnsembling
NetworkTrainer
PermutationGenerator
Properties
Settings.settings
QueryDb
Resources
2-Music.ico
WaveletDecomposition
sources5.zip
src
Externals
bass.dll
Bass.Net.dll
bassflac.dll
bassmix.dll
bass_fx.dll
encog-core-cs.dll
Microsoft.DirectX.DirectSound.dll
Ninject.dll
tags.dll
WPFToolkit.dll
Scripts
Soundfingerprinting
Soundfingerprinting.DuplicatesDetector
Application.ico
DataAccess
Images
icon.png
Infrastructure
Model
Permutations
perms.csv
Properties
Services
Themes
View
ViewModel
Soundfingerprinting.SoundTools
BassResampler
DbFiller
DrawningTool
FFMpegResampler
FilePermutations
Misc
NetworkEnsembling
NetworkTrainer
PermutationGenerator
Properties
Settings.settings
QueryDb
Resources
2-Music.ico
WaveletDecomposition
AudioProxies
Strides
DbStorage
Entities
Utils
Fingerprinting
ConstantQ
FFT
Wavelets
Windows
Hashing
NeuralHashing
ActivationFunctions
Ensemble
Learning
MMI
NeuralTrainer
Utils
Properties
// Sound Fingerprinting framework
// https://code.google.com/p/soundfingerprinting/
// Code license: GNU General Public License v2
// ciumac.sergiu@gmail.com

using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Windows.Forms;
using System.Xml.Serialization;
using SoundfingerprintingLib.AudioProxies;
using SoundfingerprintingLib.AudioProxies.Strides;
using SoundfingerprintingLib.Fingerprinting;
using SoundfingerprintingLib.Hashing;
using SoundTools.Properties;

namespace SoundTools.Misc
{
    /// <summary>
    /// Miscellaneous empirical tests: fingerprints the selected audio file, measures
    /// fingerprint, min-hash and LSH bucket similarity, and dumps the figures to an XML file.
    /// </summary>
    public partial class WinMisc : Form
    {
        /// <summary>
        /// Constructor
        /// </summary>
        public WinMisc()
        {
            InitializeComponent();
            Icon = Resources.Sound;
        }

        /// <summary>
        /// Double click on the input path text box: lets the user pick the audio file to analyze
        /// </summary>
        private void TbPathToFileMouseDoubleClick(object sender, MouseEventArgs e)
        {
            // File dialogs implement IDisposable, so release them once the choice is read.
            using (OpenFileDialog ofd = new OpenFileDialog { Filter = Resources.MusicFilter, FileName = "music.mp3" })
            {
                if (ofd.ShowDialog() == DialogResult.OK)
                {
                    _tbPathToFile.Text = ofd.FileName;
                }
            }
        }

        /// <summary>
        /// Double click on the output path text box: lets the user pick the file the results are written to
        /// </summary>
        private void TbOutputPathMouseDoubleClick(object sender, MouseEventArgs e)
        {
            using (SaveFileDialog sfd = new SaveFileDialog { Filter = Resources.ExportFilter, FileName = "results.txt" })
            {
                if (sfd.ShowDialog() == DialogResult.OK)
                {
                    _tbOutputPath.Text = sfd.FileName;
                }
            }
        }

        /// <summary>
        /// Validates the form, then computes the similarity figures on a background thread
        /// and serializes them as XML into the chosen output file.
        /// </summary>
        private void BtnDumpInfoClick(object sender, EventArgs e)
        {
            if (String.IsNullOrEmpty(_tbPathToFile.Text))
            {
                MessageBox.Show(Resources.ErrorNoFileToAnalyze, Resources.SelectFile, MessageBoxButtons.OK, MessageBoxIcon.Information);
                return;
            }
            if (String.IsNullOrEmpty(_tbOutputPath.Text))
            {
                MessageBox.Show(Resources.SelectPathToDump, Resources.SelectFile, MessageBoxButtons.OK, MessageBoxIcon.Information);
                return;
            }
            if (!File.Exists(Path.GetFullPath(_tbPathToFile.Text)))
            {
                MessageBox.Show(Resources.NoSuchFile, Resources.NoSuchFile, MessageBoxButtons.OK, MessageBoxIcon.Information);
                return;
            }
            if (_chbCompare.Checked)
            {
                if (String.IsNullOrEmpty(_tbSongToCompare.Text))
                {
                    MessageBox.Show(Resources.ErrorNoFileToAnalyze, Resources.SelectFile, MessageBoxButtons.OK, MessageBoxIcon.Information);
                    return;
                }
                // Same existence check as for the main input file, so the worker does not fail later.
                if (!File.Exists(Path.GetFullPath(_tbSongToCompare.Text)))
                {
                    MessageBox.Show(Resources.NoSuchFile, Resources.NoSuchFile, MessageBoxButtons.OK, MessageBoxIcon.Information);
                    return;
                }
            }

            // Snapshot every control value here, on the UI thread: the worker below runs on a
            // thread-pool thread and must not touch WinForms controls directly.
            int minFreq = (int)_nudFreq.Value;
            int topWavelets = (int)_nudTopWavelets.Value;
            int dbStrideSize = (int)_nudStride.Value;
            bool randomDbStride = _chbStride.Checked;
            int queryStrideSize = (int)_nudQueryStride.Value;
            bool randomQueryStride = _chbQueryStride.Checked;
            int hashTables = (int)_nudTables.Value;
            int hashKeys = (int)_nudKeys.Value;
            int numFingerprints = (int)_nudNumberOfSubsequent.Value;
            bool compareWithOtherSong = _chbCompare.Checked;
            string pathToInput = _tbPathToFile.Text;
            string pathToOutput = _tbOutputPath.Text;
            string pathToDifferent = _tbSongToCompare.Text;

            // Disable the inputs before the work starts so the button cannot be clicked twice.
            FadeControls(false);

            Action action =
                () =>
                {
                    using (BassProxy proxy = new BassProxy())
                    {
                        IStride objStride = randomDbStride ? (IStride)new RandomStride(0, dbStrideSize) : new StaticStride(dbStrideSize);
                        FingerprintManager manager = new FingerprintManager { MinFrequency = minFreq, TopWavelets = topWavelets };
                        DumpResults resultObj = new DumpResults();

                        IStride queryStride = randomQueryStride ? (IStride)new RandomStride(0, queryStrideSize) : new StaticStride(queryStrideSize);
                        // NOTE(review): the stride selected through the query-stride controls is replaced
                        // right away by this hard-coded static stride, so those controls currently have no
                        // effect on the results. Kept as in the original to preserve the experiment's
                        // output - confirm whether the override is still intended.
                        queryStride = new StaticStride(5115, 5115 / 2);

                        GetFingerprintSimilarity(manager, objStride, queryStride, numFingerprints, proxy, pathToInput, resultObj);
                        GetHashSimilarity(manager, objStride, queryStride, numFingerprints, hashTables, hashKeys, proxy, pathToInput, resultObj);

                        if (compareWithOtherSong)
                        {
                            GetFingerprintSimilarity(manager, objStride, proxy, pathToInput, pathToDifferent, resultObj);
                        }

                        resultObj.Info.MinFrequency = minFreq;
                        resultObj.Info.TopWavelets = topWavelets;
                        // Database stride size, matching RandomStride below (which describes the database stride).
                        resultObj.Info.StrideSize = dbStrideSize;
                        resultObj.Info.RandomStride = randomDbStride;
                        resultObj.Info.Filename = pathToInput;
                        resultObj.ComparisonDone = compareWithOtherSong;

                        XmlSerializer serializer = new XmlSerializer(typeof(DumpResults));
                        // using guarantees the file handle is released even when serialization throws.
                        using (TextWriter writer = new StreamWriter(pathToOutput))
                        {
                            serializer.Serialize(writer, resultObj);
                        }
                    }
                };
            action.BeginInvoke(
                result =>
                {
                    try
                    {
                        // Rethrows any exception raised by the worker.
                        action.EndInvoke(result);
                    }
                    finally
                    {
                        // Re-enable the form whether the worker succeeded or failed.
                        FadeControls(true);
                    }
                }, null);
        }

        /// <summary>
        /// Get fingerprint similarity between 2 different songs.
        /// Fingerprints are compared pairwise by position (i-th against i-th) up to the length of the shorter list.
        /// </summary>
        /// <param name="manager">Fingerprint manager used in file decomposition</param>
        /// <param name="stride">Stride object parameter, used for both files</param>
        /// <param name="proxy">Proxy to the audio object</param>
        /// <param name="path">Path to first file</param>
        /// <param name="differentPath">Path to different file</param>
        /// <param name="results">Results object to be filled with the sum, average and maximum similarity</param>
        private static void GetFingerprintSimilarity(FingerprintManager manager, IStride stride, IAudio proxy, string path, string differentPath, DumpResults results)
        {
            List<bool[]> firstSong = manager.CreateFingerprints(proxy, path, stride);
            List<bool[]> secondSong = manager.CreateFingerprints(proxy, differentPath, stride);

            int count = Math.Min(firstSong.Count, secondSong.Count);
            double sum = 0;
            double max = double.MinValue;
            for (int i = 0; i < count; i++)
            {
                double value = MinHash.CalculateSimilarity(firstSong[i], secondSong[i]);
                if (value > max)
                {
                    max = value;
                }
                sum += value;
            }

            // With nothing to compare, report 0 instead of NaN (0/0) and double.MinValue.
            results.SumJaqFingerprintSimilarityBetweenDiffertSongs = sum;
            results.AverageJaqFingerprintsSimilarityBetweenDifferentSongs = count > 0 ? sum / count : 0;
            results.MaxJaqFingerprintsSimilarityBetweenDifferentSongs = count > 0 ? max : 0;
        }

        /// <summary>
        /// Get fingerprint similarity of one song.
        /// The file is fingerprinted twice (database stride and query stride) and every
        /// database fingerprint is compared with every query fingerprint.
        /// </summary>
        /// <param name="manager">Fingerprint manager used in file decomposition</param>
        /// <param name="dbstride">Database creation stride</param>
        /// <param name="queryStride">Query stride</param>
        /// <param name="numberOfItemsToCompare">Number of subsequent elements to compare with (currently not used by this method)</param>
        /// <param name="proxy">Proxy</param>
        /// <param name="path">Path to first file</param>
        /// <param name="results">Results object to be filled with the sum, average and maximum similarity</param>
        private static void GetFingerprintSimilarity(FingerprintManager manager, IStride dbstride, IStride queryStride, int numberOfItemsToCompare, IAudio proxy, string path, DumpResults results)
        {
            List<bool[]> dbFingerprints = manager.CreateFingerprints(proxy, path, dbstride);
            List<bool[]> queryFingerprints = manager.CreateFingerprints(proxy, path, queryStride);

            double sum = 0;
            double max = double.MinValue;
            foreach (bool[] dbFingerprint in dbFingerprints)
            {
                foreach (bool[] queryFingerprint in queryFingerprints)
                {
                    double value = MinHash.CalculateSimilarity(dbFingerprint, queryFingerprint);
                    if (value > max)
                    {
                        max = value;
                    }
                    sum += value;
                }
            }

            // long arithmetic: the number of pairs can exceed int.MaxValue for long files.
            long pairs = (long)dbFingerprints.Count * queryFingerprints.Count;

            // With nothing to compare, report 0 instead of NaN (0/0) and double.MinValue.
            results.Results.SumJaqFingerprintsSimilarity = sum;
            results.Results.AverageJaqFingerprintSimilarity = pairs > 0 ? sum / pairs : 0;
            results.Results.MaxJaqFingerprintSimilarity = pairs > 0 ? max : 0;
        }

        /// <summary>
        /// Get hash similarity of one song.
        /// Counts identical min-hash values position by position between every database/query
        /// signature pair, then counts the LSH bucket values shared by every database/query pair.
        /// </summary>
        /// <param name="manager">Fingerprint manager</param>
        /// <param name="dbstride">Database stride between fingerprints</param>
        /// <param name="queryStride">Query stride between fingerprints</param>
        /// <param name="numberOfFingerprintsToAnalyze">Number of fingerprints to analyze (currently not used by this method)</param>
        /// <param name="hashTables">Number of hash tables in the LSH transformation</param>
        /// <param name="hashKeys">Number of hash keys per table in the LSH transformation</param>
        /// <param name="proxy">Audio proxy</param>
        /// <param name="path">Path to analyzed file</param>
        /// <param name="results">Results object to be filled with the appropriate data</param>
        private static void GetHashSimilarity(FingerprintManager manager, IStride dbstride, IStride queryStride, int numberOfFingerprintsToAnalyze, int hashTables, int hashKeys, IAudio proxy, string path, DumpResults results)
        {
            List<bool[]> listDb = manager.CreateFingerprints(proxy, path, dbstride);
            List<bool[]> listQuery = manager.CreateFingerprints(proxy, path, queryStride);
            IPermutations perms = new DbPermutations(ConfigurationManager.ConnectionStrings["FingerprintConnectionString"].ConnectionString);
            MinHash minHash = new MinHash(perms);
            List<int[]> minHashDb = listDb.Select(minHash.ComputeMinHashSignature).ToList();
            List<int[]> minHashQuery = listQuery.Select(minHash.ComputeMinHashSignature).ToList();

            /*Calculate Min Hash signature similarity by comparing every db signature with every query signature*/
            int countDb = minHashDb.Count;
            int countQuery = minHashQuery.Count;
            // The signature length is read from the first signature; guard the case of no fingerprints at all.
            int minHashSignatureLen = countDb > 0 ? minHashDb[0].Length : 0;
            int similarMinHashValues = 0;
            for (int i = 0; i < countDb; i++)
            {
                int[] dbSignature = minHashDb[i];
                for (int j = 0; j < countQuery; j++)
                {
                    int[] querySignature = minHashQuery[j];
                    for (int k = 0; k < minHashSignatureLen; k++)
                    {
                        if (dbSignature[k] == querySignature[k])
                        {
                            similarMinHashValues++;
                        }
                    }
                }
            }

            // long arithmetic: the triple product can exceed int.MaxValue and would otherwise wrap.
            long comparedValues = (long)countDb * countQuery * minHashSignatureLen;
            results.Results.SumIdenticalMinHash = similarMinHashValues;
            results.Results.AverageIdenticalMinHash = comparedValues > 0 ? (double)similarMinHashValues / comparedValues : 0;

            /*Group min hash signatures into LSH Buckets*/
            List<Dictionary<int, long>> lshBucketsDb =
                minHashDb.Select(item => minHash.GroupMinHashToLSHBuckets(item, hashTables, hashKeys)).ToList();

            List<Dictionary<int, long>> lshBucketsQuery =
                minHashQuery.Select(item => minHash.GroupMinHashToLSHBuckets(item, hashTables, hashKeys)).ToList();

            /*Count distinct bucket values shared by each db/query pair*/
            int hashesCount = 0;
            foreach (Dictionary<int, long> dbBuckets in lshBucketsDb)
            {
                var dbValues = dbBuckets.Values;
                foreach (Dictionary<int, long> queryBuckets in lshBucketsQuery)
                {
                    hashesCount += dbValues.Intersect(queryBuckets.Values).Count();
                }
            }

            // The two bucket similarity figures are not computed by this method; -1 is stored for both.
            results.Results.SumJaqLSHBucketSimilarity = -1;
            results.Results.AverageJaqLSHBucketSimilarity = -1;
            results.Results.TotalIdenticalLSHBuckets = hashesCount;
        }

        /// <summary>
        /// Enable or disable the input controls. The change is marshalled through
        /// <c>Invoke</c>, so the method can be called from a worker thread.
        /// </summary>
        /// <param name="isVisible">True to enable the controls, false to disable them</param>
        private void FadeControls(bool isVisible)
        {
            Invoke(new Action(
                () =>
                {
                    _tbOutputPath.Enabled = isVisible;
                    _tbPathToFile.Enabled = isVisible;
                    _nudFreq.Enabled = isVisible;
                    _nudTopWavelets.Enabled = isVisible;
                    _btnDumpInfo.Enabled = isVisible;
                    _nudStride.Enabled = isVisible;
                    _chbStride.Enabled = isVisible;
                }));
        }

        /// <summary>
        /// Compare check box changed: the comparison path is editable only while comparison is requested
        /// </summary>
        private void ChbCompareCheckedChanged(object sender, EventArgs e)
        {
            // Bind to the check box state instead of toggling, so the two can never get out of sync.
            _tbSongToCompare.Enabled = _chbCompare.Checked;
        }

        /// <summary>
        /// Double click on the comparison path text box: lets the user pick the song to compare with
        /// </summary>
        private void TbSongToCompareMouseDoubleClick(object sender, MouseEventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog { Filter = Resources.MusicFilter, FileName = "music.mp3" })
            {
                if (ofd.ShowDialog() == DialogResult.OK)
                {
                    _tbSongToCompare.Text = ofd.FileName;
                }
            }
        }

        /// <summary>
        /// On window form loading event: seeds the frequency and top-wavelets inputs
        /// with the values of a freshly constructed fingerprint manager
        /// </summary>
        private void WinMiscLoad(object sender, EventArgs e)
        {
            FingerprintManager manager = new FingerprintManager();
            _nudFreq.Value = manager.MinFrequency;
            _nudTopWavelets.Value = manager.TopWavelets;
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)

Share

About the Author

Ciumac Sergiu
Software Developer
Moldova (Republic Of)
Interested in computer science, math, research, and everything that relates to innovation. Fan of agnostic programming, don't mind developing under any platform/framework if it explores interesting topics. In search of a better programming paradigm.
Follow on   Twitter   LinkedIn

| Advertise | Privacy | Terms of Use | Mobile
Web04 | 2.8.150427.4 | Last Updated 20 Jun 2013
Article Copyright 2011 by Ciumac Sergiu
Everything else Copyright © CodeProject, 1999-2015
Layout: fixed | fluid