Click here to Skip to main content
15,894,907 members
Articles / Desktop Programming / WPF

Duplicate songs detector via audio fingerprinting

Rate me:
Please Sign up or sign in to vote.
4.96/5 (337 votes)
23 Jun 2020MIT44 min read 1.3M   20.4K   533  
Explains sound fingerprinting algorithm, with a practical example of detecting duplicate files on the user's local drive.
The aim of this article is to show an efficient algorithm of signal processing which will allow one to have a competent system of sound fingerprinting and signal recognition. I'll try to come with some explanations of the article's algorithm, and also speak about how it can be implemented using the C# programming language. Additionally, I'll try to cover topics of digital signal processing that are used in the algorithm, thus you'll be able to get a clearer image of the entire system. And as a proof of concept, I'll show you how to develop a simple WPF MVVM application.
// Sound Fingerprinting framework
// https://code.google.com/p/soundfingerprinting/
// Code license: GNU General Public License v2
// ciumac.sergiu@gmail.com

// <copyright file="Histogram.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2010 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>

using System;
using System.Collections.Generic;
using System.Linq;

namespace SoundfingerprintingLib.Hashing
{
    /// <summary>
    ///   Extension methods to return basic statistics on set of data.
    /// </summary>
    public static class Statistics
    {
        /// <summary>
        ///   Calculates the sample mean.
        /// </summary>
        /// <param name = "data">The data to calculate the mean of.</param>
        /// <returns>The mean of the sample.</returns>
        public static double Mean(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double mean = 0;
            ulong m = 0;
            foreach (double item in data)
            {
                mean += (item - mean)/++m;
            }

            return mean;
        }

        /// <summary>
        ///   Calculates the sample mean.
        /// </summary>
        /// <param name = "data">The data to calculate the mean of.</param>
        /// <returns>The mean of the sample.</returns>
        public static double Mean(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double mean = 0;
            ulong m = 0;
            foreach (double? item in data)
            {
                if (item.HasValue)
                {
                    mean += (item.Value - mean)/++m;
                }
            }

            return mean;
        }

        /// <summary>
        ///   Calculates the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer).
        /// </summary>
        /// <param name = "data">The data to calculate the variance of.</param>
        /// <returns>The unbiased population variance of the sample.</returns>
        public static double Variance(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double variance = 0;
            double t = 0;
            ulong j = 0;

            IEnumerator<double> iterator = data.GetEnumerator();
            if (iterator.MoveNext())
            {
                j++;
                t = iterator.Current;
            }

            while (iterator.MoveNext())
            {
                j++;
                double xi = iterator.Current;
                t += xi;
                double diff = (j*xi) - t;
                variance += (diff*diff)/(j*(j - 1));
            }

            return variance/(j - 1);
        }

        /// <summary>
        ///   Computes the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer) for nullable data.
        /// </summary>
        /// <param name = "data">The data to calculate the variance of.</param>
        /// <returns>The population variance of the sample.</returns>
        public static double Variance(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double variance = 0;
            double t = 0;
            ulong j = 0;

            IEnumerator<double?> iterator = data.GetEnumerator();

            while (true)
            {
                bool hasNext = iterator.MoveNext();
                if (!hasNext)
                {
                    break;
                }

                if (iterator.Current.HasValue)
                {
                    j++;
                    t = iterator.Current.Value;
                    break;
                }
            }

            while (iterator.MoveNext())
            {
                if (iterator.Current.HasValue)
                {
                    j++;
                    double xi = iterator.Current.Value;
                    t += xi;
                    double diff = (j*xi) - t;
                    variance += (diff*diff)/(j*(j - 1));
                }
            }

            return variance/(j - 1);
        }

        /// <summary>
        ///   Calculates the biased population variance estimator (on a dataset of size N will use an N normalizer).
        /// </summary>
        /// <param name = "data">The data to calculate the variance of.</param>
        /// <returns>The biased population variance of the sample.</returns>
        public static double PopulationVariance(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double variance = 0;
            double t = 0;
            ulong j = 0;

            IEnumerator<double> iterator = data.GetEnumerator();
            if (iterator.MoveNext())
            {
                j++;
                t = iterator.Current;
            }

            while (iterator.MoveNext())
            {
                j++;
                double xi = iterator.Current;
                t += xi;
                double diff = (j*xi) - t;
                variance += (diff*diff)/(j*(j - 1));
            }

            return variance/j;
        }

        /// <summary>
        ///   Computes the biased population variance estimator (on a dataset of size N will use an N normalizer) for nullable data.
        /// </summary>
        /// <param name = "data">The data to calculate the variance of.</param>
        /// <returns>The population variance of the sample.</returns>
        public static double PopulationVariance(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double variance = 0;
            double t = 0;
            ulong j = 0;

            IEnumerator<double?> iterator = data.GetEnumerator();

            while (true)
            {
                bool hasNext = iterator.MoveNext();
                if (!hasNext)
                {
                    break;
                }

                if (iterator.Current.HasValue)
                {
                    j++;
                    t = iterator.Current.Value;
                    break;
                }
            }

            while (iterator.MoveNext())
            {
                if (iterator.Current.HasValue)
                {
                    j++;
                    double xi = iterator.Current.Value;
                    t += xi;
                    double diff = (j*xi) - t;
                    variance += (diff*diff)/(j*(j - 1));
                }
            }

            return variance/j;
        }

        /// <summary>
        ///   Calculates the unbiased sample standard deviation (on a dataset of size N will use an N-1 normalizer).
        /// </summary>
        /// <param name = "data">The data to calculate the standard deviation of.</param>
        /// <returns>The standard deviation of the sample.</returns>
        public static double StandardDeviation(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            return Math.Sqrt(Variance(data));
        }

        /// <summary>
        ///   Calculates the unbiased sample standard deviation (on a dataset of size N will use an N-1 normalizer).
        /// </summary>
        /// <param name = "data">The data to calculate the standard deviation of.</param>
        /// <returns>The standard deviation of the sample.</returns>
        public static double StandardDeviation(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            return Math.Sqrt(Variance(data));
        }

        /// <summary>
        ///   Calculates the biased sample standard deviation (on a dataset of size N will use an N normalizer).
        /// </summary>
        /// <param name = "data">The data to calculate the standard deviation of.</param>
        /// <returns>The standard deviation of the sample.</returns>
        public static double PopulationStandardDeviation(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            return Math.Sqrt(PopulationVariance(data));
        }

        /// <summary>
        ///   Calculates the biased sample standard deviation (on a dataset of size N will use an N normalizer).
        /// </summary>
        /// <param name = "data">The data to calculate the standard deviation of.</param>
        /// <returns>The standard deviation of the sample.</returns>
        public static double PopulationStandardDeviation(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            return Math.Sqrt(PopulationVariance(data));
        }

        /// <summary>
        ///   Returns the minimum value in the sample data.
        /// </summary>
        /// <param name = "data">The sample data.</param>
        /// <returns>The minimum value in the sample data.</returns>
        public static double Minimum(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double min = double.MaxValue;
            ulong count = 0;
            foreach (double? d in data)
            {
                if (d.HasValue)
                {
                    min = Math.Min(min, d.Value);
                    count++;
                }
            }

            if (count == 0)
            {
                throw new ArgumentException("CollectionEmpty");
            }

            return min;
        }

        /// <summary>
        ///   Returns the maximum value in the sample data.
        /// </summary>
        /// <param name = "data">The sample data.</param>
        /// <returns>The maximum value in the sample data.</returns>
        public static double Maximum(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double max = double.MinValue;
            ulong count = 0;
            foreach (double? d in data)
            {
                if (d.HasValue)
                {
                    max = Math.Max(max, d.Value);
                    count++;
                }
            }

            if (count == 0)
            {
                throw new ArgumentException("CollectionEmpty");
            }

            return max;
        }

        /// <summary>
        ///   Returns the minimum value in the sample data.
        /// </summary>
        /// <param name = "data">The sample data.</param>
        /// <returns>The minimum value in the sample data.</returns>
        public static double Minimum(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double min = double.MaxValue;
            ulong count = 0;
            foreach (double d in data)
            {
                min = Math.Min(min, d);
                count++;
            }

            if (count == 0)
            {
                throw new ArgumentException("CollectionEmpty");
            }

            return min;
        }

        /// <summary>
        ///   Returns the maximum value in the sample data.
        /// </summary>
        /// <param name = "data">The sample data.</param>
        /// <returns>The maximum value in the sample data.</returns>
        public static double Maximum(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            double max = double.MinValue;
            ulong count = 0;
            foreach (double d in data)
            {
                max = Math.Max(max, d);
                count++;
            }

            if (count == 0)
            {
                throw new ArgumentException("CollectionEmpty");
            }

            return max;
        }

        /// <summary>
        ///   Calculates the sample median.
        /// </summary>
        /// <param name = "data">The data to calculate the median of.</param>
        /// <returns>The median of the sample.</returns>
        public static double Median(this IEnumerable<double> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }
            IOrderedEnumerable<double> order = data.OrderBy((item) => item);
            float result;
            int length = order.Count();
            if (length%2 == 0)
            {
                int middle = length/2;
                result = (float) (order.ElementAt(middle) + order.ElementAt(middle - 1))/2;
            }
            else
            {
                result = (float) order.ElementAt(length/2);
            }
            return result;
        }

        /// <summary>
        ///   Calculates the sample median.
        /// </summary>
        /// <param name = "data">The data to calculate the median of.</param>
        /// <returns>The median of the sample.</returns>
        public static double Median(this IEnumerable<double?> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            List<double> nonNull = new List<double>();
            foreach (double? value in data)
            {
                if (value.HasValue)
                {
                    nonNull.Add(value.Value);
                }
            }

            if (nonNull.Count == 0)
            {
                throw new ArgumentException("CollectionEmpty");
            }

            return nonNull.Median();
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The MIT License


Written By
Software Developer
Moldova (Republic of) Moldova (Republic of)
Interested in computer science, math, research, and everything that relates to innovation. Fan of agnostic programming, don't mind developing under any platform/framework if it explores interesting topics. In search of a better programming paradigm.

Comments and Discussions