Click here to Skip to main content
15,884,099 members
Articles / Web Development / ASP.NET

HTTP Data Client - Web Scraping

Rate me:
Please Sign up or sign in to vote.
4.79/5 (8 votes)
21 Jul 2011CPOL12 min read 47.3K   1.7K   56  
An HttpWebRequest-based library that abstracts how data is retrieved from web sources.
using System.IO;
using System.Collections.Generic;
using System.Runtime.Serialization.Formatters.Binary;

namespace HttpData.Client
{
    /// <summary>
    /// Provides functionality for cache management.
    /// </summary>
    public class HDPCache
    {
        #region Private Variables
        // Cache configuration supplied to the constructor; read for the size/count limits and the storage flags.
        private HDPCacheDefinition cacheDefinition;
        // Cache storage backend. Only created by the constructor when the definition's StorageName is
        // non-empty, so it stays null otherwise.
        private HDPCacheStorage storageCache;
        // In-memory cache entries, keyed by HDPCacheObject.Key.
        private Dictionary<string, HDPCacheObject> memoryCache;
        #endregion

        #region Constants
        #endregion

        #region Properties
        #endregion

        #region .ctor
        /// <summary>
        /// Instantiate a new HDPCache object.
        /// </summary>
        /// <param name="cacheDefinition">HDPCacheDefinition object holding the cache limits and storage settings.</param>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="cacheDefinition"/> is null.</exception>
        public HDPCache(HDPCacheDefinition cacheDefinition)
        {
            // Fail fast with a descriptive exception rather than a NullReferenceException
            // on the StorageName access below.
            if (cacheDefinition == null)
                throw new System.ArgumentNullException("cacheDefinition");

            this.cacheDefinition = cacheDefinition;
            memoryCache = new Dictionary<string, HDPCacheObject>();

            // The storage backend is optional: it is only created when a storage name is configured,
            // otherwise storageCache stays null.
            if (!string.IsNullOrEmpty(cacheDefinition.StorageName))
                storageCache = new HDPCacheStorage(cacheDefinition.StorageName);
        }
        #endregion

        #region Public Methods
        /// <summary>
        /// Stores the cached buffer on the disk.
        /// </summary>
        /// <param name="buffer">Buffer containing the cache data.</param>
        /// <param name="fileName">The name of the file which will store the cached data.</param>
        /// <returns>The path of the file which will store the cache data.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the cache was created without a storage name, so no cache storage exists.
        /// </exception>
        public string WriteToDisk(byte[] buffer, string fileName)
        {
            // storageCache is only created when a StorageName was configured; report the
            // misconfiguration explicitly instead of failing with a NullReferenceException.
            if (storageCache == null)
                throw new System.InvalidOperationException("Cache storage is not configured; no StorageName was specified in the cache definition.");

            return storageCache.WriteToDisk(buffer, fileName);
        }

        /// <summary>
        /// Add a HDPCacheObject object to the cache collection.
        /// </summary>
        /// <remarks>
        /// An object whose key is already present in the memory cache is ignored. When real-time
        /// persistence is enabled the object is written to the cache storage only. Otherwise room is
        /// made according to the configured limit (memory size first, then number of objects) and the
        /// object is added to the memory cache.
        /// </remarks>
        /// <param name="obj">HDPCacheObject object which will be added to the cache collection.</param>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="obj"/> is null.</exception>
        public void AddObject(HDPCacheObject obj)
        {
            if (obj == null)
                throw new System.ArgumentNullException("obj");

            // Objects already held in memory are left untouched.
            if (memoryCache.ContainsKey(obj.Key))
                return;

            // Real-time persistence: the object goes straight to the storage and is not kept in memory.
            if (cacheDefinition.RealtimePersistance)
            {
                storageCache.AddObject(obj);
                return;
            }

            if (cacheDefinition.MemorySizeLimit > 0)
            {
                // GetCacheMemorySize() is in bytes; it is divided by 1024 before being compared to the limit.
                // The Count guard stops the loop once nothing is left to evict, which previously
                // could spin forever.
                while (memoryCache.Count > 0 && (GetCacheMemorySize() / 1024) > cacheDefinition.MemorySizeLimit)
                {
                    if (!EvictFirstOrClear())
                        break;
                }
            }
            else if (cacheDefinition.ObjectsNumberLimit > 0)
            {
                // Evict until there is a free slot so the cache never holds more than
                // ObjectsNumberLimit objects once the new one is added.
                while (memoryCache.Count > 0 && memoryCache.Count >= cacheDefinition.ObjectsNumberLimit)
                {
                    if (!EvictFirstOrClear())
                        break;
                }
            }

            // The new object is always added once room has been made. Previously it was silently
            // dropped in the size-limit branch (inverted condition) and whenever an eviction
            // happened in the count-limit branch.
            memoryCache.Add(obj.Key, obj);
        }

        /// <summary>
        /// Frees room in the memory cache: moves the first object to the cache storage when storage
        /// is in use, otherwise clears the whole memory cache.
        /// </summary>
        /// <returns>False when nothing could be evicted; true otherwise.</returns>
        private bool EvictFirstOrClear()
        {
            if (!cacheDefinition.UseStorage)
            {
                memoryCache.Clear();
                return true;
            }

            string key = GetKey(0);
            if (key == null)
                return false;

            PersistObject(memoryCache[key]);
            return true;
        }

        /// <summary>
        /// Get a HDPCacheObject from the cache collection.
        /// </summary>
        /// <remarks>
        /// The memory cache is searched first. When the object is not there and both UseStorage and
        /// RetrieveFromStorage are enabled, the object is loaded from the cache storage, added to the
        /// memory cache, and older objects are moved back to the storage if the configured limit is exceeded.
        /// </remarks>
        /// <param name="key">The key associated with the HDPCacheObject object.</param>
        /// <returns>HDPCacheObject retrieved from the collection, or null when it cannot be found.</returns>
        public HDPCacheObject RetrieveObject(string key)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            HDPCacheObject cached;
            if (memoryCache.TryGetValue(key, out cached))
                return cached;

            if (!cacheDefinition.UseStorage || !cacheDefinition.RetrieveFromStorage)
                return null;

            HDPCacheObject obj = storageCache.RetrieveObject(key);
            if (obj == null)
                return null;

            memoryCache.Add(obj.Key, obj);
            memoryCache = SortDictionary(memoryCache);

            // Unless objects are persisted in real time, the storage copy is dropped now that the
            // object lives in memory. This must happen BEFORE the eviction below: if the eviction
            // writes this very object back to the storage, removing it afterwards would delete it
            // from both places.
            if (!cacheDefinition.RealtimePersistance)
                storageCache.RemoveObject(obj.Key);

            if (cacheDefinition.MemorySizeLimit > 0)
            {
                // GetCacheMemorySize() is in bytes; it is divided by 1024 before being compared to the limit.
                // The Count guard and the null check stop the loop once the cache has been emptied.
                while (memoryCache.Count > 0 && (GetCacheMemorySize() / 1024) > cacheDefinition.MemorySizeLimit)
                {
                    string oldestKey = GetKey(0);
                    if (oldestKey == null)
                        break;

                    PersistObject(memoryCache[oldestKey]);
                }
            }
            else if (cacheDefinition.ObjectsNumberLimit > 0)
            {
                if (memoryCache.Count > cacheDefinition.ObjectsNumberLimit)
                {
                    // Check the key before indexing: the dictionary indexer throws on a null key.
                    string oldestKey = GetKey(0);
                    if (oldestKey != null)
                        PersistObject(memoryCache[oldestKey]);
                }
            }

            return obj;
        }

        /// <summary>
        /// Writes every object currently held in the memory cache to the cache storage.
        /// Does nothing when the cache definition does not use storage.
        /// </summary>
        public void PersistCache()
        {
            if (!cacheDefinition.UseStorage)
                return;

            foreach (KeyValuePair<string, HDPCacheObject> entry in memoryCache)
            {
                storageCache.AddObject(entry.Value);
            }
        }

        /// <summary>
        /// Remove a HDPCacheObject object from the cache collection.
        /// </summary>
        /// <remarks>
        /// The object is removed from the memory cache and, when storage is in use, from the cache
        /// storage as well.
        /// </remarks>
        /// <param name="key">The key associated with the HDPCacheObject object.</param>
        public void RemoveObject(string key)
        {
            // Dictionary.Remove simply returns false when the key is absent, so no prior lookup is needed.
            memoryCache.Remove(key);

            // An object can live in memory and in the storage at the same time (after PersistCache,
            // or when it was retrieved while real-time persistence is enabled). Removing only the
            // memory copy would let a later RetrieveObject bring it back from the storage.
            if (cacheDefinition.UseStorage)
                storageCache.RemoveObject(key);
        }

        /// <summary>
        /// Empties the memory cache and, when storage is in use, purges the cache storage too.
        /// </summary>
        public void Purge()
        {
            memoryCache.Clear();

            if (!cacheDefinition.UseStorage)
            {
                return;
            }

            storageCache.Purge();
        }

        /// <summary>
        /// Close the cache storage. Does nothing when the cache was created without a storage.
        /// </summary>
        public void CloseStorageConnection()
        {
            // storageCache is null when no StorageName was configured; there is nothing to close then.
            if (storageCache != null)
                storageCache.Close();
        }
        #endregion

        #region Private Methods
        /// <summary>
        /// Takes an object out of the memory cache (if it is there) and adds it to the cache storage.
        /// </summary>
        /// <param name="obj">The object to hand over to the cache storage.</param>
        private void PersistObject(HDPCacheObject obj)
        {
            // Remove returns false without throwing when the key is not present,
            // so it covers both the "in memory" and "not in memory" cases.
            memoryCache.Remove(obj.Key);

            storageCache.AddObject(obj);
        }

        /// <summary>
        /// Measures the memory cache by serializing it with a BinaryFormatter.
        /// </summary>
        /// <returns>The length, in bytes, of the serialized memory cache.</returns>
        private long GetCacheMemorySize()
        {
            // NOTE(review): BinaryFormatter is obsolete in current .NET versions. It is only used
            // here to serialize (never to deserialize), but the size estimate will need another
            // mechanism if the project is retargeted.
            BinaryFormatter formatter = new BinaryFormatter();

            using (MemoryStream stream = new MemoryStream())
            {
                formatter.Serialize(stream, memoryCache);

                // Length has to be read while the stream is still open: MemoryStream.Length throws
                // ObjectDisposedException once the stream has been closed.
                return stream.Length;
            }
        }

        /// <summary>
        /// Returns the memory cache key found at the given position of the key collection.
        /// </summary>
        /// <param name="index">Zero-based position within the key collection.</param>
        /// <returns>The key at that position, or null when the cache is empty or holds no more than <paramref name="index"/> keys.</returns>
        private string GetKey(int index)
        {
            int keyCount = memoryCache.Keys.Count;
            if (keyCount <= 0 || keyCount <= index)
                return null;

            // Snapshot the keys into an array so one can be picked by position.
            string[] snapshot = new string[keyCount];
            memoryCache.Keys.CopyTo(snapshot, 0);

            return snapshot[index];
        }

        /// <summary>
        /// Re-inserts the entries of a dictionary in ascending CacheDate order.
        /// </summary>
        /// <remarks>
        /// NOTE(review): callers read the first key as the oldest entry, which relies on the
        /// dictionary enumerating in insertion order; Dictionary does not document an
        /// enumeration order, so confirm this holds.
        /// </remarks>
        /// <param name="dictionary">The dictionary to reorder; it is cleared and refilled in place.</param>
        /// <returns>The same dictionary instance that was passed in.</returns>
        private static Dictionary<string, HDPCacheObject> SortDictionary(Dictionary<string, HDPCacheObject> dictionary)
        {
            // Copy the entries out first, because the dictionary is emptied before being refilled.
            List<KeyValuePair<string, HDPCacheObject>> entries = new List<KeyValuePair<string, HDPCacheObject>>(dictionary);
            entries.Sort((left, right) => left.Value.CacheDate.CompareTo(right.Value.CacheDate));

            dictionary.Clear();
            for (int i = 0; i < entries.Count; i++)
            {
                KeyValuePair<string, HDPCacheObject> entry = entries[i];
                dictionary.Add(entry.Key, entry.Value);
            }

            return dictionary;
        }
        #endregion
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer (Senior)
Cyprus Cyprus
I am a senior software engineer with over 8 years of experience. I have worked for various international software companies using a range of technologies and programming languages, including C/C++, LotusScript, the Lotus API, C#, ASP.NET, WCF, MS-SQL, Oracle, Domino Server, and JavaScript.

Comments and Discussions