Click here to Skip to main content
15,893,594 members
Articles / Web Development / HTML

Package that speeds up loading of JavaScript, CSS and image files

Rate me:
Please Sign up or sign in to vote.
4.95/5 (114 votes)
29 Mar 2012 | CPOL | 44 min read | 880.9K | 4.4K | 260
Improves web site performance by combining and minifying JavaScript and CSS files on the fly. Processes ASP.NET AJAX toolkit .axd files too. Improves image caching and loading. Very easy to add to any ASP.NET web site.
using System;
using System.Collections.Generic;
using System.Web;
using System.Text;
using System.Text.RegularExpressions;

namespace CombineAndMinify
{
    /// <summary>
    /// Contains the results of an analysis of the head of a page.
    /// </summary>
    public class HeadAnalysis
    {
        /// <summary>
        /// Describes one string replacement to be made in the head html.
        /// Instances are created by ProcessFileType, one per group of script or link tags.
        /// </summary>
        public class Replacement
        {
            /// <summary>Text of a group of tags as found in the head, trimmed of white space at both ends.</summary>
            public string original { get; set; }
            /// <summary>Text to put in its place; the empty string when the group is simply to be removed.</summary>
            public string replacement { get; set; }
        }

        /// <summary>
        /// All the replacements that need to be made in the head.
        /// Filled by the constructor: first the replacements for script tags, then those for CSS link tags.
        /// </summary>
        public List<Replacement> Replacements { get; set; }

        /// <summary>
        /// The urls of all images used in the CSS files loaded in the head.
        /// This is a copy of the ProcessedImageUrls of the UrlProcessor passed to the constructor,
        /// taken after the head has been processed.
        /// </summary>
        public List<string> ProcessedImageUrls { get; set; }

        /// <summary>
        /// Analyzes the head of a page. Finds the groups of script tags and CSS link tags in the head
        /// and records in <see cref="Replacements"/> what each group needs to be replaced with.
        /// Afterwards copies the image urls gathered by the UrlProcessor into
        /// <see cref="ProcessedImageUrls"/> and clears them on the UrlProcessor.
        /// </summary>
        /// <param name="headHtml">
        /// the current contents of the head
        /// </param>
        /// <param name="totalFileNames">
        /// totalFileNames will be filled with a list of the names of all
        /// CSS and JavaScript files loaded in the head (that is, those
        /// that get combined and/or minified).
        /// 
        /// Null if no head caching takes place.
        /// </param>
        /// <param name="combineCSSFiles">
        /// Determines how the CSS files are combined: not at all (None), per group of
        /// adjacent link tags (PerGroup), or all into a single file per media (All).
        /// </param>
        /// <param name="combineJavaScriptFiles">
        /// Determines how the JavaScript files are combined: not at all (None), per group of
        /// adjacent script tags (PerGroup), or all into a single file (All).
        /// </param>
        /// <param name="minifyCSS">
        /// Passed on unchanged to CombinedFile.Url - presumably switches minification of CSS on or off.
        /// </param>
        /// <param name="minifyJavaScript">
        /// Passed on unchanged to CombinedFile.Url - presumably switches minification of JavaScript on or off.
        /// </param>
        /// <param name="enableAxdProcessing">
        /// If true, link tags whose href is a WebResource.axd url are processed too,
        /// in addition to link tags whose href ends in .css.
        /// </param>
        /// <param name="urlProcessor">
        /// Use this UrlProcessor to for example insert version ids.
        /// The ProcessedImageUrls property of this UrlProcessor has already been loaded with the 
        /// urls of the images on the page.
        /// Note that this constructor clears that property after copying its contents.
        /// </param>
        public HeadAnalysis(
            string headHtml, ISet<string> totalFileNames,
            ConfigSection.CombineOption combineCSSFiles, ConfigSection.CombineOption combineJavaScriptFiles,
            bool minifyCSS, bool minifyJavaScript, bool enableAxdProcessing,
            UrlProcessor urlProcessor)
        {
            // Process a version of the head html without comments.
            // This way, commented out script and link tags won't be included in combined script and css files.
            // And constructs of the form
            // <!--[if IE 7]>
            // <link href="css/ie7Fixes.css" rel="stylesheet" type="text/css" />
            // <![endif]-->
            // won't be processed. That way, if the user opts to combine all CSS files into one,
            // these conditional files won't be combined.
            // This should work ok, because when all CSS files get combined, the single link tag
            // replaces the very first link tag. And conditional links normally sit after some other link
            // because they override other css.
            //
            // The html comments are replaced by <!-- --> instead of the empty string, to cater for constructs such as
            //
            // <script type="text/javascript" src="js/script7.js">
            // <!--
            // // This tag should be excluded, because it contains code
            // var x = 5;
            // -->
            // </script> 
            //
            // This tag should not be processed, because it contains code. If the comment had been replaced
            // with the empty string, the rest of the code would no longer know that this tag originally did have
            // code.
            string headHtmlWithoutComments = CombinedFile.HtmlCommentsReplaced(headHtml, "<!-- -->"); 

            // Find groups of script or link tags that load a script or css file from
            // the local site.
            //
            // A script tag that has code between the <script> and </script>
            // has inline script, so we're not interested in that either.
            // That is why the expression only allows white space between the opening and closing tag.

            // This expression matches all script tags, even those that load axd files!
            // You can get away with this, because script tags for axd files are not generated inside the head,
            // only in the body.
            string regexpScriptLink =
                @"<script[^>]*?src=(?:""|')(?<src>" + 
                RegexExpressionRejectOtherHosts() + 
                @"[^""']*?)(?:""|')[^>]*?>[\s\n\r]*</script>";

            // Name of the capture group in regexpScriptLink that holds the url
            const string propertyNameUrlScript = "src";
            // Script tags have no media, so there is no capture group for it
            const string propertyNameMediaScript = null;

            // {0} receives the url of the (combined) file
            const string tagTemplateScript = "<script type=\"text/javascript\" src=\"{0}\"></script>";

            string regexpCssLink =
                @"<link" +
                @"(?=[^>]*?href=(?:""|')" + // positive look ahead for href property, ensuring there is a correct href
                "(?<href>" +
                    RegexExpressionRejectOtherHosts() +
                    (enableAxdProcessing ? ("(?:(?:" + webResourceAxdRegexpMatch() + ")|(?:") : "") +
                    @"[^""']*?\.css" + // the url has to end in .css (or be a WebResource.axd url if axd processing is enabled)
                    (enableAxdProcessing ? "))" : "") +
                @")" +
                @"(?:""|'))"+
                @"(?:(?:[^>]*?media=(?:""|')(?<media>[^""']*?)(?:""|'))*)" + // match zero or more media properties
                @"[^>]*?>"; // end of the link statement

            // Names of the capture groups in regexpCssLink that hold the url and the media
            const string propertyNameUrlCss = "href";
            const string propertyNameMediaCss = "media";

            // {0} receives the url of the (combined) file, {1} the media property (may be empty)
            const string tagTemplateCss = "<link rel=\"stylesheet\" type=\"text/css\" href=\"{0}\" {1}/>";

            // ProcessFileType adds records to the Replacements list

            Replacements = new List<Replacement>();

            // JavaScript: when combining all files, the combined tag replaces the last group (placeCombinedFilesAtEnd = true)
            ProcessFileType(
                headHtmlWithoutComments,
                regexpScriptLink,
                propertyNameUrlScript,
                propertyNameMediaScript,
                FileTypeUtilities.FileType.JavaScript,
                tagTemplateScript,
                totalFileNames,
                combineJavaScriptFiles,
                true,
                minifyCSS, minifyJavaScript,
                urlProcessor);

            // CSS: when combining all files, the combined tag replaces the first group (placeCombinedFilesAtEnd = false)
            ProcessFileType(
                headHtmlWithoutComments,
                regexpCssLink,
                propertyNameUrlCss,
                propertyNameMediaCss,
                FileTypeUtilities.FileType.CSS,
                tagTemplateCss,
                totalFileNames,
                combineCSSFiles,
                false,
                minifyCSS, minifyJavaScript,
                urlProcessor);

            // The urlProcessor now contains all image urls contained in CSS files.
            // Copy those urls to this.ProcessedImageUrls.
            ProcessedImageUrls = new List<string>(urlProcessor.ProcessedImageUrls);
            // Empty the list on the urlProcessor, now that this object holds its own copy.
            urlProcessor.ProcessedImageUrls.Clear();
        }

        /// <summary>
        /// Builds the regular expression fragment that recognises the url of a
        /// WebResource.axd file of this site, whatever its query string. Examples of such urls:
        /// /Testsite/WebResource.axd?d=4HYID9....pVLjyQ2&amp;t=634092594280000000
        /// /WebResource.axd?d=4HYID9....pVLjyQ2&amp;t=634092594280000000
        /// </summary>
        /// <returns>
        /// The escaped, resolved url of ~/WebResource.axd followed by an expression
        /// that swallows everything up to the next quote.
        /// </returns>
        private string webResourceAxdRegexpMatch()
        {
            // Resolve the application relative name, so a site folder such as /Testsite is included.
            string appRelativeAxdName = @"~/" + CombinedFile.WebResourceAxdFileName;
            AbsoluteUrl axdUrl = CombinedFile.UrlToAbsolutePath(appRelativeAxdName, null, null);

            // The tail accepts the query string: any run of characters that are not quotes.
            return EscapedForRegex(axdUrl.AbsoluteUrlWithQueryAndFragment) + @"[^""']*";
        }

        /// <summary>
        /// Returns the scheme, host, port and site root folder of the current request url. For example
        /// http://www.mydomain.com:1080/
        /// 
        /// The port is left out when it is the default port of the scheme of the current request
        /// (80 for http, 443 for https), because urls pointing at this site then normally
        /// do not spell out the port either.
        /// </summary>
        /// <param name="useRelativeProtocol">
        /// If true, the returned string uses a relative protocol, such as
        /// //localhost:1080/
        /// If false, it uses a normal protocol, such as
        /// http://localhost:1080/
        /// </param>
        /// <returns>
        /// The prefix that a url must start with to be located on the current site.
        /// </returns>
        private string CurrentProtocolHostPort(bool useRelativeProtocol)
        {
            Uri currentUri = HttpContext.Current.Request.Url;

            // If running on Cassini, this will also give you the directory in which the site is stored, such as 
            // /TestSite/
            string siteFolder = VirtualPathUtility.ToAbsolute("~/");

            string protocolPrefix = useRelativeProtocol ? "//" : currentUri.Scheme + "://";

            // Only a non-default port shows up in urls. Testing against 80 alone would produce
            // https://host:443/ on an https site, which no url in the page would ever match.
            string portSuffix = currentUri.IsDefaultPort ? "" : ":" + currentUri.Port.ToString();

            return protocolPrefix + currentUri.Host + portSuffix + siteFolder;
        }

        /// <summary>
        /// Produces a zero-width regular expression fragment meant to be placed at the very start
        /// of an expression that matches a url. The fragment only lets through urls that
        /// 
        /// * have no host at all (they do not start with http://, https:// or //), or
        /// * start with the protocol, host, port and site folder of the current request, or
        /// * start with that same authority written with a relative protocol (//host...).
        /// 
        /// Urls on any other host are therefore rejected.
        /// 
        /// Because the string compared against ends with the site folder, a port that merely
        /// begins with this host's port is rejected as well. So:
        /// http://localhost:23456
        /// will not match
        /// http://localhost:2345
        /// </summary>
        private string RegexExpressionRejectOtherHosts()
        {
            string ownHostAbsolute = EscapedForRegex(CurrentProtocolHostPort(false));
            string ownHostRelativeProtocol = EscapedForRegex(CurrentProtocolHostPort(true));

            // Url without any host: none of the three prefixes may follow.
            const string hostLessUrl = @"(?:(?!http://)(?!https://)(?!//))";

            // Url that spells out the current host, with a normal or a relative protocol.
            string ownHostUrl = @"(?=" + ownHostAbsolute + @")";
            string ownHostRelativeUrl = @"(?=" + ownHostRelativeProtocol + @")";

            return @"(?:" + hostLessUrl + @"|" + ownHostUrl + @"|" + ownHostRelativeUrl + @")";
        }

        /// <summary>
        /// Makes a literal string safe for use inside a regular expression.
        /// On top of what Regex.Escape does, every colon is preceded by a backslash.
        /// </summary>
        /// <param name="s">The literal text to escape.</param>
        /// <returns>The escaped text.</returns>
        private string EscapedForRegex(string s)
        {
            string escapedByFramework = Regex.Escape(s);
            string withColonsEscaped = escapedByFramework.Replace(":", @"\:");
            return withColonsEscaped;
        }

        // This type maps a media (such as "screen" or "print") to a list of urls.
        // The default name for a media is "all". So even if a link (or script tag)
        // doesn't belong to a specific media, it is associated here with the media "all".
        //
        // Media names are compared without regard to case, so that for example
        // "Screen" and "screen" end up in the same list. This matches MediaProperty,
        // which also recognises "all" irrespective of case.
        private class MediaSpecificUrls : Dictionary<string, List<AbsoluteUrl>> 
        {
            public MediaSpecificUrls()
                : base(StringComparer.OrdinalIgnoreCase)
            {
            }

            // Adds a url to the list of the given media. The list is created
            // the first time a media is seen; urls keep the order in which they were added.
            public void Add(string media, AbsoluteUrl url)
            {
                List<AbsoluteUrl> urlsForMedia;

                // Single lookup, instead of ContainsKey followed by the indexer
                if (!TryGetValue(media, out urlsForMedia))
                {
                    urlsForMedia = new List<AbsoluteUrl>();
                    base.Add(media, urlsForMedia);
                }

                urlsForMedia.Add(url);
            }        
        }

        // This type stores all info about a group of links
        // (a run of script tags or CSS link tags separated by nothing but white space):
        // * The text of the file group, so it can be easily removed from the head
        // * The urls in the group grouped by their media.
        private class groupInfo
        {
            // Text of the group exactly as it was matched in the head (not yet trimmed)
            public string linkGroupText;
            // Urls of the links in this group, keyed by media
            public MediaSpecificUrls mediaSpecificUrls;
        }

        /// <summary>
        /// Finds all groups of links of one type (script tags or CSS link tags) in the head,
        /// and adds to <see cref="Replacements"/> what each group has to be replaced with.
        /// Does nothing if the head contains no such links.
        /// </summary>
        /// <param name="headHtml">
        /// The html of the head in which to look for links.
        /// </param>
        /// <param name="linkRegexp">
        /// Regular expression that matches a single link (that is, a CSS link or a script tag).
        /// </param>
        /// <param name="propertyNameUrl">
        /// The name of the property within the link that holds the url.
        /// </param>
        /// <param name="propertyNameMedia">
        /// The name of the property within the link that holds the media.
        /// null if there is no such property.
        /// </param>
        /// <param name="fileType">
        /// Type of the files loaded by the links. Passed on to CombinedFile.Url.
        /// </param>
        /// <param name="tagTemplate">
        /// Format string used to generate a tag. {0} receives the url, {1} the media property.
        /// </param>
        /// <param name="totalFileNames">
        /// Passed on to CombinedFile.Url, which is expected to add the urls of the links to this set.
        /// </param>
        /// <param name="combineFiles">
        /// None: every link gets its own tag. PerGroup: one tag per media per group.
        /// All: one tag per media for all groups together.
        /// </param>
        /// <param name="placeCombinedFilesAtEnd">
        /// This is only relevant if combineFiles equals All.
        /// If placeCombinedFilesAtEnd is true, the tag loading the combined file
        /// replaces the very last file group (important if you're loading js, because it means that if any
        /// js is dependent on a library loaded from a CDN, all the js will load after that library).
        /// 
        /// If placeCombinedFilesAtEnd is false, the tag replaces the very first file group.
        /// You'd use this with CSS, to get it load possibly sooner than the js.
        /// </param>
        /// <param name="minifyCSS">Passed on to CombinedFile.Url.</param>
        /// <param name="minifyJavaScript">Passed on to CombinedFile.Url.</param>
        /// <param name="urlProcessor">Passed on to CombinedFile.Url.</param>
        /// <exception cref="ArgumentException">
        /// combineFiles has a value other than None, PerGroup or All (only detected when links were found).
        /// </exception>
        private void ProcessFileType(
            string headHtml,
            string linkRegexp,
            string propertyNameUrl,
            string propertyNameMedia,
            FileTypeUtilities.FileType fileType,
            string tagTemplate,
            ISet<string> totalFileNames,
            ConfigSection.CombineOption combineFiles,
            bool placeCombinedFilesAtEnd,
            bool minifyCSS, bool minifyJavaScript,
            UrlProcessor urlProcessor)
        {
            List<groupInfo> allGroups = new List<groupInfo>();
            MediaSpecificUrls mediaSpecificUrlsAllGroups = new MediaSpecificUrls();

            // Create a regular expression matching groups of the given linkRegexp.
            // Two links are in the same group if they are separated by no more than white space.
            string groupRegexp = 
                @"(?:" + linkRegexp + @"[\s\n\r]*)+";

            Regex regexGroup = new Regex(groupRegexp, RegexOptions.IgnoreCase);

            // Visit each group of script or link tags. Record the html of each file group
            // and a list of the urls in the tags in that file group in allGroups.
            for (Match matchGroup = regexGroup.Match(headHtml); matchGroup.Success; matchGroup = matchGroup.NextMatch())
            {
                string linkGroupText = matchGroup.Value;
                MediaSpecificUrls mediaSpecificUrlsInGroup = new MediaSpecificUrls();

                AnalyzeGroup(
                    linkGroupText, linkRegexp, propertyNameUrl, propertyNameMedia,
                    mediaSpecificUrlsInGroup, mediaSpecificUrlsAllGroups);

                allGroups.Add(new groupInfo() { linkGroupText = linkGroupText, mediaSpecificUrls = mediaSpecificUrlsInGroup });
            }

            if (allGroups.Count == 0)
            {
                return;
            }

            switch (combineFiles)
            {
                case ConfigSection.CombineOption.None:
                case ConfigSection.CombineOption.PerGroup:
                    {
                        // None: in each group, process all URLs individually into tags.
                        // Note that CombinedFile.Url not only has the ability to combine urls, but also
                        // to insert version info - and we still want that to be able to use far future cache expiry,
                        // even if not combining files.
                        //
                        // PerGroup: in each group, process all URLs of the same media together into a combined tag.
                        //
                        // Either way, the group is replaced with the concatenated tags.
                        bool combinePerMedia = (combineFiles == ConfigSection.CombineOption.PerGroup);

                        foreach (groupInfo g in allGroups)
                        {
                            string tagsInGroup = BuildTags(
                                g.mediaSpecificUrls, combinePerMedia, fileType, tagTemplate,
                                totalFileNames, minifyCSS, minifyJavaScript, urlProcessor);

                            // Be sure to trim the group before storing it (that is, remove space at the front and end).
                            // If you don't, you may store a group with white space at either end, that then doesn't match
                            // a group in some other file that is exactly the same, except for the white space at either end.
                            Replacements.Add(new Replacement { original = g.linkGroupText.Trim(), replacement = tagsInGroup });
                        }
                    }
                    break;

                case ConfigSection.CombineOption.All:
                    {
                        // Combine all urls of the same media into a single tag. One group is replaced
                        // with the resulting tags, all other groups are removed from the head.
                        string tagsForAllGroups = BuildTags(
                            mediaSpecificUrlsAllGroups, true, fileType, tagTemplate,
                            totalFileNames, minifyCSS, minifyJavaScript, urlProcessor);

                        int idxFileGroupToReplace = placeCombinedFilesAtEnd ? (allGroups.Count - 1) : 0;

                        Replacements.Add(
                            new Replacement
                            {
                                original = allGroups[idxFileGroupToReplace].linkGroupText.Trim(),
                                replacement = tagsForAllGroups
                            });

                        // Replace all file groups with empty string, except for the one
                        // we just replaced with the tag.
                        for (int i = 0; i < allGroups.Count; i++)
                        {
                            if (i == idxFileGroupToReplace)
                            {
                                continue;
                            }

                            Replacements.Add(
                                new Replacement { original = allGroups[i].linkGroupText.Trim(), replacement = "" });
                        }
                    }
                    break;

                default:
                    throw new ArgumentException("ProcessFileType - combineFiles=" + combineFiles.ToString());
            }
        }

        // Generates the tags for a set of urls that are grouped by media, and returns them concatenated.
        // If combinePerMedia is true, all urls of a media are loaded by one tag.
        // If it is false, each url gets a tag of its own.
        private string BuildTags(
            MediaSpecificUrls urlsByMedia,
            bool combinePerMedia,
            FileTypeUtilities.FileType fileType,
            string tagTemplate,
            ISet<string> totalFileNames,
            bool minifyCSS, bool minifyJavaScript,
            UrlProcessor urlProcessor)
        {
            StringBuilder tags = new StringBuilder();

            foreach (KeyValuePair<string, List<AbsoluteUrl>> urlsOfMedia in urlsByMedia)
            {
                string mediaProperty = MediaProperty(urlsOfMedia.Key);

                // Each entry in urlSets results in exactly one tag
                List<List<AbsoluteUrl>> urlSets = new List<List<AbsoluteUrl>>();
                if (combinePerMedia)
                {
                    urlSets.Add(urlsOfMedia.Value);
                }
                else
                {
                    foreach (AbsoluteUrl u in urlsOfMedia.Value)
                    {
                        urlSets.Add(new List<AbsoluteUrl>(new AbsoluteUrl[] { u }));
                    }
                }

                foreach (List<AbsoluteUrl> urlSet in urlSets)
                {
                    string fileUrl = CombinedFile.Url(
                        HttpContext.Current, urlSet, fileType, minifyCSS, minifyJavaScript,
                        urlProcessor, totalFileNames);

                    tags.Append(string.Format(tagTemplate, fileUrl, mediaProperty));
                }
            }

            return tags.ToString();
        }

        /// <summary>
        /// Turns a media type (screen, print, etc.) into the media property to put in a tag.
        /// </summary>
        /// <param name="mediaType">
        /// The media type. May be null or empty.
        /// </param>
        /// <returns>
        /// A media property followed by a single space, or the empty string if no property is
        /// needed: that is, when the media type is null, empty or "all" (in any casing).
        /// </returns>
        private string MediaProperty(string mediaType)
        {
            // No media given: no property.
            if (string.IsNullOrEmpty(mediaType))
            {
                return "";
            }

            // "all" is what a tag without media property means anyway.
            bool isDefaultMedia = (string.Compare(mediaType, "all", true) == 0);
            if (isDefaultMedia)
            {
                return "";
            }

            // The trailing space separates the property from whatever follows it in the tag.
            return string.Format("media=\"{0}\" ", mediaType);
        }

        /// <summary>
        /// Walks through the links in one group (link tags for CSS files, or script tags
        /// for JavaScript files) and records the url of each link under its media.
        /// </summary>
        /// <param name="linkGroupText">
        /// Text of the group.
        /// </param>
        /// <param name="linkRegexp">
        /// Regular expression that matches a single link (that is, a CSS link or a script tag).
        /// </param>
        /// <param name="groupNameUrl">
        /// Name of the capture group in linkRegexp that holds the url.
        /// </param>
        /// <param name="groupNameMedia">
        /// Name of the capture group in linkRegexp that holds the media.
        /// null if there is no such group. Links without media are recorded under "all".
        /// </param>
        /// <param name="mediaSpecificUrls">
        /// Receives the url of each link, under the media of that link.
        /// </param>
        /// <param name="mediaSpecificUrls2">
        /// Receives the very same urls under the same media.
        /// </param>
        /// <exception cref="Exception">
        /// A link was matched for which no url was captured.
        /// </exception>
        private void AnalyzeGroup(
            string linkGroupText,
            string linkRegexp,
            string groupNameUrl, string groupNameMedia,
            MediaSpecificUrls mediaSpecificUrls, MediaSpecificUrls mediaSpecificUrls2)
        {
            Regex singleLinkRegex = new Regex(linkRegexp, RegexOptions.IgnoreCase);
            bool linksCanHaveMedia = !string.IsNullOrEmpty(groupNameMedia);

            for (Match linkMatch = singleLinkRegex.Match(linkGroupText);
                 linkMatch.Success;
                 linkMatch = linkMatch.NextMatch())
            {
                // The url is the first capture of the url group. A link without url is an error.
                CaptureCollection urlCaptures = linkMatch.Groups[groupNameUrl].Captures;
                string url = (urlCaptures.Count > 0) ? urlCaptures[0].Value : null;

                if (string.IsNullOrEmpty(url))
                {
                    throw new Exception(
                        string.Format("Tag {0} in group {1} has no url", linkMatch.Value, linkGroupText));
                }

                // The media is the first capture of the media group, if the link type has media at all.
                string media = null;
                if (linksCanHaveMedia)
                {
                    CaptureCollection mediaCaptures = linkMatch.Groups[groupNameMedia].Captures;
                    if (mediaCaptures.Count > 0)
                    {
                        media = mediaCaptures[0].Value;
                    }
                }

                // "all" is default value for media
                if (string.IsNullOrEmpty(media))
                {
                    media = "all";
                }

                // Resolve the url against the url of the current request, and store it in both collections.
                AbsoluteUrl resolvedUrl = CombinedFile.UrlToAbsolutePath(url, null, HttpContext.Current.Request.Url);
                mediaSpecificUrls.Add(media, resolvedUrl);
                mediaSpecificUrls2.Add(media, resolvedUrl);
            }
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect
Australia Australia
Twitter: @MattPerdeck
LinkedIn: au.linkedin.com/in/mattperdeck
Current project: JSNLog JavaScript Logging Package

Matt has over 9 years of .NET and SQL Server development experience. Before getting into .NET, he worked on a number of systems, ranging from the largest ATM network in The Netherlands to embedded software in advanced Wide Area Networks and the largest ticketing web site in Australia. He has lived and worked in Australia, The Netherlands, Slovakia and Thailand.

He is the author of the book ASP.NET Performance Secrets (www.amazon.com/ASP-NET-Site-Performance-Secrets-Perdeck/dp/1849690685) in which he shows in clear and practical terms how to quickly find the biggest bottlenecks holding back the performance of your web site, and how to then remove those bottlenecks. The book deals with all environments affecting a web site - the web server, the database server and the browser.

Matt currently lives in Sydney, Australia. He recently worked at Readify and the global professional services company PwC. He now works at SP Health, a global provider of weight loss web sites such as CSIRO's TotalWellBeingDiet.com and BiggestLoserClub.com.

Comments and Discussions