Click here to Skip to main content
15,883,901 members
Articles / Desktop Programming / Win32

Stopwatch

Rate me:
Please Sign up or sign in to vote.
4.97/5 (29 votes)
3 Jan 2015CPOL6 min read 66K   1.5K   43  
Benchmark C++ std::vector vs raw arrays, move assignable/constructable & copy assignable/constructable
#include "stdafx.h"

#include <stdio.h>
#include <stdlib.h>
#include "hnum_pssp_defs.h"

namespace harlinn
{
    namespace numerics
    {
        namespace SuperLU
        {
            namespace Single
            {


                int
                psgstrf_column_bmod(
		                    const int  pnum,   /* process number */
		                    const int  jcol,   /* current column in the panel */
		                    const int  fpanelc,/* first column in the panel */
		                    const int  nseg,   /* number of s-nodes to update jcol */
		                    int        *segrep,/* in */
		                    int        *repfnz,/* in */
		                    float     *dense, /* modified */
		                    float     *tempv, /* working array */
		                    pxgstrf_shared_t *pxgstrf_shared, /* modified */
		                    Gstat_t *Gstat     /* modified */
		                    )
                {
                /*
                 * -- SuperLU MT routine (version 2.0) --
                 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
                 * and Xerox Palo Alto Research Center.
                 * September 10, 2007
                 *
                 * Purpose:
                 * ========
                 *    Performs numeric block updates (sup-col) in topological order.
                 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
                 *    Special processing on the supernodal portion of L\U[*,j].
                 *
                 * Return value:
                 * =============
                 *      0 - successful return
                 *    > 0 - number of bytes allocated when run out of space
                 *
                 */
                #if ( MACH==CRAY_PVP )
                    _fcd ftcs1 = _cptofcd("L", strlen("L")),
                         ftcs2 = _cptofcd("N", strlen("N")),
                         ftcs3 = _cptofcd("U", strlen("U"));
                #endif
    
                #ifdef USE_VENDOR_BLAS    
                    int         incx = 1, incy = 1;
                    float      alpha, beta;
                #endif
                    GlobalLU_t *Glu = pxgstrf_shared->Glu;   /* modified */
    
                    /* krep = representative of current k-th supernode
                     * fsupc = first supernodal column
                     * nsupc = no of columns in supernode
                     * nsupr = no of rows in supernode (used as leading dimension)
                     * luptr = location of supernodal LU-block in storage
                     * kfnz = first nonz in the k-th supernodal segment
                     * no_zeros = no of leading zeros in a supernodal U-segment
                     */
                    float	  ukj, ukj1, ukj2;
                    register int lptr, kfnz, isub, irow, i, no_zeros;
                    register int luptr, luptr1, luptr2;
                    int          fsupc, nsupc, nsupr, segsze;
                    int          nrow;	  /* No of rows in the matrix of matrix-vector */
                    int          jsupno, k, ksub, krep, krep_ind, ksupno;
                    int          ufirst, nextlu;
                    int          fst_col; /* First column within small LU update */
                    int          d_fsupc; /* Distance between the first column of the current
			                     panel and the first column of the current snode.*/
                    int          *xsup, *supno;
                    int          *lsub, *xlsub, *xlsub_end;
                    float       *lusup;
                    int          *xlusup, *xlusup_end;
                    float       *tempv1;
                    int          mem_error;
                    register float flopcnt;

                    float      zero = 0.0;
                    float      one = 1.0;
                    float      none = -1.0;

                    xsup       = Glu->xsup;
                    supno      = Glu->supno;
                    lsub       = Glu->lsub;
                    xlsub      = Glu->xlsub;
                    xlsub_end  = Glu->xlsub_end;
                    lusup      = Glu->lusup;
                    xlusup     = Glu->xlusup;
                    xlusup_end = Glu->xlusup_end;
                    jsupno     = supno[jcol];

                    /* 
                     * For each nonz supernode segment of U[*,j] in topological order 
                     */
                    k = nseg - 1;
                    for (ksub = 0; ksub < nseg; ksub++) {

	                krep = segrep[k];
	                k--;
	                ksupno = supno[krep];
                #if ( DEBUGlvel>=2 )
                if (jcol==BADCOL)
                printf("(%d) psgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
                       pnum, jcol, nseg, krep, jsupno, ksupno);
                #endif    
	                if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	                    fsupc = xsup[ksupno];
	                    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	                    /* Distance from the current supernode to the current panel; 
	                       d_fsupc=0 if fsupc >= fpanelc. */
  	                    d_fsupc = fst_col - fsupc; 

	                    luptr = xlusup[fst_col] + d_fsupc;
	                    lptr = xlsub[fsupc] + d_fsupc;
	                    kfnz = repfnz[krep];
	                    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
	                    segsze = krep - kfnz + 1;
	                    nsupc = krep - fst_col + 1;
	                    nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	                    nrow = nsupr - d_fsupc - nsupc;
	                    krep_ind = lptr + nsupc - 1;

                            flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;
	                    Gstat->procstat[pnum].fcops += flopcnt;

                #if ( DEBUGlevel>=2 )
                if (jcol==BADCOL)	    
                printf("(%d) psgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\
                fsupc %d, nsupr %d, nsupc %d\n",
                       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);

                #endif


	                    /* 
	                     * Case 1: Update U-segment of size 1 -- col-col update 
	                     */
	                    if ( segsze == 1 ) {
	  	                ukj = dense[lsub[krep_ind]];
		                luptr += nsupr*(nsupc-1) + nsupc;

		                for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		                    irow = lsub[i];
		                    dense[irow] -=  ukj*lusup[luptr];
		                    luptr++;
		                }
	                    } else if ( segsze <= 3 ) {
		                ukj = dense[lsub[krep_ind]];
		                luptr += nsupr*(nsupc-1) + nsupc-1;
		                ukj1 = dense[lsub[krep_ind - 1]];
		                luptr1 = luptr - nsupr;
		                if ( segsze == 2 ) { /* Case 2: 2cols-col update */
		                    ukj -= ukj1 * lusup[luptr1];
		                    dense[lsub[krep_ind]] = ukj;
		                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    	                irow = lsub[i];
		    	                luptr++;
		    	                luptr1++;
		    	                dense[irow] -= ( ukj*lusup[luptr]
					                + ukj1*lusup[luptr1] );
		                    }
		                } else { /* Case 3: 3cols-col update */
		                    ukj2 = dense[lsub[krep_ind - 2]];
		                    luptr2 = luptr1 - nsupr;
		                    ukj1 -= ukj2 * lusup[luptr2-1];
		                    ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
		                    dense[lsub[krep_ind]] = ukj;
		                    dense[lsub[krep_ind-1]] = ukj1;
		                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    	                irow = lsub[i];
		    	                luptr++;
		    	                luptr1++;
			                luptr2++;
		    	                dense[irow] -= ( ukj*lusup[luptr]
			                     + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
		                    }
		                }


	                    } else {
	  	                /*
		                 * Case: sup-col update
		                 * Perform a triangular solve and block update,
		                 * then scatter the result of sup-col update to dense
		                 */
		                no_zeros = kfnz - fst_col;

	                        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	                        isub = lptr + no_zeros;
	                        for (i = 0; i < segsze; i++) {
	  	                    irow = lsub[isub];
		                    tempv[i] = dense[irow];
		                    ++isub; 
	                        }

	                        /* Dense triangular solve -- start effective triangle */
		                luptr += nsupr * no_zeros + no_zeros; 
                #ifdef USE_VENDOR_BLAS
                #if ( MACH==CRAY_PVP )
		                STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		                       &nsupr, tempv, &incx );
                #else
		                strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		                       &nsupr, tempv, &incx );
                #endif
		
 		                luptr += segsze;  /* Dense matrix-vector */
		                tempv1 = &tempv[segsze];
		                alpha = one;
		                beta = zero;
                #if ( MACH==CRAY_PVP )
		                SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		                       &nsupr, tempv, &incx, &beta, tempv1, &incy );
                #else
		                sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		                       &nsupr, tempv, &incx, &beta, tempv1, &incy );
                #endif
                #else
		                slsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		                luptr += segsze;  /* Dense matrix-vector */
		                tempv1 = &tempv[segsze];
		                smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
                #endif
                                /* Scatter tempv[] into SPA dense[*] */
                                isub = lptr + no_zeros;
                                for (i = 0; i < segsze; i++) {
                                    irow = lsub[isub];
                                    dense[irow] = tempv[i]; /* Scatter */
                                    tempv[i] = zero;
                                    isub++;
                                }

		                /* Scatter tempv1[] into SPA dense[*] */
		                for (i = 0; i < nrow; i++) {
		                    irow = lsub[isub];
                                    dense[irow] -= tempv1[i];
		                    tempv1[i] = zero;
		                    ++isub;
		                }
	                    } /* else segsze >= 4 */
	    
	                } /* if jsupno ... */

                    } /* for each segment... */

    
                    /* ------------------------------------------
                       Process the supernodal portion of L\U[*,j]
                       ------------------------------------------ */
    
                    fsupc = SUPER_FSUPC (jsupno);
                    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
                    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, 
			                       pxgstrf_shared)) )
	                return mem_error;
                    xlusup[jcol] = nextlu;
                    lusup = Glu->lusup;
    
                    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
                    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
  	                irow = lsub[isub];
	                lusup[nextlu] = dense[irow];
	                dense[irow] = zero;
                #ifdef DEBUG
                if (jcol == -1)
                    printf("(%d) psgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n",
	                   pnum, jcol, irow, lusup[nextlu]);
                #endif	
	                ++nextlu;
                    }
                    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

                #if ( DEBUGlevel>=2 )
                if (jcol == -1) {
                    nrow = xlusup_end[jcol] - xlusup[jcol];
                    print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
		                     &lusup[xlusup[jcol]]);
                }
                #endif    
    
                    /*
                     * For more updates within the panel (also within the current supernode), 
                     * should start from the first column of the panel, or the first column 
                     * of the supernode, whichever is bigger. There are 2 cases:
                     *    (1) fsupc < fpanelc,  then fst_col := fpanelc
                     *    (2) fsupc >= fpanelc, then fst_col := fsupc
                     */
                    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

                    if ( fst_col < jcol ) {

  	                /* distance between the current supernode and the current panel;
	                   d_fsupc=0 if fsupc >= fpanelc. */
  	                d_fsupc = fst_col - fsupc;

	                lptr = xlsub[fsupc] + d_fsupc;
	                luptr = xlusup[fst_col] + d_fsupc;
	                nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	                nsupc = jcol - fst_col;	/* Excluding jcol */
	                nrow = nsupr - d_fsupc - nsupc;

	                /* points to the beginning of jcol in supernode L\U[*,jsupno] */
	                ufirst = xlusup[jcol] + d_fsupc;	

                #if ( DEBUGlevel>=2 )
                if (jcol==BADCOL)
                printf("(%d) psgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
                       pnum, jcol, fsupc, nsupr, nsupc, nrow);
                #endif    

                        flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc;
	                Gstat->procstat[pnum].fcops += flopcnt;

                /*	ops[TRSV] += nsupc * (nsupc - 1);
	                ops[GEMV] += 2 * nrow * nsupc;    */
	
                #ifdef USE_VENDOR_BLAS
	                alpha = none; beta = one; /* y := beta*y + alpha*A*x */
                #if ( MACH==CRAY_PVP )
	                STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	                       &nsupr, &lusup[ufirst], &incx );
	                SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	                       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
                #else
	                strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	                       &nsupr, &lusup[ufirst], &incx );
	                sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	                       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
                #endif
                #else
	                slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	                smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		                 &lusup[ufirst], tempv );
	
                        /* Copy updates from tempv[*] into lusup[*] */
	                isub = ufirst + nsupc;
	                for (i = 0; i < nrow; i++) {
                            lusup[isub] -= tempv[i];
                            tempv[i] = 0.0;
	                    ++isub;
	                }
                #endif
                    } /* if fst_col < jcol ... */ 

                    return 0;
                }

            };
        };
    };
};

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Architect Sea Surveillance AS
Norway Norway
Chief Architect - Sea Surveillance AS.

Specializing in integrated operations and high performance computing solutions.

I’ve been fooling around with computers since the early eighties, I’ve even done work on CP/M and MP/M.

Wrote my first “real” program on a BBC micro model B based on a series in a magazine at that time. It was fun and I got hooked on this thing called programming ...

A few Highlights:

  • High performance application server development
  • Model Driven Architecture and Code generators
  • Real-Time Distributed Solutions
  • C, C++, C#, Java, TSQL, PL/SQL, Delphi, ActionScript, Perl, Rexx
  • Microsoft SQL Server, Oracle RDBMS, IBM DB2, PostGreSQL
  • AMQP, Apache qpid, RabbitMQ, Microsoft Message Queuing, IBM WebSphereMQ, Oracle TuxidoMQ
  • Oracle WebLogic, IBM WebSphere
  • Corba, COM, DCE, WCF
  • AspenTech InfoPlus.21(IP21), OsiSoft PI


More information about what I do for a living can be found at: harlinn.com or LinkedIn

You can contact me at espen@harlinn.no

Comments and Discussions