#include "stdafx.h"
/*
* File name: dmyblas2.c
* Purpose:
* Level 2 BLAS operations: solves and matvec, written in C.
* Note:
* This is only used when the system lacks an efficient BLAS library.
*/
#include "../hnum_pdsp_defs.h"
namespace harlinn
{
namespace numerics
{
namespace SuperLU
{
/*
* Solves a dense UNIT lower triangular system. The unit lower
* triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
* The solution will be returned in the resultVector vector.
*/
void dlsolve ( int ldm, int ncol, double *M, double *resultVector )
{
int k;
double x0, x1, x2, x3, x4, x5, x6, x7;
double *M0;
register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
register int firstcol = 0;
M0 = &M[0];
while ( firstcol < ncol - 7 )
{
// Do 8 columns
Mki0 = M0 + 1;
Mki1 = Mki0 + ldm + 1;
Mki2 = Mki1 + ldm + 1;
Mki3 = Mki2 + ldm + 1;
Mki4 = Mki3 + ldm + 1;
Mki5 = Mki4 + ldm + 1;
Mki6 = Mki5 + ldm + 1;
Mki7 = Mki6 + ldm + 1;
x0 = resultVector[firstcol];
x1 = resultVector[firstcol+1] - x0 * *Mki0++;
x2 = resultVector[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
x3 = resultVector[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
x4 = resultVector[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - x3 * *Mki3++;
x5 = resultVector[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - x3 * *Mki3++ - x4 * *Mki4++;
x6 = resultVector[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
x7 = resultVector[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++ - x6 * *Mki6++;
resultVector[++firstcol] = x1;
resultVector[++firstcol] = x2;
resultVector[++firstcol] = x3;
resultVector[++firstcol] = x4;
resultVector[++firstcol] = x5;
resultVector[++firstcol] = x6;
resultVector[++firstcol] = x7;
++firstcol;
for (k = firstcol; k < ncol; k++)
{
resultVector[k] = resultVector[k] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++ - x6 * *Mki6++ - x7 * *Mki7++;
}
M0 += 8 * ldm + 8;
}
while ( firstcol < ncol - 3 )
{
Mki0 = M0 + 1;
Mki1 = Mki0 + ldm + 1;
Mki2 = Mki1 + ldm + 1;
Mki3 = Mki2 + ldm + 1;
x0 = resultVector[firstcol];
x1 = resultVector[firstcol+1] - x0 * *Mki0++;
x2 = resultVector[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
x3 = resultVector[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
resultVector[++firstcol] = x1;
resultVector[++firstcol] = x2;
resultVector[++firstcol] = x3;
++firstcol;
for (k = firstcol; k < ncol; k++)
{
resultVector[k] = resultVector[k] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - x3 * *Mki3++;
}
M0 += 4 * ldm + 4;
}
if ( firstcol < ncol - 1 )
{
Mki0 = M0 + 1;
Mki1 = Mki0 + ldm + 1;
x0 = resultVector[firstcol];
x1 = resultVector[firstcol+1] - x0 * *Mki0++;
resultVector[++firstcol] = x1;
++firstcol;
for (k = firstcol; k < ncol; k++)
{
resultVector[k] = resultVector[k] - x0 * *Mki0++ - x1 * *Mki1++;
}
}
}
/*
* Solves a dense upper triangular system. The upper triangular matrix is
* stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
* in the rhs vector.
*/
void dusolve ( int ldm, int ncol, double *M, double *resultVector )
{
double xj;
int jcol, j, irow;
jcol = ncol - 1;
for (j = 0; j < ncol; j++)
{
xj = resultVector[jcol] / M[jcol + jcol*ldm];
resultVector[jcol] = xj;
for (irow = 0; irow < jcol; irow++)
{
resultVector[irow] -= xj * M[irow + jcol*ldm];
}
jcol--;
}
}
/*
* Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
* The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
*/
void dmatvec ( int ldm, int nrow, int ncol, double *M, double *vec, double *Mxvec )
{
double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
double *M0;
register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
register int firstcol = 0;
int k;
M0 = &M[0];
while ( firstcol < ncol - 7 )
{
// Do 8 columns
Mki0 = M0;
Mki1 = Mki0 + ldm;
Mki2 = Mki1 + ldm;
Mki3 = Mki2 + ldm;
Mki4 = Mki3 + ldm;
Mki5 = Mki4 + ldm;
Mki6 = Mki5 + ldm;
Mki7 = Mki6 + ldm;
vi0 = vec[firstcol++];
vi1 = vec[firstcol++];
vi2 = vec[firstcol++];
vi3 = vec[firstcol++];
vi4 = vec[firstcol++];
vi5 = vec[firstcol++];
vi6 = vec[firstcol++];
vi7 = vec[firstcol++];
for (k = 0; k < nrow; k++)
{
Mxvec[k] += vi0 * *Mki0++ +
vi1 * *Mki1++ +
vi2 * *Mki2++ +
vi3 * *Mki3++ +
vi4 * *Mki4++ +
vi5 * *Mki5++ +
vi6 * *Mki6++ +
vi7 * *Mki7++;
}
M0 += 8 * ldm;
}
while ( firstcol < ncol - 3 )
{
// Do 4 columns
Mki0 = M0;
Mki1 = Mki0 + ldm;
Mki2 = Mki1 + ldm;
Mki3 = Mki2 + ldm;
vi0 = vec[firstcol++];
vi1 = vec[firstcol++];
vi2 = vec[firstcol++];
vi3 = vec[firstcol++];
for (k = 0; k < nrow; k++)
{
Mxvec[k] += vi0 * *Mki0++ +
vi1 * *Mki1++ +
vi2 * *Mki2++ +
vi3 * *Mki3++;
}
M0 += 4 * ldm;
}
while ( firstcol < ncol )
{
// Do 1 column
Mki0 = M0;
vi0 = vec[firstcol++];
for (k = 0; k < nrow; k++)
{
Mxvec[k] += vi0 * *Mki0++;
}
M0 += ldm;
}
}
};
};
};