#include "stdafx.h"
/*
* -- SuperLU MT routine (version 1.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* August 15, 1997
*
*/
#include <stdio.h>
#include <math.h>
#include "hnum_slucmn.h"
namespace harlinn
{
namespace numerics
{
namespace SuperLU
{
#define SPLIT_TOP
/*
 * ParallelInit() builds the shared state consumed by the parallel
 * LU factorization scheduler:
 *   - the mutex locks protecting the global LU data structures,
 *   - per-column spin locks, per-panel status records and fb_cols,
 *   - the task queue, seeded with the initially-ready work,
 *   - the partition of the n columns into panels.
 *
 * Arguments
 * =========
 *   n                 - number of columns of the matrix.
 *   pxgstrf_relax     - relaxed supernodes; entry 0 carries the count in
 *                       .size, entries 1..count carry {fcol, size}.
 *   superlumt_options - supplies etree, panel_size, relax and nprocs.
 *   pxgstrf_shared    - (in/out) shared scheduling state filled in here.
 *
 * Return value: 0 (calls SUPERLU_ABORT() if the task queue cannot be
 * created).
 */
int
ParallelInit(int n, pxgstrf_relax_t *pxgstrf_relax,
superlumt_options_t *superlumt_options,
pxgstrf_shared_Base_t *pxgstrf_shared)
{
int *etree = superlumt_options->etree;
register int w, dad, ukids, i, j, k, rs, panel_size, relax;
register int P, w_top, do_split = 0;
panel_t panel_type;
int *panel_histo = pxgstrf_shared->Gstat->panel_histo;
register int nthr, concurrency, info;
Gstat_t *Gstat = pxgstrf_shared->Gstat;
/* ---- Platform-specific creation of the NO_GLU_LOCKS mutexes ---- */
#if ( MACH==SUN )
register int sync_type = USYNC_THREAD;
/* Set concurrency level. */
nthr = sysconf(_SC_NPROCESSORS_ONLN);
thr_setconcurrency(nthr); /* number of LWPs */
concurrency = thr_getconcurrency();
#if ( PRNTlevel==1 )
/* NOTE(review): P is printed here before it is assigned (P = 12 is set
   further below), so the value shown is indeterminate on this path. */
printf(".. CPUs %d, concurrency (#LWP) %d, P %d\n",
nthr, concurrency, P);
#endif
/* Initialize mutex variables. */
pxgstrf_shared->lu_locks = (mutex_t *)
SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(mutex_t));
for (i = 0; i < NO_GLU_LOCKS; ++i)
mutex_init(&pxgstrf_shared->lu_locks[i], sync_type, 0);
#elif ( MACH==DEC || MACH==PTHREAD )
pxgstrf_shared->lu_locks = (pthread_mutex_t *)
SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(pthread_mutex_t));
for (i = 0; i < NO_GLU_LOCKS; ++i)
pthread_mutex_init(&pxgstrf_shared->lu_locks[i], NULL);
#else
/* Other platforms: storage only, no lock initialization here. */
pxgstrf_shared->lu_locks = (mutex_t *) SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(mutex_t));
#endif
#if ( PRNTlevel==1 )
printf(".. ParallelInit() ... nprocs %2d\n", superlumt_options->nprocs);
#endif
/* Per-column spin locks and per-panel bookkeeping; pan_status and
   fb_cols have n+1 entries (the extra slot is the dummy root set up
   at the end). */
pxgstrf_shared->spin_locks = intCalloc(n);
pxgstrf_shared->pan_status =
(pan_status_t *) SUPERLU_MALLOC((n+1)*sizeof(pan_status_t));
pxgstrf_shared->fb_cols = intMalloc(n+1);
panel_size = superlumt_options->panel_size;
relax = superlumt_options->relax;
/* Clear the panel-width histogram; a panel is at most
   max(panel_size, relax) columns wide. */
w = SUPERLU_MAX(panel_size, relax) + 1;
for (i = 0; i < w; ++i) panel_histo[i] = 0;
pxgstrf_shared->num_splits = 0;
if ( (info = queue_init(&pxgstrf_shared->taskq, n)) ) {
fprintf(stderr, "ParallelInit(): %d\n", info);
SUPERLU_ABORT("queue_init fails.");
}
/* Count children of each node in the etree. */
for (i = 0; i <= n; ++i) pxgstrf_shared->pan_status[i].ukids = 0;
for (i = 0; i < n; ++i) {
dad = etree[i];
++pxgstrf_shared->pan_status[dad].ukids;
}
/* Find the panel partitions and initialize each panel's status */
#ifdef PROFILE
Gstat->num_panels = 0;
#endif
pxgstrf_shared->tasks_remain = 0;
rs = 1; /* index for the next relaxed s-node */
w_top = panel_size/2;
if ( w_top == 0 ) w_top = 1;
/* Hard-coded splitting threshold: once fewer than panel_size * P
   columns remain, large panels are split (to width w_top) to expose
   more parallelism near the top of the etree. */
P = 12;
for (i = 0; i < n; ) {
if ( pxgstrf_relax[rs].fcol == i ) {
/* Column i starts a relaxed supernode: the whole supernode forms
   one panel and is immediately schedulable. */
w = pxgstrf_relax[rs++].size;
panel_type = RELAXED_SNODE;
pxgstrf_shared->pan_status[i].state = CANGO;
} else {
/* Adjust panel_size so that a panel won't overlap with
the next relaxed snode. */
#if 0
/* Only works when etree is postordered. */
w = SUPERLU_MIN(panel_size, pxgstrf_relax[rs].fcol - i);
#else
w = panel_size;
for (k = i + 1; k < SUPERLU_MIN(i + panel_size, n); ++k)
if ( k == pxgstrf_relax[rs].fcol ) {
w = k - i; /* panel stops at column k-1 */
break;
}
if ( k == n ) w = n - i;
#endif
#ifdef SPLIT_TOP
if ( !do_split ) {
if ( (n-i) < panel_size * P ) do_split = 1;
}
if ( do_split && w > w_top ) { /* split large panel */
w = w_top;
++pxgstrf_shared->num_splits;
}
#endif
for (j = i+1; j < i + w; ++j)
/* Do not allow panel to cross a branch point in the etree. */
if ( pxgstrf_shared->pan_status[j].ukids > 1 ) break;
w = j - i; /* j should start a new panel */
panel_type = REGULAR_PANEL;
pxgstrf_shared->pan_status[i].state = UNREADY;
#ifdef DOMAINS
if ( in_domain[i] == TREE_DOMAIN ) panel_type = TREE_DOMAIN;
#endif
}
if ( panel_type == REGULAR_PANEL ) {
++pxgstrf_shared->tasks_remain;
/*printf("nondomain panel %6d -- %6d\n", i, i+w-1);
fflush(stdout);*/
}
/* Within the panel, column i+t stores size = -t (its negated offset
   from the leading column); the leading column i is then overwritten
   with the true width w below. */
ukids = k = 0;
for (j = i; j < i + w; ++j) {
pxgstrf_shared->pan_status[j].size = k--;
pxgstrf_shared->pan_status[j].type = panel_type;
ukids += pxgstrf_shared->pan_status[j].ukids;
}
pxgstrf_shared->pan_status[i].size = w; /* leading column */
/* only count those kids outside the panel */
pxgstrf_shared->pan_status[i].ukids = ukids - (w-1);
panel_histo[w]++;
#ifdef PROFILE
Gstat->panstat[i].size = w;
++Gstat->num_panels;
#endif
pxgstrf_shared->fb_cols[i] = i;
i += w; /* move to the next panel */
} /* for i ... */
/* Dummy root */
pxgstrf_shared->pan_status[n].size = 1;
pxgstrf_shared->pan_status[n].state = UNREADY;
#if ( PRNTlevel==1 )
printf(".. Split: P %d, #nondomain panels %d\n", P, pxgstrf_shared->tasks_remain);
#endif
/* Seed the task queue with the work that is ready at the start. */
#ifdef DOMAINS
EnqueueDomains(&pxgstrf_shared->taskq, list_head, pxgstrf_shared);
#else
EnqueueRelaxSnode(&pxgstrf_shared->taskq, n, pxgstrf_relax, pxgstrf_shared);
#endif
#if ( PRNTlevel==1 )
printf(".. # tasks %d\n", pxgstrf_shared->tasks_remain);
fflush(stdout);
#endif
#ifdef PREDICT_OPT
/* Set up structure describing children: cp_firstkid[dad] heads a list
   of dad's children chained through cp_nextkid[].
   NOTE(review): cp_firstkid/cp_nextkid are declared elsewhere. */
for (i = 0; i <= n; cp_firstkid[i++] = EMPTY);
for (i = n-1; i >= 0; i--) {
dad = etree[i];
cp_nextkid[i] = cp_firstkid[dad];
cp_firstkid[dad] = i;
}
#endif
return 0;
} /* ParallelInit */
/*
 * Free the storage used by the parallel scheduling algorithm.
 * Destroys the mutexes created by ParallelInit() (on threaded builds),
 * then releases the spin locks, the per-panel status array, fb_cols,
 * and the task queue.
 * Return value: always 0.
 */
int ParallelFinalize(pxgstrf_shared_Base_t *pxgstrf_shared)
{
/* Destroy mutexes */
#if ( MACH==SUN )
register int i;
for (i = 0; i < NO_GLU_LOCKS; ++i)
mutex_destroy( &pxgstrf_shared->lu_locks[i] );
#elif ( MACH==DEC || MACH==PTHREAD )
register int i;
for (i = 0; i < NO_GLU_LOCKS; ++i)
pthread_mutex_destroy( &pxgstrf_shared->lu_locks[i] );
#endif
/* Release the arrays allocated in ParallelInit(). */
SUPERLU_FREE ((void*)pxgstrf_shared->lu_locks);
SUPERLU_FREE ((int*)pxgstrf_shared->spin_locks);
SUPERLU_FREE (pxgstrf_shared->pan_status);
SUPERLU_FREE (pxgstrf_shared->fb_cols);
//SUPERLU_FREE (pxgstrf_shared->Glu->map_in_sup); TODO: Ensure that map_in_sup gets deleted
queue_destroy(&pxgstrf_shared->taskq);
#if ( PRNTlevel==1 )
printf(".. # panel splittings %d\n", pxgstrf_shared->num_splits);
#endif
return 0;
}
/*
 * Allocate the backing store of a task queue of capacity n and reset
 * its counters.
 *
 * Arguments:
 *   q - (output) queue to initialize.
 *   n - capacity in qitem_t slots; must be >= 1.
 *
 * Return value: 0 on success, -1 if n < 1 or the allocation fails.
 */
int queue_init(queue_t *q, int n)
{
if ( n < 1 ) return (-1);
q->queue = (qitem_t *) SUPERLU_MALLOC(n*sizeof(qitem_t));
/* Fix: the allocation result was previously used unchecked; the caller
   (ParallelInit) already aborts on any nonzero return. */
if ( !q->queue ) return (-1);
q->count = 0;
q->head = 0;
q->tail = 0;
return 0;
}
/*
 * Release the queue's backing store.
 * The storage pointer is reset to NULL and the counters cleared so a
 * stale queue handle cannot trigger a double free or an out-of-bounds
 * access afterwards (CERT MEM01-C).
 * Return value: always 0.
 */
int queue_destroy(queue_t *q)
{
SUPERLU_FREE( q->queue );
/* Fix: do not leave a dangling pointer / stale indices behind. */
q->queue = NULL;
q->count = 0;
q->head = 0;
q->tail = 0;
return 0;
}
/*
 * Append one item at the tail of the queue.
 * Return value: number of items in the queue after the append.
 */
int Enqueue(queue_t *q, qitem_t item)
{
    q->queue[q->tail] = item;
    ++q->tail;
    return ++q->count;
}
/*
 * Remove the item at the head of the queue and store it in *item.
 * Return value: >= 0 number of items remaining in the queue
 *               = -1 queue is empty (EMPTY)
 */
int Dequeue(queue_t *q, qitem_t *item)
{
    if ( q->count <= 0 ) return EMPTY;   /* nothing to take */
    *item = q->queue[q->head];
    ++q->head;
    --q->count;
    return (q->count);
}
/*
 * Debugging aid: print the queue's item count followed by every item
 * currently stored between head and tail.
 * Return value: always 0.
 */
int QueryQueue(queue_t *q)
{
    register int pos;
    printf("Queue count: %d\n", q->count);
    for (pos = q->head; pos < q->tail; ++pos) {
        printf("%8d\titem %8d\n", pos, q->queue[pos]);
    }
    return 0;
}
/*
 * Put the first column of every relaxed supernode on the task queue;
 * these are the tasks that are ready at the start of factorization.
 * Entry 0 of pxgstrf_relax stores the number of relaxed supernodes in
 * its .size field; entries 1..count hold the supernodes themselves.
 * Each enqueued column also bumps the global tasks_remain counter.
 * Return value: always 0.
 */
int EnqueueRelaxSnode(queue_t *q, int n, pxgstrf_relax_t *pxgstrf_relax,
		      pxgstrf_shared_Base_t *pxgstrf_shared)
{
    register int snode, nsnodes;
    nsnodes = pxgstrf_relax[0].size;
    for (snode = 1; snode <= nsnodes; ++snode) {
	q->queue[q->tail] = pxgstrf_relax[snode].fcol;
	++q->tail;
	++q->count;
	++pxgstrf_shared->tasks_remain;
    }
#if ( PRNTlevel==1 )
    printf(".. EnqueueRelaxSnode(): count %d\n", q->count);
#endif
    return 0;
}
/*
 * Enqueue the initial independent domains.
 * A pair of two numbers {root, fst_desc} is added in the queue.
 * The Branch list is consumed: every node is freed as it is visited,
 * so list_head is invalid after this call.
 * Return value: always 0.
 */
/*int EnqueueDomains(int P, queue_t *q, struct Branch **proc_domains_h)*/
int EnqueueDomains(queue_t *q, struct Branch *list_head,
pxgstrf_shared_Base_t *pxgstrf_shared)
{
struct Branch *b, *thrash;
/* for (pnum = 0; pnum < P; ++pnum) {
for (b = proc_domains_h[pnum]; b != NULL; ) {*/
b = list_head;
while ( b ) {
/* Save the current node so it can be freed after b advances. */
thrash = b;
q->queue[q->tail++] = b->root;
q->queue[q->tail++] = b->first_desc;
q->count = q->count + 2;
/* Mark the domain root as immediately schedulable. */
STATE ( b->root ) = CANGO;
++pxgstrf_shared->tasks_remain;
b = b->next;
SUPERLU_FREE (thrash);
}
/* NOTE(review): unlike the sibling routines, this diagnostic print is
   not guarded by PRNTlevel -- presumably intentional debug output. */
printf("EnqueueDomains(): count %d\n", q->count);
return 0;
}
/*
 * Atomically increment the shared counter *data (the global supernode
 * count) under the NSUPER_LOCK mutex and return the new value to the
 * calling processor.
 *
 * Arguments:
 *   pnum           - calling processor number (used for profiling only).
 *   pxgstrf_shared - shared state holding the lu_locks array.
 *   data           - (in/out) shared counter to bump.
 *
 * Return value: the incremented counter value.
 */
int NewNsuper(const int pnum, pxgstrf_shared_Base_t *pxgstrf_shared, int *data)
{
register int i;
/* NOTE(review): declared mutex_t* on every build, although on
   DEC/PTHREAD builds lu_locks was allocated as pthread_mutex_t* --
   presumably mutex_t is typedef'ed compatibly in the header; verify. */
mutex_t *lock = &pxgstrf_shared->lu_locks[NSUPER_LOCK];
Gstat_t *Gstat = pxgstrf_shared->Gstat;
#ifdef PROFILE
double t = SuperLU_timer_();
#endif
/* Enter the critical section -- mechanism depends on the platform
   (mutex, pthread mutex, or compiler pragma). */
#if ( MACH==SUN )
mutex_lock(lock);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock(lock);
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock(lock)
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard (*lock)
#endif
{
i = ++(*data);
}
/* Leave the critical section (SGI/ORIGIN pragma scopes the block above,
   so no explicit unlock appears for that platform). */
#if ( MACH==SUN )
mutex_unlock(lock);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock(lock);
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard (*lock)
#endif
#ifdef PROFILE
/* Charge the time spent in the critical section to processor pnum. */
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
return i;
}
};
};
};