|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file cmemory.c
|
|
kusano |
7d535a |
* \brief Memory details
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 4.0) --
|
|
kusano |
7d535a |
* Lawrence Berkeley National Laboratory.
|
|
kusano |
7d535a |
* June 30, 2009
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include "slu_cdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Internal prototypes */
|
|
kusano |
7d535a |
void *cexpand (int *, MemType,int, int, GlobalLU_t *);
|
|
kusano |
7d535a |
int cLUWorkInit (int, int, int, int **, complex **, GlobalLU_t *);
|
|
kusano |
7d535a |
void copy_mem_complex (int, void *, void *);
|
|
kusano |
7d535a |
void cStackCompress (GlobalLU_t *);
|
|
kusano |
7d535a |
void cSetupSpace (void *, int, GlobalLU_t *);
|
|
kusano |
7d535a |
void *cuser_malloc (int, int, GlobalLU_t *);
|
|
kusano |
7d535a |
void cuser_free (int, int, GlobalLU_t *);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* External prototypes (in memory.c - prec-independent) */
|
|
kusano |
7d535a |
extern void copy_mem_int (int, void *, void *);
|
|
kusano |
7d535a |
extern void user_bcopy (char *, char *, int);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Macros to manipulate stack */
|
|
kusano |
7d535a |
#define StackFull(x) ( x + Glu->stack.used >= Glu->stack.size )
|
|
kusano |
7d535a |
#define NotDoubleAlign(addr) ( (long int)addr & 7 )
|
|
kusano |
7d535a |
#define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L )
|
|
kusano |
7d535a |
#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
|
|
kusano |
7d535a |
(w + 1) * m * sizeof(complex) )
|
|
kusano |
7d535a |
#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Setup the memory model to be used for factorization.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* lwork = 0: use system malloc;
|
|
kusano |
7d535a |
* lwork > 0: use user-supplied work[] space.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void cSetupSpace(void *work, int lwork, GlobalLU_t *Glu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if ( lwork == 0 ) {
|
|
kusano |
7d535a |
Glu->MemModel = SYSTEM; /* malloc/free */
|
|
kusano |
7d535a |
} else if ( lwork > 0 ) {
|
|
kusano |
7d535a |
Glu->MemModel = USER; /* user provided space */
|
|
kusano |
7d535a |
Glu->stack.used = 0;
|
|
kusano |
7d535a |
Glu->stack.top1 = 0;
|
|
kusano |
7d535a |
Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */
|
|
kusano |
7d535a |
Glu->stack.size = Glu->stack.top2;
|
|
kusano |
7d535a |
Glu->stack.array = (void *) work;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void *cuser_malloc(int bytes, int which_end, GlobalLU_t *Glu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
void *buf;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( StackFull(bytes) ) return (NULL);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( which_end == HEAD ) {
|
|
kusano |
7d535a |
buf = (char*) Glu->stack.array + Glu->stack.top1;
|
|
kusano |
7d535a |
Glu->stack.top1 += bytes;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
Glu->stack.top2 -= bytes;
|
|
kusano |
7d535a |
buf = (char*) Glu->stack.array + Glu->stack.top2;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Glu->stack.used += bytes;
|
|
kusano |
7d535a |
return buf;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void cuser_free(int bytes, int which_end, GlobalLU_t *Glu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if ( which_end == HEAD ) {
|
|
kusano |
7d535a |
Glu->stack.top1 -= bytes;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
Glu->stack.top2 += bytes;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
Glu->stack.used -= bytes;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* mem_usage consists of the following fields:
|
|
kusano |
7d535a |
* - for_lu (float)
|
|
kusano |
7d535a |
* The amount of space used in bytes for the L\U data structures.
|
|
kusano |
7d535a |
* - total_needed (float)
|
|
kusano |
7d535a |
* The amount of space needed in bytes to perform factorization.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
register int n, iword, dword, panel_size = sp_ienv(1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Lstore = L->Store;
|
|
kusano |
7d535a |
Ustore = U->Store;
|
|
kusano |
7d535a |
n = L->ncol;
|
|
kusano |
7d535a |
iword = sizeof(int);
|
|
kusano |
7d535a |
dword = sizeof(complex);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* For LU factors */
|
|
kusano |
7d535a |
mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword +
|
|
kusano |
7d535a |
Lstore->nzval_colptr[n] * dword +
|
|
kusano |
7d535a |
Lstore->rowind_colptr[n] * iword );
|
|
kusano |
7d535a |
mem_usage->for_lu += (float)( (n + 1.0) * iword +
|
|
kusano |
7d535a |
Ustore->colptr[n] * (dword + iword) );
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Working storage to support factorization */
|
|
kusano |
7d535a |
mem_usage->total_needed = mem_usage->for_lu +
|
|
kusano |
7d535a |
(float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword +
|
|
kusano |
7d535a |
(panel_size + 1.0) * n * dword );
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
} /* cQuerySpace */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* mem_usage consists of the following fields:
|
|
kusano |
7d535a |
* - for_lu (float)
|
|
kusano |
7d535a |
* The amount of space used in bytes for the L\U data structures.
|
|
kusano |
7d535a |
* - total_needed (float)
|
|
kusano |
7d535a |
* The amount of space needed in bytes to perform factorization.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int ilu_cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
register int n, panel_size = sp_ienv(1);
|
|
kusano |
7d535a |
register float iword, dword;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Lstore = L->Store;
|
|
kusano |
7d535a |
Ustore = U->Store;
|
|
kusano |
7d535a |
n = L->ncol;
|
|
kusano |
7d535a |
iword = sizeof(int);
|
|
kusano |
7d535a |
dword = sizeof(double);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* For LU factors */
|
|
kusano |
7d535a |
mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword +
|
|
kusano |
7d535a |
Lstore->nzval_colptr[n] * dword +
|
|
kusano |
7d535a |
Lstore->rowind_colptr[n] * iword );
|
|
kusano |
7d535a |
mem_usage->for_lu += (float)( (n + 1.0f) * iword +
|
|
kusano |
7d535a |
Ustore->colptr[n] * (dword + iword) );
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Working storage to support factorization.
|
|
kusano |
7d535a |
ILU needs 5*n more integers than LU */
|
|
kusano |
7d535a |
mem_usage->total_needed = mem_usage->for_lu +
|
|
kusano |
7d535a |
(float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword +
|
|
kusano |
7d535a |
(panel_size + 1.0f) * n * dword );
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
} /* ilu_cQuerySpace */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Allocate storage for the data structures common to all factor routines.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* For those unpredictable size, estimate as fill_ratio * nnz(A).
|
|
kusano |
7d535a |
* Return value:
|
|
kusano |
7d535a |
* If lwork = -1, return the estimated amount of space required, plus n;
|
|
kusano |
7d535a |
* otherwise, return the amount of space actually allocated when
|
|
kusano |
7d535a |
* memory allocation failure occurred.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
|
|
kusano |
7d535a |
int panel_size, float fill_ratio, SuperMatrix *L, SuperMatrix *U,
|
|
kusano |
7d535a |
GlobalLU_t *Glu, int **iwork, complex **dwork)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
int info, iword, dword;
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
int *xsup, *supno;
|
|
kusano |
7d535a |
int *lsub, *xlsub;
|
|
kusano |
7d535a |
complex *lusup;
|
|
kusano |
7d535a |
int *xlusup;
|
|
kusano |
7d535a |
complex *ucol;
|
|
kusano |
7d535a |
int *usub, *xusub;
|
|
kusano |
7d535a |
int nzlmax, nzumax, nzlumax;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
iword = sizeof(int);
|
|
kusano |
7d535a |
dword = sizeof(complex);
|
|
kusano |
7d535a |
Glu->n = n;
|
|
kusano |
7d535a |
Glu->num_expansions = 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !Glu->expanders )
|
|
kusano |
7d535a |
Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE *
|
|
kusano |
7d535a |
sizeof(ExpHeader) );
|
|
kusano |
7d535a |
if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( fact != SamePattern_SameRowPerm ) {
|
|
kusano |
7d535a |
/* Guess for L\U factors */
|
|
kusano |
7d535a |
nzumax = nzlumax = fill_ratio * annz;
|
|
kusano |
7d535a |
nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lwork == -1 ) {
|
|
kusano |
7d535a |
return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
|
|
kusano |
7d535a |
+ (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
cSetupSpace(work, lwork, Glu);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#if ( PRNTlevel >= 1 )
|
|
kusano |
7d535a |
printf("cLUMemInit() called: fill_ratio %.0f, nzlmax %ld, nzumax %ld\n",
|
|
kusano |
7d535a |
fill_ratio, nzlmax, nzumax);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Integer pointers for L\U factors */
|
|
kusano |
7d535a |
if ( Glu->MemModel == SYSTEM ) {
|
|
kusano |
7d535a |
xsup = intMalloc(n+1);
|
|
kusano |
7d535a |
supno = intMalloc(n+1);
|
|
kusano |
7d535a |
xlsub = intMalloc(n+1);
|
|
kusano |
7d535a |
xlusup = intMalloc(n+1);
|
|
kusano |
7d535a |
xusub = intMalloc(n+1);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
xsup = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
|
|
kusano |
7d535a |
supno = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
|
|
kusano |
7d535a |
xlsub = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
|
|
kusano |
7d535a |
xlusup = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
|
|
kusano |
7d535a |
xusub = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
|
|
kusano |
7d535a |
ucol = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
|
|
kusano |
7d535a |
lsub = (int *) cexpand( &nzlmax, LSUB, 0, 0, Glu );
|
|
kusano |
7d535a |
usub = (int *) cexpand( &nzumax, USUB, 0, 1, Glu );
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
while ( !lusup || !ucol || !lsub || !usub ) {
|
|
kusano |
7d535a |
if ( Glu->MemModel == SYSTEM ) {
|
|
kusano |
7d535a |
SUPERLU_FREE(lusup);
|
|
kusano |
7d535a |
SUPERLU_FREE(ucol);
|
|
kusano |
7d535a |
SUPERLU_FREE(lsub);
|
|
kusano |
7d535a |
SUPERLU_FREE(usub);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword,
|
|
kusano |
7d535a |
HEAD, Glu);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
nzlumax /= 2;
|
|
kusano |
7d535a |
nzumax /= 2;
|
|
kusano |
7d535a |
nzlmax /= 2;
|
|
kusano |
7d535a |
if ( nzlumax < annz ) {
|
|
kusano |
7d535a |
printf("Not enough memory to perform factorization.\n");
|
|
kusano |
7d535a |
return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
#if ( PRNTlevel >= 1)
|
|
kusano |
7d535a |
printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
|
|
kusano |
7d535a |
nzlmax, nzumax);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
|
|
kusano |
7d535a |
ucol = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
|
|
kusano |
7d535a |
lsub = (int *) cexpand( &nzlmax, LSUB, 0, 0, Glu );
|
|
kusano |
7d535a |
usub = (int *) cexpand( &nzumax, USUB, 0, 1, Glu );
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* fact == SamePattern_SameRowPerm */
|
|
kusano |
7d535a |
Lstore = L->Store;
|
|
kusano |
7d535a |
Ustore = U->Store;
|
|
kusano |
7d535a |
xsup = Lstore->sup_to_col;
|
|
kusano |
7d535a |
supno = Lstore->col_to_sup;
|
|
kusano |
7d535a |
xlsub = Lstore->rowind_colptr;
|
|
kusano |
7d535a |
xlusup = Lstore->nzval_colptr;
|
|
kusano |
7d535a |
xusub = Ustore->colptr;
|
|
kusano |
7d535a |
nzlmax = Glu->nzlmax; /* max from previous factorization */
|
|
kusano |
7d535a |
nzumax = Glu->nzumax;
|
|
kusano |
7d535a |
nzlumax = Glu->nzlumax;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lwork == -1 ) {
|
|
kusano |
7d535a |
return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
|
|
kusano |
7d535a |
+ (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
|
|
kusano |
7d535a |
} else if ( lwork == 0 ) {
|
|
kusano |
7d535a |
Glu->MemModel = SYSTEM;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
Glu->MemModel = USER;
|
|
kusano |
7d535a |
Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */
|
|
kusano |
7d535a |
Glu->stack.size = Glu->stack.top2;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
lsub = Glu->expanders[LSUB].mem = Lstore->rowind;
|
|
kusano |
7d535a |
lusup = Glu->expanders[LUSUP].mem = Lstore->nzval;
|
|
kusano |
7d535a |
usub = Glu->expanders[USUB].mem = Ustore->rowind;
|
|
kusano |
7d535a |
ucol = Glu->expanders[UCOL].mem = Ustore->nzval;;
|
|
kusano |
7d535a |
Glu->expanders[LSUB].size = nzlmax;
|
|
kusano |
7d535a |
Glu->expanders[LUSUP].size = nzlumax;
|
|
kusano |
7d535a |
Glu->expanders[USUB].size = nzumax;
|
|
kusano |
7d535a |
Glu->expanders[UCOL].size = nzumax;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Glu->xsup = xsup;
|
|
kusano |
7d535a |
Glu->supno = supno;
|
|
kusano |
7d535a |
Glu->lsub = lsub;
|
|
kusano |
7d535a |
Glu->xlsub = xlsub;
|
|
kusano |
7d535a |
Glu->lusup = lusup;
|
|
kusano |
7d535a |
Glu->xlusup = xlusup;
|
|
kusano |
7d535a |
Glu->ucol = ucol;
|
|
kusano |
7d535a |
Glu->usub = usub;
|
|
kusano |
7d535a |
Glu->xusub = xusub;
|
|
kusano |
7d535a |
Glu->nzlmax = nzlmax;
|
|
kusano |
7d535a |
Glu->nzumax = nzumax;
|
|
kusano |
7d535a |
Glu->nzlumax = nzlumax;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu);
|
|
kusano |
7d535a |
if ( info )
|
|
kusano |
7d535a |
return ( info + cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
++Glu->num_expansions;
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* cLUMemInit */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Allocate known working storage. Returns 0 if success, otherwise
|
|
kusano |
7d535a |
returns the number of bytes allocated so far when failure occurred. */
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
cLUWorkInit(int m, int n, int panel_size, int **iworkptr,
|
|
kusano |
7d535a |
complex **dworkptr, GlobalLU_t *Glu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
int isize, dsize, extra;
|
|
kusano |
7d535a |
complex *old_ptr;
|
|
kusano |
7d535a |
int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
|
|
kusano |
7d535a |
rowblk = sp_ienv(4);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
|
|
kusano |
7d535a |
dsize = (m * panel_size +
|
|
kusano |
7d535a |
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(complex);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( Glu->MemModel == SYSTEM )
|
|
kusano |
7d535a |
*iworkptr = (int *) intCalloc(isize/sizeof(int));
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
*iworkptr = (int *) cuser_malloc(isize, TAIL, Glu);
|
|
kusano |
7d535a |
if ( ! *iworkptr ) {
|
|
kusano |
7d535a |
fprintf(stderr, "cLUWorkInit: malloc fails for local iworkptr[]\n");
|
|
kusano |
7d535a |
return (isize + n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( Glu->MemModel == SYSTEM )
|
|
kusano |
7d535a |
*dworkptr = (complex *) SUPERLU_MALLOC(dsize);
|
|
kusano |
7d535a |
else {
|
|
kusano |
7d535a |
*dworkptr = (complex *) cuser_malloc(dsize, TAIL, Glu);
|
|
kusano |
7d535a |
if ( NotDoubleAlign(*dworkptr) ) {
|
|
kusano |
7d535a |
old_ptr = *dworkptr;
|
|
kusano |
7d535a |
*dworkptr = (complex*) DoubleAlign(*dworkptr);
|
|
kusano |
7d535a |
*dworkptr = (complex*) ((double*)*dworkptr - 1);
|
|
kusano |
7d535a |
extra = (char*)old_ptr - (char*)*dworkptr;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("cLUWorkInit: not aligned, extra %d\n", extra);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
Glu->stack.top2 -= extra;
|
|
kusano |
7d535a |
Glu->stack.used += extra;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( ! *dworkptr ) {
|
|
kusano |
7d535a |
fprintf(stderr, "malloc fails for local dworkptr[].");
|
|
kusano |
7d535a |
return (isize + dsize + n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Set up pointers for real working arrays.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
cSetRWork(int m, int panel_size, complex *dworkptr,
|
|
kusano |
7d535a |
complex **dense, complex **tempv)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex zero = {0.0, 0.0};
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
|
|
kusano |
7d535a |
rowblk = sp_ienv(4);
|
|
kusano |
7d535a |
*dense = dworkptr;
|
|
kusano |
7d535a |
*tempv = *dense + panel_size*m;
|
|
kusano |
7d535a |
cfill (*dense, m * panel_size, zero);
|
|
kusano |
7d535a |
cfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Free the working storage used by factor routines.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void cLUWorkFree(int *iwork, complex *dwork, GlobalLU_t *Glu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if ( Glu->MemModel == SYSTEM ) {
|
|
kusano |
7d535a |
SUPERLU_FREE (iwork);
|
|
kusano |
7d535a |
SUPERLU_FREE (dwork);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
Glu->stack.used -= (Glu->stack.size - Glu->stack.top2);
|
|
kusano |
7d535a |
Glu->stack.top2 = Glu->stack.size;
|
|
kusano |
7d535a |
/* cStackCompress(Glu); */
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SUPERLU_FREE (Glu->expanders);
|
|
kusano |
7d535a |
Glu->expanders = NULL;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Expand the data structures for L and U during the factorization.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Return value: 0 - successful return
|
|
kusano |
7d535a |
* > 0 - number of bytes allocated when run out of space
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
cLUMemXpand(int jcol,
|
|
kusano |
7d535a |
int next, /* number of elements currently in the factors */
|
|
kusano |
7d535a |
MemType mem_type, /* which type of memory to expand */
|
|
kusano |
7d535a |
int *maxlen, /* modified - maximum length of a data structure */
|
|
kusano |
7d535a |
GlobalLU_t *Glu /* modified - global LU data structures */
|
|
kusano |
7d535a |
)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
void *new_mem;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("cLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
|
|
kusano |
7d535a |
jcol, next, *maxlen, mem_type);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (mem_type == USUB)
|
|
kusano |
7d535a |
new_mem = cexpand(maxlen, mem_type, next, 1, Glu);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
new_mem = cexpand(maxlen, mem_type, next, 0, Glu);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !new_mem ) {
|
|
kusano |
7d535a |
int nzlmax = Glu->nzlmax;
|
|
kusano |
7d535a |
int nzumax = Glu->nzumax;
|
|
kusano |
7d535a |
int nzlumax = Glu->nzlumax;
|
|
kusano |
7d535a |
fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
|
|
kusano |
7d535a |
return (cmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
switch ( mem_type ) {
|
|
kusano |
7d535a |
case LUSUP:
|
|
kusano |
7d535a |
Glu->lusup = (complex *) new_mem;
|
|
kusano |
7d535a |
Glu->nzlumax = *maxlen;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case UCOL:
|
|
kusano |
7d535a |
Glu->ucol = (complex *) new_mem;
|
|
kusano |
7d535a |
Glu->nzumax = *maxlen;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case LSUB:
|
|
kusano |
7d535a |
Glu->lsub = (int *) new_mem;
|
|
kusano |
7d535a |
Glu->nzlmax = *maxlen;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case USUB:
|
|
kusano |
7d535a |
Glu->usub = (int *) new_mem;
|
|
kusano |
7d535a |
Glu->nzumax = *maxlen;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
copy_mem_complex(int howmany, void *old, void *new)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int i;
|
|
kusano |
7d535a |
complex *dold = old;
|
|
kusano |
7d535a |
complex *dnew = new;
|
|
kusano |
7d535a |
for (i = 0; i < howmany; i++) dnew[i] = dold[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Expand the existing storage to accommodate more fill-ins.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
*cexpand (
|
|
kusano |
7d535a |
int *prev_len, /* length used from previous call */
|
|
kusano |
7d535a |
MemType type, /* which part of the memory to expand */
|
|
kusano |
7d535a |
int len_to_copy, /* size of the memory to be copied to new store */
|
|
kusano |
7d535a |
int keep_prev, /* = 1: use prev_len;
|
|
kusano |
7d535a |
= 0: compute new_len to expand */
|
|
kusano |
7d535a |
GlobalLU_t *Glu /* modified - global LU data structures */
|
|
kusano |
7d535a |
)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
float EXPAND = 1.5;
|
|
kusano |
7d535a |
float alpha;
|
|
kusano |
7d535a |
void *new_mem, *old_mem;
|
|
kusano |
7d535a |
int new_len, tries, lword, extra, bytes_to_copy;
|
|
kusano |
7d535a |
ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
alpha = EXPAND;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( Glu->num_expansions == 0 || keep_prev ) {
|
|
kusano |
7d535a |
/* First time allocate requested */
|
|
kusano |
7d535a |
new_len = *prev_len;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
new_len = alpha * *prev_len;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( type == LSUB || type == USUB ) lword = sizeof(int);
|
|
kusano |
7d535a |
else lword = sizeof(complex);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( Glu->MemModel == SYSTEM ) {
|
|
kusano |
7d535a |
new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
|
|
kusano |
7d535a |
if ( Glu->num_expansions != 0 ) {
|
|
kusano |
7d535a |
tries = 0;
|
|
kusano |
7d535a |
if ( keep_prev ) {
|
|
kusano |
7d535a |
if ( !new_mem ) return (NULL);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
while ( !new_mem ) {
|
|
kusano |
7d535a |
if ( ++tries > 10 ) return (NULL);
|
|
kusano |
7d535a |
alpha = Reduce(alpha);
|
|
kusano |
7d535a |
new_len = alpha * *prev_len;
|
|
kusano |
7d535a |
new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( type == LSUB || type == USUB ) {
|
|
kusano |
7d535a |
copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
copy_mem_complex(len_to_copy, expanders[type].mem, new_mem);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
SUPERLU_FREE (expanders[type].mem);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
expanders[type].mem = (void *) new_mem;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else { /* MemModel == USER */
|
|
kusano |
7d535a |
if ( Glu->num_expansions == 0 ) {
|
|
kusano |
7d535a |
new_mem = cuser_malloc(new_len * lword, HEAD, Glu);
|
|
kusano |
7d535a |
if ( NotDoubleAlign(new_mem) &&
|
|
kusano |
7d535a |
(type == LUSUP || type == UCOL) ) {
|
|
kusano |
7d535a |
old_mem = new_mem;
|
|
kusano |
7d535a |
new_mem = (void *)DoubleAlign(new_mem);
|
|
kusano |
7d535a |
extra = (char*)new_mem - (char*)old_mem;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("expand(): not aligned, extra %d\n", extra);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
Glu->stack.top1 += extra;
|
|
kusano |
7d535a |
Glu->stack.used += extra;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
expanders[type].mem = (void *) new_mem;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
tries = 0;
|
|
kusano |
7d535a |
extra = (new_len - *prev_len) * lword;
|
|
kusano |
7d535a |
if ( keep_prev ) {
|
|
kusano |
7d535a |
if ( StackFull(extra) ) return (NULL);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
while ( StackFull(extra) ) {
|
|
kusano |
7d535a |
if ( ++tries > 10 ) return (NULL);
|
|
kusano |
7d535a |
alpha = Reduce(alpha);
|
|
kusano |
7d535a |
new_len = alpha * *prev_len;
|
|
kusano |
7d535a |
extra = (new_len - *prev_len) * lword;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( type != USUB ) {
|
|
kusano |
7d535a |
new_mem = (void*)((char*)expanders[type + 1].mem + extra);
|
|
kusano |
7d535a |
bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1
|
|
kusano |
7d535a |
- (char*)expanders[type + 1].mem;
|
|
kusano |
7d535a |
user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( type < USUB ) {
|
|
kusano |
7d535a |
Glu->usub = expanders[USUB].mem =
|
|
kusano |
7d535a |
(void*)((char*)expanders[USUB].mem + extra);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( type < LSUB ) {
|
|
kusano |
7d535a |
Glu->lsub = expanders[LSUB].mem =
|
|
kusano |
7d535a |
(void*)((char*)expanders[LSUB].mem + extra);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( type < UCOL ) {
|
|
kusano |
7d535a |
Glu->ucol = expanders[UCOL].mem =
|
|
kusano |
7d535a |
(void*)((char*)expanders[UCOL].mem + extra);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
Glu->stack.top1 += extra;
|
|
kusano |
7d535a |
Glu->stack.used += extra;
|
|
kusano |
7d535a |
if ( type == UCOL ) {
|
|
kusano |
7d535a |
Glu->stack.top1 += extra; /* Add same amount for USUB */
|
|
kusano |
7d535a |
Glu->stack.used += extra;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* if ... */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else ... */
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
expanders[type].size = new_len;
|
|
kusano |
7d535a |
*prev_len = new_len;
|
|
kusano |
7d535a |
if ( Glu->num_expansions ) ++Glu->num_expansions;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return (void *) expanders[type].mem;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* cexpand */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Compress the work[] array to remove fragmentation.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
cStackCompress(GlobalLU_t *Glu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int iword, dword, ndim;
|
|
kusano |
7d535a |
char *last, *fragment;
|
|
kusano |
7d535a |
int *ifrom, *ito;
|
|
kusano |
7d535a |
complex *dfrom, *dto;
|
|
kusano |
7d535a |
int *xlsub, *lsub, *xusub, *usub, *xlusup;
|
|
kusano |
7d535a |
complex *ucol, *lusup;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
iword = sizeof(int);
|
|
kusano |
7d535a |
dword = sizeof(complex);
|
|
kusano |
7d535a |
ndim = Glu->n;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xlsub = Glu->xlsub;
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
xusub = Glu->xusub;
|
|
kusano |
7d535a |
usub = Glu->usub;
|
|
kusano |
7d535a |
xlusup = Glu->xlusup;
|
|
kusano |
7d535a |
ucol = Glu->ucol;
|
|
kusano |
7d535a |
lusup = Glu->lusup;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
dfrom = ucol;
|
|
kusano |
7d535a |
dto = (complex *)((char*)lusup + xlusup[ndim] * dword);
|
|
kusano |
7d535a |
copy_mem_complex(xusub[ndim], dfrom, dto);
|
|
kusano |
7d535a |
ucol = dto;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ifrom = lsub;
|
|
kusano |
7d535a |
ito = (int *) ((char*)ucol + xusub[ndim] * iword);
|
|
kusano |
7d535a |
copy_mem_int(xlsub[ndim], ifrom, ito);
|
|
kusano |
7d535a |
lsub = ito;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ifrom = usub;
|
|
kusano |
7d535a |
ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
|
|
kusano |
7d535a |
copy_mem_int(xusub[ndim], ifrom, ito);
|
|
kusano |
7d535a |
usub = ito;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
last = (char*)usub + xusub[ndim] * iword;
|
|
kusano |
7d535a |
fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last);
|
|
kusano |
7d535a |
Glu->stack.used -= (long int) fragment;
|
|
kusano |
7d535a |
Glu->stack.top1 -= (long int) fragment;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Glu->ucol = ucol;
|
|
kusano |
7d535a |
Glu->lsub = lsub;
|
|
kusano |
7d535a |
Glu->usub = usub;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("cStackCompress: fragment %d\n", fragment);
|
|
kusano |
7d535a |
/* for (last = 0; last < ndim; ++last)
|
|
kusano |
7d535a |
print_lu_col("After compress:", last, 0);*/
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Allocate storage for original matrix A
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
callocateA(int n, int nnz, complex **a, int **asub, int **xa)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
*a = (complex *) complexMalloc(nnz);
|
|
kusano |
7d535a |
*asub = (int *) intMalloc(nnz);
|
|
kusano |
7d535a |
*xa = (int *) intMalloc(n+1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
complex *complexMalloc(int n)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex *buf;
|
|
kusano |
7d535a |
buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
|
|
kusano |
7d535a |
if ( !buf ) {
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
return (buf);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
complex *complexCalloc(int n)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex *buf;
|
|
kusano |
7d535a |
register int i;
|
|
kusano |
7d535a |
complex zero = {0.0, 0.0};
|
|
kusano |
7d535a |
buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
|
|
kusano |
7d535a |
if ( !buf ) {
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
for (i = 0; i < n; ++i) buf[i] = zero;
|
|
kusano |
7d535a |
return (buf);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int cmemory_usage(const int nzlmax, const int nzumax,
|
|
kusano |
7d535a |
const int nzlumax, const int n)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int iword, dword;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
iword = sizeof(int);
|
|
kusano |
7d535a |
dword = sizeof(complex);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return (10 * n * iword +
|
|
kusano |
7d535a |
nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|