|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_sdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#define HANDLE_SIZE 8
|
|
kusano |
7d535a |
/* kind of integer to hold a pointer. Use int.
|
|
kusano |
7d535a |
This might need to be changed on 64-bit systems. */
|
|
kusano |
7d535a |
typedef int fptr; /* 32-bit by default */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
typedef struct {
|
|
kusano |
7d535a |
SuperMatrix *L;
|
|
kusano |
7d535a |
SuperMatrix *U;
|
|
kusano |
7d535a |
int *perm_c;
|
|
kusano |
7d535a |
int *perm_r;
|
|
kusano |
7d535a |
} factors_t;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
c_fortran_sgssv_(int *iopt, int *n, int *nnz, int *nrhs,
|
|
kusano |
7d535a |
float *values, int *rowind, int *colptr,
|
|
kusano |
7d535a |
float *b, int *ldb,
|
|
kusano |
7d535a |
fptr *f_factors, /* a handle containing the address
|
|
kusano |
7d535a |
pointing to the factored matrices */
|
|
kusano |
7d535a |
int *info)
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* This routine can be called from Fortran.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* iopt (input) int
|
|
kusano |
7d535a |
* Specifies the operation:
|
|
kusano |
7d535a |
* = 1, performs LU decomposition for the first time
|
|
kusano |
7d535a |
* = 2, performs triangular solve
|
|
kusano |
7d535a |
* = 3, free all the storage in the end
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* f_factors (input/output) fptr*
|
|
kusano |
7d535a |
* If iopt == 1, it is an output and contains the pointer pointing to
|
|
kusano |
7d535a |
* the structure of the factored matrices.
|
|
kusano |
7d535a |
* Otherwise, it it an input.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SuperMatrix A, AC, B;
|
|
kusano |
7d535a |
SuperMatrix *L, *U;
|
|
kusano |
7d535a |
int *perm_r; /* row permutations from partial pivoting */
|
|
kusano |
7d535a |
int *perm_c; /* column permutation vector */
|
|
kusano |
7d535a |
int *etree; /* column elimination tree */
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
int i, panel_size, permc_spec, relax;
|
|
kusano |
7d535a |
trans_t trans;
|
|
kusano |
7d535a |
mem_usage_t mem_usage;
|
|
kusano |
7d535a |
superlu_options_t options;
|
|
kusano |
7d535a |
SuperLUStat_t stat;
|
|
kusano |
7d535a |
factors_t *LUfactors;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
trans = NOTRANS;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( *iopt == 1 ) { /* LU decomposition */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Set the default input options. */
|
|
kusano |
7d535a |
set_default_options(&options);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Initialize the statistics variables. */
|
|
kusano |
7d535a |
StatInit(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Adjust to 0-based indexing */
|
|
kusano |
7d535a |
for (i = 0; i < *nnz; ++i) --rowind[i];
|
|
kusano |
7d535a |
for (i = 0; i <= *n; ++i) --colptr[i];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
|
|
kusano |
7d535a |
SLU_NC, SLU_S, SLU_GE);
|
|
kusano |
7d535a |
L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
|
|
kusano |
7d535a |
U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
|
|
kusano |
7d535a |
if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
|
|
kusano |
7d535a |
if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[].");
|
|
kusano |
7d535a |
if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[].");
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Get column permutation vector perm_c[], according to permc_spec:
|
|
kusano |
7d535a |
* permc_spec = 0: natural ordering
|
|
kusano |
7d535a |
* permc_spec = 1: minimum degree on structure of A'*A
|
|
kusano |
7d535a |
* permc_spec = 2: minimum degree on structure of A'+A
|
|
kusano |
7d535a |
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
permc_spec = options.ColPerm;
|
|
kusano |
7d535a |
get_perm_c(permc_spec, &A, perm_c);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sp_preorder(&options, &A, perm_c, etree, &AC);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
panel_size = sp_ienv(1);
|
|
kusano |
7d535a |
relax = sp_ienv(2);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sgstrf(&options, &AC, relax, panel_size, etree,
|
|
kusano |
7d535a |
NULL, 0, perm_c, perm_r, L, U, &stat, info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( *info == 0 ) {
|
|
kusano |
7d535a |
Lstore = (SCformat *) L->Store;
|
|
kusano |
7d535a |
Ustore = (NCformat *) U->Store;
|
|
kusano |
7d535a |
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
|
|
kusano |
7d535a |
sQuerySpace(L, U, &mem_usage);
|
|
kusano |
7d535a |
printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
|
|
kusano |
7d535a |
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
printf("sgstrf() error returns INFO= %d\n", *info);
|
|
kusano |
7d535a |
if ( *info <= *n ) { /* factorization completes */
|
|
kusano |
7d535a |
sQuerySpace(L, U, &mem_usage);
|
|
kusano |
7d535a |
printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
|
|
kusano |
7d535a |
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Restore to 1-based indexing */
|
|
kusano |
7d535a |
for (i = 0; i < *nnz; ++i) ++rowind[i];
|
|
kusano |
7d535a |
for (i = 0; i <= *n; ++i) ++colptr[i];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Save the LU factors in the factors handle */
|
|
kusano |
7d535a |
LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t));
|
|
kusano |
7d535a |
LUfactors->L = L;
|
|
kusano |
7d535a |
LUfactors->U = U;
|
|
kusano |
7d535a |
LUfactors->perm_c = perm_c;
|
|
kusano |
7d535a |
LUfactors->perm_r = perm_r;
|
|
kusano |
7d535a |
*f_factors = (fptr) LUfactors;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Free un-wanted storage */
|
|
kusano |
7d535a |
SUPERLU_FREE(etree);
|
|
kusano |
7d535a |
Destroy_SuperMatrix_Store(&A);
|
|
kusano |
7d535a |
Destroy_CompCol_Permuted(&AC);
|
|
kusano |
7d535a |
StatFree(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else if ( *iopt == 2 ) { /* Triangular solve */
|
|
kusano |
7d535a |
/* Initialize the statistics variables. */
|
|
kusano |
7d535a |
StatInit(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Extract the LU factors in the factors handle */
|
|
kusano |
7d535a |
LUfactors = (factors_t*) *f_factors;
|
|
kusano |
7d535a |
L = LUfactors->L;
|
|
kusano |
7d535a |
U = LUfactors->U;
|
|
kusano |
7d535a |
perm_c = LUfactors->perm_c;
|
|
kusano |
7d535a |
perm_r = LUfactors->perm_r;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_S, SLU_GE);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Solve the system A*X=B, overwriting B with X. */
|
|
kusano |
7d535a |
sgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Destroy_SuperMatrix_Store(&B);
|
|
kusano |
7d535a |
StatFree(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else if ( *iopt == 3 ) { /* Free storage */
|
|
kusano |
7d535a |
/* Free the LU factors in the factors handle */
|
|
kusano |
7d535a |
LUfactors = (factors_t*) *f_factors;
|
|
kusano |
7d535a |
SUPERLU_FREE (LUfactors->perm_r);
|
|
kusano |
7d535a |
SUPERLU_FREE (LUfactors->perm_c);
|
|
kusano |
7d535a |
Destroy_SuperNode_Matrix(LUfactors->L);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(LUfactors->U);
|
|
kusano |
7d535a |
SUPERLU_FREE (LUfactors->L);
|
|
kusano |
7d535a |
SUPERLU_FREE (LUfactors->U);
|
|
kusano |
7d535a |
SUPERLU_FREE (LUfactors);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
fprintf(stderr,"Invalid iopt=%d passed to c_fortran_sgssv()\n",*iopt);
|
|
kusano |
7d535a |
exit(-1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|