/** @file slu_util.h
* \brief Utility header file
*
* -- SuperLU routine (version 4.1) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November, 2010
*
*/
#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
#define __SUPERLU_UTIL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
#ifndef __STDC__
#include <malloc.h>
#endif
*/
#include <assert.h>
#include "superlu_enum_consts.h"
/***********************************************************************
* Macros
***********************************************************************/
#define FIRSTCOL_OF_SNODE(i) (xsup[i])
/* No of marker arrays used in the symbolic factorization,
each of size n */
#define NO_MARKER 3
#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) )
#ifndef USER_ABORT
#define USER_ABORT(msg) superlu_abort_and_exit(msg)
#endif
#define ABORT(err_msg) \
{ char msg[256];\
sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
USER_ABORT(msg); }
#ifndef USER_MALLOC
#if 1
#define USER_MALLOC(size) superlu_malloc(size)
#else
/* The following may check out some uninitialized data */
#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
#endif
#endif
#define SUPERLU_MALLOC(size) USER_MALLOC(size)
#ifndef USER_FREE
#define USER_FREE(addr) superlu_free(addr)
#endif
#define SUPERLU_FREE(addr) USER_FREE(addr)
#define CHECK_MALLOC(where) { \
extern int superlu_malloc_total; \
printf("%s: malloc_total %d Bytes\n", \
where, superlu_malloc_total); \
}
#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) )
#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) )
/*********************************************************
* Macros used for easy access of sparse matrix entries. *
*********************************************************/
#define L_SUB_START(col) ( Lstore->rowind_colptr[col] )
#define L_SUB(ptr) ( Lstore->rowind[ptr] )
#define L_NZ_START(col) ( Lstore->nzval_colptr[col] )
#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] )
#define U_NZ_START(col) ( Ustore->colptr[col] )
#define U_SUB(ptr) ( Ustore->rowind[ptr] )
/***********************************************************************
* Constants
***********************************************************************/
#define EMPTY (-1)
/*#define NO (-1)*/
#define FALSE 0
#define TRUE 1
#define NO_MEMTYPE 4 /* 0: lusup;
1: ucol;
2: lsub;
3: usub */
#define GluIntArray(n) (5 * (n) + 5)
/* Dropping rules */
#define NODROP ( 0x0000 )
#define DROP_BASIC ( 0x0001 ) /* ILU(tau) */
#define DROP_PROWS ( 0x0002 ) /* ILUTP: keep p maximum rows */
#define DROP_COLUMN ( 0x0004 ) /* ILUTP: for j-th column,
p = gamma * nnz(A(:,j)) */
#define DROP_AREA ( 0x0008 ) /* ILUTP: for j-th column, use
nnz(F(:,1:j)) / nnz(A(:,1:j))
to limit memory growth */
#define DROP_SECONDARY ( 0x000E ) /* PROWS | COLUMN | AREA */
#define DROP_DYNAMIC ( 0x0010 ) /* adaptive tau */
#define DROP_INTERP ( 0x0100 ) /* use interpolation */
#if 1
#define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */
#else
#define MILU_ALPHA 1.0 /* multiple of drop_sum to be added to diagonal */
#endif
/***********************************************************************
* Type definitions
***********************************************************************/
typedef float flops_t;
typedef unsigned char Logical;
/*
*-- This contains the options used to control the solution process.
*
* Fact (fact_t)
* Specifies whether or not the factored form of the matrix
* A is supplied on entry, and if not, how the matrix A should
* be factorizaed.
* = DOFACT: The matrix A will be factorized from scratch, and the
* factors will be stored in L and U.
* = SamePattern: The matrix A will be factorized assuming
* that a factorization of a matrix with the same sparsity
* pattern was performed prior to this one. Therefore, this
* factorization will reuse column permutation vector
* ScalePermstruct->perm_c and the column elimination tree
* LUstruct->etree.
* = SamePattern_SameRowPerm: The matrix A will be factorized
* assuming that a factorization of a matrix with the same
* sparsity pattern and similar numerical values was performed
* prior to this one. Therefore, this factorization will reuse
* both row and column scaling factors R and C, both row and
* column permutation vectors perm_r and perm_c, and the
* data structure set up from the previous symbolic factorization.
* = FACTORED: On entry, L, U, perm_r and perm_c contain the
* factored form of A. If DiagScale is not NOEQUIL, the matrix
* A has been equilibrated with scaling factors R and C.
*
* Equil (yes_no_t)
* Specifies whether to equilibrate the system (scale A's row and
* columns to have unit norm).
*
* ColPerm (colperm_t)
* Specifies what type of column permutation to use to reduce fill.
* = NATURAL: use the natural ordering
* = MMD_ATA: use minimum degree ordering on structure of A'*A
* = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
* = COLAMD: use approximate minimum degree column ordering
* = MY_PERMC: use the ordering specified by the user
*
* Trans (trans_t)
* Specifies the form of the system of equations:
* = NOTRANS: A * X = B (No transpose)
* = TRANS: A**T * X = B (Transpose)
* = CONJ: A**H * X = B (Transpose)
*
* IterRefine (IterRefine_t)
* Specifies whether to perform iterative refinement.
* = NO: no iterative refinement
* = SINGLE: perform iterative refinement in single precision
* = DOUBLE: perform iterative refinement in double precision
* = EXTRA: perform iterative refinement in extra precision
*
* DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
* Specifies the threshold used for a diagonal entry to be an
* acceptable pivot.
*
* SymmetricMode (yest_no_t)
* Specifies whether to use symmetric mode. Symmetric mode gives
* preference to diagonal pivots, and uses an (A'+A)-based column
* permutation algorithm.
*
* PivotGrowth (yes_no_t)
* Specifies whether to compute the reciprocal pivot growth.
*
* ConditionNumber (ues_no_t)
* Specifies whether to compute the reciprocal condition number.
*
* RowPerm (rowperm_t) (only for SuperLU_DIST or ILU)
* Specifies whether to permute rows of the original matrix.
* = NO: not to permute the rows
* = LargeDiag: make the diagonal large relative to the off-diagonal
* = MY_PERMR: use the permutation given by the user
*
* ILU_DropRule (int)
* Specifies the dropping rule:
* = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
* = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
* = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
* p = gamma * nnz(A(:,j)).
* = DROP_AREA: Variation of ILUTP, for j-th column, use
* nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
* = DROP_DYNAMIC: Modify the threshold tau during factorizaion:
* If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
* tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
* Otherwise
* tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
* tau_U(j) uses the similar rule.
* NOTE: the thresholds used by L and U are separate.
* = DROP_INTERP: Compute the second dropping threshold by
* interpolation instead of sorting (default).
* In this case, the actual fill ratio is not
* guaranteed to be smaller than gamma.
* Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
* ( Default: DROP_BASIC | DROP_AREA )
*
* ILU_DropTol (double)
* numerical threshold for dropping.
*
* ILU_FillFactor (double)
* Gamma in the secondary dropping.
*
* ILU_Norm (norm_t)
* Specify which norm to use to measure the row size in a
* supernode: infinity-norm, 1-norm, or 2-norm.
*
* ILU_FillTol (double)
* numerical threshold for zero pivot perturbation.
*
* ILU_MILU (milu_t)
* Specifies which version of MILU to use.
*
* ILU_MILU_Dim (double)
* Dimension of the PDE if available.
*
* ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
* Specifies whether to replace the tiny diagonals by
* sqrt(epsilon)*||A|| during LU factorization.
*
* SolveInitialized (yes_no_t) (only for SuperLU_DIST)
* Specifies whether the initialization has been performed to the
* triangular solve.
*
* RefineInitialized (yes_no_t) (only for SuperLU_DIST)
* Specifies whether the initialization has been performed to the
* sparse matrix-vector multiplication routine needed in iterative
* refinement.
*
* PrintStat (yes_no_t)
* Specifies whether to print the solver's statistics.
*/
typedef struct {
fact_t Fact;
yes_no_t Equil;
colperm_t ColPerm;
trans_t Trans;
IterRefine_t IterRefine;
double DiagPivotThresh;
yes_no_t SymmetricMode;
yes_no_t PivotGrowth;
yes_no_t ConditionNumber;
rowperm_t RowPerm;
int ILU_DropRule;
double ILU_DropTol; /* threshold for dropping */
double ILU_FillFactor; /* gamma in the secondary dropping */
norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */
double ILU_FillTol; /* threshold for zero pivot perturbation */
milu_t ILU_MILU;
double ILU_MILU_Dim; /* Dimension of PDE (if available) */
yes_no_t ParSymbFact;
yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */
yes_no_t SolveInitialized;
yes_no_t RefineInitialized;
yes_no_t PrintStat;
} superlu_options_t;
/*! \brief Headers for 4 types of dynamatically managed memory */
typedef struct e_node {
int size; /* length of the memory that has been used */
void *mem; /* pointer to the new malloc'd store */
} ExpHeader;
typedef struct {
int size;
int used;
int top1; /* grow upward, relative to &array[0] */
int top2; /* grow downward */
void *array;
} LU_stack_t;
typedef struct {
int *panel_histo; /* histogram of panel size distribution */
double *utime; /* running time at various phases */
flops_t *ops; /* operation count at various phases */
int TinyPivots; /* number of tiny pivots */
int RefineSteps; /* number of iterative refinement steps */
int expansions; /* number of memory expansions */
} SuperLUStat_t;
typedef struct {
float for_lu;
float total_needed;
} mem_usage_t;
/***********************************************************************
* Prototypes
***********************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
extern void Destroy_SuperMatrix_Store(SuperMatrix *);
extern void Destroy_CompCol_Matrix(SuperMatrix *);
extern void Destroy_CompRow_Matrix(SuperMatrix *);
extern void Destroy_SuperNode_Matrix(SuperMatrix *);
extern void Destroy_CompCol_Permuted(SuperMatrix *);
extern void Destroy_Dense_Matrix(SuperMatrix *);
extern void get_perm_c(int, SuperMatrix *, int *);
extern void set_default_options(superlu_options_t *options);
extern void ilu_set_default_options(superlu_options_t *options);
extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
SuperMatrix*);
extern void superlu_abort_and_exit(char*);
extern void *superlu_malloc (size_t);
extern int *intMalloc (int);
extern int *intCalloc (int);
extern void superlu_free (void*);
extern void SetIWork (int, int, int, int *, int **, int **, int **,
int **, int **, int **, int **);
extern int sp_coletree (int *, int *, int *, int, int, int *);
extern void relax_snode (const int, int *, const int, int *, int *);
extern void heap_relax_snode (const int, int *, const int, int *, int *);
extern int mark_relax(int, int *, int *, int *, int *, int *, int *);
extern void ilu_relax_snode (const int, int *, const int, int *,
int *, int *);
extern void ilu_heap_relax_snode (const int, int *, const int, int *,
int *, int*);
extern void resetrep_col (const int, const int *, int *);
extern int spcoletree (int *, int *, int *, int, int, int *);
extern int *TreePostorder (int, int *);
extern double SuperLU_timer_ ();
extern int sp_ienv (int);
extern int lsame_ (char *, char *);
extern int xerbla_ (char *, int *);
extern void ifill (int *, int, int);
extern void snode_profile (int, int *);
extern void super_stats (int, int *);
extern void check_repfnz(int, int, int, int *);
extern void PrintSumm (char *, int, int, int);
extern void StatInit(SuperLUStat_t *);
extern void StatPrint (SuperLUStat_t *);
extern void StatFree(SuperLUStat_t *);
extern void print_panel_seg(int, int, int, int, int *, int *);
extern int print_int_vec(char *,int, int *);
extern int slu_PrintInt10(char *, int, int *);
#ifdef __cplusplus
}
#endif
#endif /* __SUPERLU_UTIL */