kusano 7d535a
/** @file slu_util.h
kusano 7d535a
 * \brief Utility header file 
kusano 7d535a
 *
kusano 7d535a
 * -- SuperLU routine (version 4.1) --
kusano 7d535a
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
kusano 7d535a
 * and Lawrence Berkeley National Lab.
kusano 7d535a
 * November, 2010
kusano 7d535a
 *
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
kusano 7d535a
#define __SUPERLU_UTIL
kusano 7d535a
kusano 7d535a
#include <stdio.h>
kusano 7d535a
#include <stdlib.h>
kusano 7d535a
#include <string.h>
kusano 7d535a
/*
kusano 7d535a
#ifndef __STDC__
kusano 7d535a
#include <malloc.h>
kusano 7d535a
#endif
kusano 7d535a
*/
kusano 7d535a
#include <assert.h>
kusano 7d535a
#include "superlu_enum_consts.h"
kusano 7d535a
kusano 7d535a
/***********************************************************************
kusano 7d535a
 * Macros
kusano 7d535a
 ***********************************************************************/
kusano 7d535a
/* Entry i of the array `xsup`, which must be visible where the macro is used
   (going by the macro's name, the first column of supernode i). */
#define FIRSTCOL_OF_SNODE(i)	(xsup[(i)])
kusano 7d535a
/* No of marker arrays used in the symbolic factorization,
kusano 7d535a
   each of size n */
kusano 7d535a
#define NO_MARKER     3
kusano 7d535a
/* Larger of m and (t + b) * w.  Every argument is parenthesized so that
   expression arguments (e.g. `w + 1`, `c ? x : y`) expand with the intended
   precedence.  SUPERLU_MAX may evaluate its operands more than once, so do
   not pass arguments with side effects. */
#define NUM_TEMPV(m,w,t,b)  ( SUPERLU_MAX((m), ((t) + (b)) * (w)) )
kusano 7d535a
kusano 7d535a
/* Hook invoked by ABORT(); a client may pre-define USER_ABORT to replace the
   default, which forwards to superlu_abort_and_exit(). */
#ifndef USER_ABORT
#define USER_ABORT(msg) superlu_abort_and_exit(msg)
#endif

/*
 * Format "<err_msg> at line <line> in file <file>\n" into a local 256-byte
 * buffer and pass it to USER_ABORT.  snprintf bounds the write, so an
 * over-long message or file path is truncated rather than overrunning msg[].
 * The expansion is a brace block (not do/while), unchanged from before, so
 * existing call sites keep compiling.
 */
#define ABORT(err_msg) \
 { char msg[256];\
   snprintf(msg, sizeof msg, "%s at line %d in file %s\n", \
            err_msg, __LINE__, __FILE__);\
   USER_ABORT(msg); }
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*
 * Allocation hooks.  USER_MALLOC and USER_FREE may be pre-defined by the
 * client; when they are not, they forward to superlu_malloc() and
 * superlu_free().  SUPERLU_MALLOC / SUPERLU_FREE expand to those hooks.
 */
#ifndef USER_MALLOC
#if 1
#define USER_MALLOC(size) superlu_malloc(size)
#else
/* The following may check out some uninitialized data */
/* (Disabled variant: fills the new block with 0x0F bytes.  It expands `size`
   twice and passes the allocation result to memset without a NULL check.) */
#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
#endif
#endif

#define SUPERLU_MALLOC(size) USER_MALLOC(size)

#ifndef USER_FREE
#define USER_FREE(addr) superlu_free(addr)
#endif

#define SUPERLU_FREE(addr) USER_FREE(addr)
kusano 7d535a
kusano 7d535a
/*
 * Print the current value of superlu_malloc_total (an int with external
 * linkage, defined outside this header) to stdout, prefixed by the string
 * `where`.  Expands to a brace block.
 */
#define CHECK_MALLOC(where) {                 \
    extern int superlu_malloc_total;        \
    printf("%s: malloc_total %d Bytes\n",     \
	   where, superlu_malloc_total); \
}
kusano 7d535a
kusano 7d535a
/* Larger of x and y (y when they compare equal).  The selected operand is
   evaluated twice, so avoid arguments with side effects. */
#define SUPERLU_MAX(x, y) 	( (y) < (x) ? (x) : (y) )
kusano 7d535a
/* Smaller of x and y (y when they compare equal).  The selected operand is
   evaluated twice, so avoid arguments with side effects. */
#define SUPERLU_MIN(x, y) 	( (y) > (x) ? (x) : (y) )
kusano 7d535a
kusano 7d535a
/*********************************************************
kusano 7d535a
 * Macros used for easy access of sparse matrix entries. *
kusano 7d535a
 *********************************************************/
kusano 7d535a
/* Each accessor dereferences a pointer named `Lstore` or `Ustore` that must
   be in scope at the point of use, and indexes one of its array members. */
#define L_SUB_START(col)     ( Lstore->rowind_colptr[col] )
#define L_SUB(ptr)           ( Lstore->rowind[ptr] )
#define L_NZ_START(col)      ( Lstore->nzval_colptr[col] )
#define L_FST_SUPC(superno)  ( Lstore->sup_to_col[superno] )
#define U_NZ_START(col)      ( Ustore->colptr[col] )
#define U_SUB(ptr)           ( Ustore->rowind[ptr] )
kusano 7d535a
kusano 7d535a
kusano 7d535a
/***********************************************************************
kusano 7d535a
 * Constants 
kusano 7d535a
 ***********************************************************************/
kusano 7d535a
#define EMPTY	(-1)
/*#define NO	(-1)*/
#define FALSE	0
#define TRUE	1

#define NO_MEMTYPE  4      /* 0: lusup;
			      1: ucol;
			      2: lsub;
			      3: usub */

/* Evaluates to 5 * n + 5; the argument is parenthesized, so expression
   arguments are safe. */
#define GluIntArray(n)   (5 * (n) + 5)
kusano 7d535a
kusano 7d535a
/* Dropping rules: bit flags that can be OR-ed together. */
#define  NODROP	        ( 0x0000 )
#define	 DROP_BASIC	( 0x0001 )  /* ILU(tau) */
#define  DROP_PROWS	( 0x0002 )  /* ILUTP: keep p maximum rows */
#define  DROP_COLUMN	( 0x0004 )  /* ILUTP: for j-th column,
				              p = gamma * nnz(A(:,j)) */
#define  DROP_AREA 	( 0x0008 )  /* ILUTP: for j-th column, use
					      nnz(F(:,1:j)) / nnz(A(:,1:j))
					      to limit memory growth  */
#define  DROP_SECONDARY	( 0x000E )  /* PROWS | COLUMN | AREA */
#define  DROP_DYNAMIC	( 0x0010 )  /* adaptive tau */
#define  DROP_INTERP	( 0x0100 )  /* use interpolation */
kusano 7d535a
kusano 7d535a
kusano 7d535a
/* The first branch is the active one; the 1.0 alternative is kept but
   compiled out. */
#if 1
#define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */
#else
#define MILU_ALPHA  1.0 /* multiple of drop_sum to be added to diagonal */
#endif
kusano 7d535a
kusano 7d535a
kusano 7d535a
/***********************************************************************
kusano 7d535a
 * Type definitions
kusano 7d535a
 ***********************************************************************/
kusano 7d535a
typedef float    flops_t;        /* single-precision type used for operation counts */
typedef unsigned char Logical;   /* one-byte type; presumably holds TRUE/FALSE -- confirm at use sites */
kusano 7d535a
kusano 7d535a
/* 
kusano 7d535a
 *-- This contains the options used to control the solution process.
kusano 7d535a
 *
kusano 7d535a
 * Fact   (fact_t)
kusano 7d535a
 *        Specifies whether or not the factored form of the matrix
kusano 7d535a
 *        A is supplied on entry, and if not, how the matrix A should
kusano 7d535a
 *        be factorized.
kusano 7d535a
 *        = DOFACT: The matrix A will be factorized from scratch, and the
kusano 7d535a
 *             factors will be stored in L and U.
kusano 7d535a
 *        = SamePattern: The matrix A will be factorized assuming
kusano 7d535a
 *             that a factorization of a matrix with the same sparsity
kusano 7d535a
 *             pattern was performed prior to this one. Therefore, this
kusano 7d535a
 *             factorization will reuse column permutation vector 
kusano 7d535a
 *             ScalePermstruct->perm_c and the column elimination tree
kusano 7d535a
 *             LUstruct->etree.
kusano 7d535a
 *        = SamePattern_SameRowPerm: The matrix A will be factorized
kusano 7d535a
 *             assuming that a factorization of a matrix with the same
kusano 7d535a
 *             sparsity	pattern and similar numerical values was performed
kusano 7d535a
 *             prior to this one. Therefore, this factorization will reuse
kusano 7d535a
 *             both row and column scaling factors R and C, both row and
kusano 7d535a
 *             column permutation vectors perm_r and perm_c, and the
kusano 7d535a
 *             data structure set up from the previous symbolic factorization.
kusano 7d535a
 *        = FACTORED: On entry, L, U, perm_r and perm_c contain the 
kusano 7d535a
 *              factored form of A. If DiagScale is not NOEQUIL, the matrix
kusano 7d535a
 *              A has been equilibrated with scaling factors R and C.
kusano 7d535a
 *
kusano 7d535a
 * Equil  (yes_no_t)
kusano 7d535a
 *        Specifies whether to equilibrate the system (scale A's row and
kusano 7d535a
 *        columns to have unit norm).
kusano 7d535a
 *
kusano 7d535a
 * ColPerm (colperm_t)
kusano 7d535a
 *        Specifies what type of column permutation to use to reduce fill.
kusano 7d535a
 *        = NATURAL: use the natural ordering 
kusano 7d535a
 *        = MMD_ATA: use minimum degree ordering on structure of A'*A
kusano 7d535a
 *        = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
kusano 7d535a
 *        = COLAMD: use approximate minimum degree column ordering
kusano 7d535a
 *        = MY_PERMC: use the ordering specified by the user
kusano 7d535a
 *         
kusano 7d535a
 * Trans  (trans_t)
kusano 7d535a
 *        Specifies the form of the system of equations:
kusano 7d535a
 *        = NOTRANS: A * X = B        (No transpose)
kusano 7d535a
 *        = TRANS:   A**T * X = B     (Transpose)
kusano 7d535a
 *        = CONJ:    A**H * X = B     (Conjugate transpose)
kusano 7d535a
 *
kusano 7d535a
 * IterRefine (IterRefine_t)
kusano 7d535a
 *        Specifies whether to perform iterative refinement.
kusano 7d535a
 *        = NO: no iterative refinement
kusano 7d535a
 *        = SINGLE: perform iterative refinement in single precision
kusano 7d535a
 *        = DOUBLE: perform iterative refinement in double precision
kusano 7d535a
 *        = EXTRA: perform iterative refinement in extra precision
kusano 7d535a
 *
kusano 7d535a
 * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
kusano 7d535a
 *        Specifies the threshold used for a diagonal entry to be an
kusano 7d535a
 *        acceptable pivot.
kusano 7d535a
 *
kusano 7d535a
 * SymmetricMode (yes_no_t)
kusano 7d535a
 *        Specifies whether to use symmetric mode. Symmetric mode gives 
kusano 7d535a
 *        preference to diagonal pivots, and uses an (A'+A)-based column
kusano 7d535a
 *        permutation algorithm.
kusano 7d535a
 *
kusano 7d535a
 * PivotGrowth (yes_no_t)
kusano 7d535a
 *        Specifies whether to compute the reciprocal pivot growth.
kusano 7d535a
 *
kusano 7d535a
 * ConditionNumber (yes_no_t)
kusano 7d535a
 *        Specifies whether to compute the reciprocal condition number.
kusano 7d535a
 *
kusano 7d535a
 * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU)
kusano 7d535a
 *        Specifies whether to permute rows of the original matrix.
kusano 7d535a
 *        = NO: not to permute the rows
kusano 7d535a
 *        = LargeDiag: make the diagonal large relative to the off-diagonal
kusano 7d535a
 *        = MY_PERMR: use the permutation given by the user
kusano 7d535a
 *
kusano 7d535a
 * ILU_DropRule (int)
kusano 7d535a
 *        Specifies the dropping rule:
kusano 7d535a
 *	  = DROP_BASIC:   Basic dropping rule, supernodal based ILUTP(tau).
kusano 7d535a
 *	  = DROP_PROWS:   Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
kusano 7d535a
 *	  = DROP_COLUMN:  Variant of ILUTP(p,tau), for j-th column,
kusano 7d535a
 *			      p = gamma * nnz(A(:,j)).
kusano 7d535a
 *	  = DROP_AREA:    Variation of ILUTP, for j-th column, use
kusano 7d535a
 *			      nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
kusano 7d535a
 *	  = DROP_DYNAMIC: Modify the threshold tau during factorization:
kusano 7d535a
 *			  If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
kusano 7d535a
 *				  tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
kusano 7d535a
 *			  Otherwise
kusano 7d535a
 *				  tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
kusano 7d535a
 *			  tau_U(j) uses the similar rule.
kusano 7d535a
 *			  NOTE: the thresholds used by L and U are separate.
kusano 7d535a
 *	  = DROP_INTERP:  Compute the second dropping threshold by
kusano 7d535a
 *	                  interpolation instead of sorting (default).
kusano 7d535a
 *  		          In this case, the actual fill ratio is not
kusano 7d535a
 *			  guaranteed to be smaller than gamma.
kusano 7d535a
 *   	  Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
kusano 7d535a
 *	  ( Default: DROP_BASIC | DROP_AREA )
kusano 7d535a
 *
kusano 7d535a
 * ILU_DropTol (double)
kusano 7d535a
 *        numerical threshold for dropping.
kusano 7d535a
 *
kusano 7d535a
 * ILU_FillFactor (double) 
kusano 7d535a
 *        Gamma in the secondary dropping.
kusano 7d535a
 *
kusano 7d535a
 * ILU_Norm (norm_t)
kusano 7d535a
 *        Specify which norm to use to measure the row size in a
kusano 7d535a
 *        supernode: infinity-norm, 1-norm, or 2-norm.
kusano 7d535a
 *
kusano 7d535a
 * ILU_FillTol (double)
kusano 7d535a
 *        numerical threshold for zero pivot perturbation.
kusano 7d535a
 *
kusano 7d535a
 * ILU_MILU (milu_t)
kusano 7d535a
 *        Specifies which version of MILU to use.
kusano 7d535a
 *
kusano 7d535a
 * ILU_MILU_Dim (double) 
kusano 7d535a
 *        Dimension of the PDE if available.
kusano 7d535a
 *
kusano 7d535a
 * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
kusano 7d535a
 *        Specifies whether to replace the tiny diagonals by
kusano 7d535a
 *        sqrt(epsilon)*||A|| during LU factorization.
kusano 7d535a
 *
kusano 7d535a
 * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
kusano 7d535a
 *        Specifies whether the initialization has been performed to the
kusano 7d535a
 *        triangular solve.
kusano 7d535a
 *
kusano 7d535a
 * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
kusano 7d535a
 *        Specifies whether the initialization has been performed to the
kusano 7d535a
 *        sparse matrix-vector multiplication routine needed in iterative
kusano 7d535a
 *        refinement.
kusano 7d535a
 *
kusano 7d535a
 * PrintStat (yes_no_t)
kusano 7d535a
 *        Specifies whether to print the solver's statistics.
kusano 7d535a
 */
kusano 7d535a
typedef struct {
kusano 7d535a
    fact_t        Fact;
kusano 7d535a
    yes_no_t      Equil;
kusano 7d535a
    colperm_t     ColPerm;
kusano 7d535a
    trans_t       Trans;
kusano 7d535a
    IterRefine_t  IterRefine;
kusano 7d535a
    double        DiagPivotThresh;
kusano 7d535a
    yes_no_t      SymmetricMode;
kusano 7d535a
    yes_no_t      PivotGrowth;
kusano 7d535a
    yes_no_t      ConditionNumber;
kusano 7d535a
    rowperm_t     RowPerm;
kusano 7d535a
    int 	  ILU_DropRule;
kusano 7d535a
    double	  ILU_DropTol;    /* threshold for dropping */
kusano 7d535a
    double	  ILU_FillFactor; /* gamma in the secondary dropping */
kusano 7d535a
    norm_t	  ILU_Norm;       /* infinity-norm, 1-norm, or 2-norm */
kusano 7d535a
    double	  ILU_FillTol;    /* threshold for zero pivot perturbation */
kusano 7d535a
    milu_t	  ILU_MILU;
kusano 7d535a
    double	  ILU_MILU_Dim;   /* Dimension of PDE (if available) */
kusano 7d535a
    yes_no_t      ParSymbFact;
kusano 7d535a
    yes_no_t      ReplaceTinyPivot; /* used in SuperLU_DIST */
kusano 7d535a
    yes_no_t      SolveInitialized;
kusano 7d535a
    yes_no_t      RefineInitialized;
kusano 7d535a
    yes_no_t      PrintStat;
kusano 7d535a
} superlu_options_t;
kusano 7d535a
kusano 7d535a
/*! \brief Headers for 4 types of dynamically managed memory */
kusano 7d535a
/* Size/pointer pair describing one expandable memory region. */
typedef struct e_node {
    int size;      /* length of the memory that has been used */
    void *mem;     /* pointer to the new malloc'd store */
} ExpHeader;
kusano 7d535a
kusano 7d535a
/* Bookkeeping for a single array used as a two-ended stack. */
typedef struct {
    int  size;
    int  used;
    int  top1;  /* grow upward, relative to &array[0] */
    int  top2;  /* grow downward */
    void *array;
} LU_stack_t;
kusano 7d535a
kusano 7d535a
typedef struct {
kusano 7d535a
    int     *panel_histo; /* histogram of panel size distribution */
kusano 7d535a
    double  *utime;       /* running time at various phases */
kusano 7d535a
    flops_t *ops;         /* operation count at various phases */
kusano 7d535a
    int     TinyPivots;   /* number of tiny pivots */
kusano 7d535a
    int     RefineSteps;  /* number of iterative refinement steps */
kusano 7d535a
    int     expansions;   /* number of memory expansions */
kusano 7d535a
} SuperLUStat_t;
kusano 7d535a
kusano 7d535a
/* Memory usage report; units are not stated in this header. */
typedef struct {
    float for_lu;
    float total_needed;
} mem_usage_t;
kusano 7d535a
kusano 7d535a
kusano 7d535a
/***********************************************************************
kusano 7d535a
 * Prototypes
kusano 7d535a
 ***********************************************************************/
kusano 7d535a
#ifdef __cplusplus
kusano 7d535a
extern "C" {
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
extern void    Destroy_SuperMatrix_Store(SuperMatrix *);
kusano 7d535a
extern void    Destroy_CompCol_Matrix(SuperMatrix *);
kusano 7d535a
extern void    Destroy_CompRow_Matrix(SuperMatrix *);
kusano 7d535a
extern void    Destroy_SuperNode_Matrix(SuperMatrix *);
kusano 7d535a
extern void    Destroy_CompCol_Permuted(SuperMatrix *);
kusano 7d535a
extern void    Destroy_Dense_Matrix(SuperMatrix *);
kusano 7d535a
extern void    get_perm_c(int, SuperMatrix *, int *);
kusano 7d535a
extern void    set_default_options(superlu_options_t *options);
kusano 7d535a
extern void    ilu_set_default_options(superlu_options_t *options);
kusano 7d535a
extern void    sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
kusano 7d535a
			    SuperMatrix*);
kusano 7d535a
extern void    superlu_abort_and_exit(char*);
kusano 7d535a
extern void    *superlu_malloc (size_t);
kusano 7d535a
extern int     *intMalloc (int);
kusano 7d535a
extern int     *intCalloc (int);
kusano 7d535a
extern void    superlu_free (void*);
kusano 7d535a
extern void    SetIWork (int, int, int, int *, int **, int **, int **,
kusano 7d535a
                         int **, int **, int **, int **);
kusano 7d535a
extern int     sp_coletree (int *, int *, int *, int, int, int *);
kusano 7d535a
extern void    relax_snode (const int, int *, const int, int *, int *);
kusano 7d535a
extern void    heap_relax_snode (const int, int *, const int, int *, int *);
kusano 7d535a
extern int     mark_relax(int, int *, int *, int *, int *, int *, int *);
kusano 7d535a
extern void    ilu_relax_snode (const int, int *, const int, int *,
kusano 7d535a
				int *, int *);
kusano 7d535a
extern void    ilu_heap_relax_snode (const int, int *, const int, int *,
kusano 7d535a
				     int *, int*);
kusano 7d535a
extern void    resetrep_col (const int, const int *, int *);
kusano 7d535a
extern int     spcoletree (int *, int *, int *, int, int, int *);
kusano 7d535a
extern int     *TreePostorder (int, int *);
kusano 7d535a
extern double  SuperLU_timer_ ();
kusano 7d535a
extern int     sp_ienv (int);
kusano 7d535a
extern int     lsame_ (char *, char *);
kusano 7d535a
extern int     xerbla_ (char *, int *);
kusano 7d535a
extern void    ifill (int *, int, int);
kusano 7d535a
extern void    snode_profile (int, int *);
kusano 7d535a
extern void    super_stats (int, int *);
kusano 7d535a
extern void    check_repfnz(int, int, int, int *);
kusano 7d535a
extern void    PrintSumm (char *, int, int, int);
kusano 7d535a
extern void    StatInit(SuperLUStat_t *);
kusano 7d535a
extern void    StatPrint (SuperLUStat_t *);
kusano 7d535a
extern void    StatFree(SuperLUStat_t *);
kusano 7d535a
extern void    print_panel_seg(int, int, int, int, int *, int *);
kusano 7d535a
extern int     print_int_vec(char *,int, int *);
kusano 7d535a
extern int     slu_PrintInt10(char *, int, int *);
kusano 7d535a
kusano 7d535a
#ifdef __cplusplus
kusano 7d535a
  }
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
#endif /* __SUPERLU_UTIL */