| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| #include "slu_zdefs.h" |
| |
| |
| |
| |
| void zusolve(int, int, doublecomplex*, doublecomplex*); |
| void zlsolve(int, int, doublecomplex*, doublecomplex*); |
| void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*); |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| int |
| sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, |
| SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info) |
| { |
| #ifdef _CRAY |
| _fcd ftcs1 = _cptofcd("L", strlen("L")), |
| ftcs2 = _cptofcd("N", strlen("N")), |
| ftcs3 = _cptofcd("U", strlen("U")); |
| #endif |
| SCformat *Lstore; |
| NCformat *Ustore; |
| doublecomplex *Lval, *Uval; |
| int incx = 1, incy = 1; |
| doublecomplex temp; |
| doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0}; |
| doublecomplex comp_zero = {0.0, 0.0}; |
| int nrow; |
| int fsupc, nsupr, nsupc, luptr, istart, irow; |
| int i, k, iptr, jcol; |
| doublecomplex *work; |
| flops_t solve_ops; |
| |
| |
| *info = 0; |
| if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; |
| else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && |
| !lsame_(trans, "C")) *info = -2; |
| else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; |
| else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; |
| else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; |
| if ( *info ) { |
| i = -(*info); |
| xerbla_("sp_ztrsv", &i); |
| return 0; |
| } |
| |
| Lstore = L->Store; |
| Lval = Lstore->nzval; |
| Ustore = U->Store; |
| Uval = Ustore->nzval; |
| solve_ops = 0; |
| |
| if ( !(work = doublecomplexCalloc(L->nrow)) ) |
| ABORT("Malloc fails for work in sp_ztrsv()."); |
| |
| if ( lsame_(trans, "N") ) { |
| |
| if ( lsame_(uplo, "L") ) { |
| |
| if ( L->nrow == 0 ) return 0; |
| |
| for (k = 0; k <= Lstore->nsuper; k++) { |
| fsupc = L_FST_SUPC(k); |
| istart = L_SUB_START(fsupc); |
| nsupr = L_SUB_START(fsupc+1) - istart; |
| nsupc = L_FST_SUPC(k+1) - fsupc; |
| luptr = L_NZ_START(fsupc); |
| nrow = nsupr - nsupc; |
| |
| |
| solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc; |
| solve_ops += 8 * nrow * nsupc; |
| |
| if ( nsupc == 1 ) { |
| for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) { |
| irow = L_SUB(iptr); |
| ++luptr; |
| zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]); |
| z_sub(&x[irow], &x[irow], &comp_zero); |
| } |
| } else { |
| #ifdef USE_VENDOR_BLAS |
| #ifdef _CRAY |
| CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| |
| CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], |
| &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); |
| #else |
| ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| |
| zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], |
| &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); |
| #endif |
| #else |
| zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]); |
| |
| zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc], |
| &x[fsupc], &work[0] ); |
| #endif |
| |
| iptr = istart + nsupc; |
| for (i = 0; i < nrow; ++i, ++iptr) { |
| irow = L_SUB(iptr); |
| z_sub(&x[irow], &x[irow], &work[i]); |
| work[i] = comp_zero; |
| |
| } |
| } |
| } |
| |
| } else { |
| |
| |
| if ( U->nrow == 0 ) return 0; |
| |
| for (k = Lstore->nsuper; k >= 0; k--) { |
| fsupc = L_FST_SUPC(k); |
| nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); |
| nsupc = L_FST_SUPC(k+1) - fsupc; |
| luptr = L_NZ_START(fsupc); |
| |
| |
| solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; |
| |
| if ( nsupc == 1 ) { |
| z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); |
| for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) { |
| irow = U_SUB(i); |
| zz_mult(&comp_zero, &x[fsupc], &Uval[i]); |
| z_sub(&x[irow], &x[irow], &comp_zero); |
| } |
| } else { |
| #ifdef USE_VENDOR_BLAS |
| #ifdef _CRAY |
| CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #else |
| ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #endif |
| #else |
| zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] ); |
| #endif |
| |
| for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { |
| solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); |
| for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); |
| i++) { |
| irow = U_SUB(i); |
| zz_mult(&comp_zero, &x[jcol], &Uval[i]); |
| z_sub(&x[irow], &x[irow], &comp_zero); |
| } |
| } |
| } |
| } |
| |
| } |
| } else if ( lsame_(trans, "T") ) { |
| |
| if ( lsame_(uplo, "L") ) { |
| |
| if ( L->nrow == 0 ) return 0; |
| |
| for (k = Lstore->nsuper; k >= 0; --k) { |
| fsupc = L_FST_SUPC(k); |
| istart = L_SUB_START(fsupc); |
| nsupr = L_SUB_START(fsupc+1) - istart; |
| nsupc = L_FST_SUPC(k+1) - fsupc; |
| luptr = L_NZ_START(fsupc); |
| |
| solve_ops += 8 * (nsupr - nsupc) * nsupc; |
| |
| for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { |
| iptr = istart + nsupc; |
| for (i = L_NZ_START(jcol) + nsupc; |
| i < L_NZ_START(jcol+1); i++) { |
| irow = L_SUB(iptr); |
| zz_mult(&comp_zero, &x[irow], &Lval[i]); |
| z_sub(&x[jcol], &x[jcol], &comp_zero); |
| iptr++; |
| } |
| } |
| |
| if ( nsupc > 1 ) { |
| solve_ops += 4 * nsupc * (nsupc - 1); |
| #ifdef _CRAY |
| ftcs1 = _cptofcd("L", strlen("L")); |
| ftcs2 = _cptofcd("T", strlen("T")); |
| ftcs3 = _cptofcd("U", strlen("U")); |
| CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #else |
| ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #endif |
| } |
| } |
| } else { |
| |
| if ( U->nrow == 0 ) return 0; |
| |
| for (k = 0; k <= Lstore->nsuper; k++) { |
| fsupc = L_FST_SUPC(k); |
| nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); |
| nsupc = L_FST_SUPC(k+1) - fsupc; |
| luptr = L_NZ_START(fsupc); |
| |
| for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { |
| solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); |
| for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { |
| irow = U_SUB(i); |
| zz_mult(&comp_zero, &x[irow], &Uval[i]); |
| z_sub(&x[jcol], &x[jcol], &comp_zero); |
| } |
| } |
| |
| |
| solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; |
| |
| if ( nsupc == 1 ) { |
| z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); |
| } else { |
| #ifdef _CRAY |
| ftcs1 = _cptofcd("U", strlen("U")); |
| ftcs2 = _cptofcd("T", strlen("T")); |
| ftcs3 = _cptofcd("N", strlen("N")); |
| CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #else |
| ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #endif |
| } |
| } |
| } |
| } else { |
| |
| if ( lsame_(uplo, "L") ) { |
| |
| if ( L->nrow == 0 ) return 0; |
| |
| for (k = Lstore->nsuper; k >= 0; --k) { |
| fsupc = L_FST_SUPC(k); |
| istart = L_SUB_START(fsupc); |
| nsupr = L_SUB_START(fsupc+1) - istart; |
| nsupc = L_FST_SUPC(k+1) - fsupc; |
| luptr = L_NZ_START(fsupc); |
| |
| solve_ops += 8 * (nsupr - nsupc) * nsupc; |
| |
| for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { |
| iptr = istart + nsupc; |
| for (i = L_NZ_START(jcol) + nsupc; |
| i < L_NZ_START(jcol+1); i++) { |
| irow = L_SUB(iptr); |
| zz_conj(&temp, &Lval[i]); |
| zz_mult(&comp_zero, &x[irow], &temp); |
| z_sub(&x[jcol], &x[jcol], &comp_zero); |
| iptr++; |
| } |
| } |
| |
| if ( nsupc > 1 ) { |
| solve_ops += 4 * nsupc * (nsupc - 1); |
| #ifdef _CRAY |
| ftcs1 = _cptofcd("L", strlen("L")); |
| ftcs2 = _cptofcd(trans, strlen("T")); |
| ftcs3 = _cptofcd("U", strlen("U")); |
| ZTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #else |
| ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #endif |
| } |
| } |
| } else { |
| |
| if ( U->nrow == 0 ) return 0; |
| |
| for (k = 0; k <= Lstore->nsuper; k++) { |
| fsupc = L_FST_SUPC(k); |
| nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); |
| nsupc = L_FST_SUPC(k+1) - fsupc; |
| luptr = L_NZ_START(fsupc); |
| |
| for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { |
| solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); |
| for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { |
| irow = U_SUB(i); |
| zz_conj(&temp, &Uval[i]); |
| zz_mult(&comp_zero, &x[irow], &temp); |
| z_sub(&x[jcol], &x[jcol], &comp_zero); |
| } |
| } |
| |
| |
| solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; |
| |
| if ( nsupc == 1 ) { |
| zz_conj(&temp, &Lval[luptr]); |
| z_div(&x[fsupc], &x[fsupc], &temp); |
| } else { |
| #ifdef _CRAY |
| ftcs1 = _cptofcd("U", strlen("U")); |
| ftcs2 = _cptofcd(trans, strlen("T")); |
| ftcs3 = _cptofcd("N", strlen("N")); |
| ZTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #else |
| ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr, |
| &x[fsupc], &incx); |
| #endif |
| } |
| } |
| } |
| } |
| |
| stat->ops[SOLVE] += solve_ops; |
| SUPERLU_FREE(work); |
| return 0; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| int |
| sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, |
| int incx, doublecomplex beta, doublecomplex *y, int incy) |
| { |
| |
| |
| NCformat *Astore; |
| doublecomplex *Aval; |
| int info; |
| doublecomplex temp, temp1; |
| int lenx, leny, i, j, irow; |
| int iy, jx, jy, kx, ky; |
| int notran; |
| doublecomplex comp_zero = {0.0, 0.0}; |
| doublecomplex comp_one = {1.0, 0.0}; |
| |
| notran = lsame_(trans, "N"); |
| Astore = A->Store; |
| Aval = Astore->nzval; |
| |
| |
| info = 0; |
| if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; |
| else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; |
| else if (incx == 0) info = 5; |
| else if (incy == 0) info = 8; |
| if (info != 0) { |
| xerbla_("sp_zgemv ", &info); |
| return 0; |
| } |
| |
| |
| if (A->nrow == 0 || A->ncol == 0 || |
| z_eq(&alpha, &comp_zero) && |
| z_eq(&beta, &comp_one)) |
| return 0; |
| |
| |
| |
| |
| if (lsame_(trans, "N")) { |
| lenx = A->ncol; |
| leny = A->nrow; |
| } else { |
| lenx = A->nrow; |
| leny = A->ncol; |
| } |
| if (incx > 0) kx = 0; |
| else kx = - (lenx - 1) * incx; |
| if (incy > 0) ky = 0; |
| else ky = - (leny - 1) * incy; |
| |
| |
| |
| |
| if ( !z_eq(&beta, &comp_one) ) { |
| if (incy == 1) { |
| if ( z_eq(&beta, &comp_zero) ) |
| for (i = 0; i < leny; ++i) y[i] = comp_zero; |
| else |
| for (i = 0; i < leny; ++i) |
| zz_mult(&y[i], &beta, &y[i]); |
| } else { |
| iy = ky; |
| if ( z_eq(&beta, &comp_zero) ) |
| for (i = 0; i < leny; ++i) { |
| y[iy] = comp_zero; |
| iy += incy; |
| } |
| else |
| for (i = 0; i < leny; ++i) { |
| zz_mult(&y[iy], &beta, &y[iy]); |
| iy += incy; |
| } |
| } |
| } |
| |
| if ( z_eq(&alpha, &comp_zero) ) return 0; |
| |
| if ( notran ) { |
| |
| jx = kx; |
| if (incy == 1) { |
| for (j = 0; j < A->ncol; ++j) { |
| if ( !z_eq(&x[jx], &comp_zero) ) { |
| zz_mult(&temp, &alpha, &x[jx]); |
| for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { |
| irow = Astore->rowind[i]; |
| zz_mult(&temp1, &temp, &Aval[i]); |
| z_add(&y[irow], &y[irow], &temp1); |
| } |
| } |
| jx += incx; |
| } |
| } else { |
| ABORT("Not implemented."); |
| } |
| } else { |
| |
| jy = ky; |
| if (incx == 1) { |
| for (j = 0; j < A->ncol; ++j) { |
| temp = comp_zero; |
| for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { |
| irow = Astore->rowind[i]; |
| zz_mult(&temp1, &Aval[i], &x[irow]); |
| z_add(&temp, &temp, &temp1); |
| } |
| zz_mult(&temp1, &alpha, &temp); |
| z_add(&y[jy], &y[jy], &temp1); |
| jy += incy; |
| } |
| } else { |
| ABORT("Not implemented."); |
| } |
| } |
| return 0; |
| } |
| |